
add cu8 operational guide

barbaravaldez committed 4 years ago
commit f47e1833a3
100 changed files with 38,280 insertions and 0 deletions
  1. Big-Data-Clusters/CU8/Public/_config.yml (+2, -0)
  2. Big-Data-Clusters/CU8/Public/_data/toc.yml (+387, -0)
  3. Big-Data-Clusters/CU8/Public/content/cert-management/cer001-create-root-ca.ipynb (+727, -0)
  4. Big-Data-Clusters/CU8/Public/content/cert-management/cer002-download-existing-root-ca.ipynb (+564, -0)
  5. Big-Data-Clusters/CU8/Public/content/cert-management/cer003-upload-existing-root-ca.ipynb (+587, -0)
  6. Big-Data-Clusters/CU8/Public/content/cert-management/cer004-download-upload-existing-root-ca.ipynb (+707, -0)
  7. Big-Data-Clusters/CU8/Public/content/cert-management/cer005-install-existing-root-ca.ipynb (+888, -0)
  8. Big-Data-Clusters/CU8/Public/content/cert-management/cer010-install-generated-root-ca-locally.ipynb (+612, -0)
  9. Big-Data-Clusters/CU8/Public/content/cert-management/cer020-create-management-service-proxy-cert.ipynb (+402, -0)
  10. Big-Data-Clusters/CU8/Public/content/cert-management/cer021-create-knox-cert.ipynb (+402, -0)
  11. Big-Data-Clusters/CU8/Public/content/cert-management/cer022-create-app-proxy-cert.ipynb (+402, -0)
  12. Big-Data-Clusters/CU8/Public/content/cert-management/cer023-create-master-certs.ipynb (+403, -0)
  13. Big-Data-Clusters/CU8/Public/content/cert-management/cer024-create-controller-cert.ipynb (+407, -0)
  14. Big-Data-Clusters/CU8/Public/content/cert-management/cer030-sign-service-proxy-generated-cert.ipynb (+818, -0)
  15. Big-Data-Clusters/CU8/Public/content/cert-management/cer031-sign-knox-generated-cert.ipynb (+818, -0)
  16. Big-Data-Clusters/CU8/Public/content/cert-management/cer032-sign-app-proxy-generated-cert.ipynb (+818, -0)
  17. Big-Data-Clusters/CU8/Public/content/cert-management/cer033-sign-master-generated-certs.ipynb (+817, -0)
  18. Big-Data-Clusters/CU8/Public/content/cert-management/cer034-sign-controller-generated-cert.ipynb (+859, -0)
  19. Big-Data-Clusters/CU8/Public/content/cert-management/cer040-install-service-proxy-cert.ipynb (+926, -0)
  20. Big-Data-Clusters/CU8/Public/content/cert-management/cer041-install-knox-cert.ipynb (+918, -0)
  21. Big-Data-Clusters/CU8/Public/content/cert-management/cer042-install-app-proxy-cert.ipynb (+923, -0)
  22. Big-Data-Clusters/CU8/Public/content/cert-management/cer043-install-master-certs.ipynb (+1027, -0)
  23. Big-Data-Clusters/CU8/Public/content/cert-management/cer044-install-controller-cert.ipynb (+915, -0)
  24. Big-Data-Clusters/CU8/Public/content/cert-management/cer050-wait-cluster-healthly.ipynb (+264, -0)
  25. Big-Data-Clusters/CU8/Public/content/cert-management/cer100-create-root-ca-install-certs.ipynb (+467, -0)
  26. Big-Data-Clusters/CU8/Public/content/cert-management/cer101-use-root-ca-install-certs.ipynb (+456, -0)
  27. Big-Data-Clusters/CU8/Public/content/cert-management/cer102-use-bdc-ca-install-certs.ipynb (+455, -0)
  28. Big-Data-Clusters/CU8/Public/content/cert-management/readme.md (+81, -0)
  29. Big-Data-Clusters/CU8/Public/content/cert-management/toc.yml (+55, -0)
  30. Big-Data-Clusters/CU8/Public/content/common/readme.md (+25, -0)
  31. Big-Data-Clusters/CU8/Public/content/common/sop005-az-login.ipynb (+404, -0)
  32. Big-Data-Clusters/CU8/Public/content/common/sop006-az-logout.ipynb (+404, -0)
  33. Big-Data-Clusters/CU8/Public/content/common/sop007-get-key-version-information.ipynb (+359, -0)
  34. Big-Data-Clusters/CU8/Public/content/common/sop011-set-kubernetes-context.ipynb (+389, -0)
  35. Big-Data-Clusters/CU8/Public/content/common/sop013-create-secret-for-azdata-login.ipynb (+398, -0)
  36. Big-Data-Clusters/CU8/Public/content/common/sop014-delete-secret-for-azdata-login.ipynb (+382, -0)
  37. Big-Data-Clusters/CU8/Public/content/common/sop028-azdata-login.ipynb (+381, -0)
  38. Big-Data-Clusters/CU8/Public/content/common/sop033-azdata-logout.ipynb (+360, -0)
  39. Big-Data-Clusters/CU8/Public/content/common/sop034-wait-cluster-healthly.ipynb (+269, -0)
  40. Big-Data-Clusters/CU8/Public/content/common/toc.yml (+23, -0)
  41. Big-Data-Clusters/CU8/Public/content/diagnose/readme.md (+31, -0)
  42. Big-Data-Clusters/CU8/Public/content/diagnose/toc.yml (+29, -0)
  43. Big-Data-Clusters/CU8/Public/content/diagnose/tsg027-observe-bdc-create.ipynb (+385, -0)
  44. Big-Data-Clusters/CU8/Public/content/diagnose/tsg029-find-dumps-in-the-cluster.ipynb (+180, -0)
  45. Big-Data-Clusters/CU8/Public/content/diagnose/tsg032-get-cpu-and-memory-for-all-containers.ipynb (+158, -0)
  46. Big-Data-Clusters/CU8/Public/content/diagnose/tsg037-determine-primary-master-replica.ipynb (+387, -0)
  47. Big-Data-Clusters/CU8/Public/content/diagnose/tsg055-time-curl-to-sparkhead.ipynb (+548, -0)
  48. Big-Data-Clusters/CU8/Public/content/diagnose/tsg060-get-disk-space-for-all-pvcs.ipynb (+562, -0)
  49. Big-Data-Clusters/CU8/Public/content/diagnose/tsg078-is-cluster-healthy.ipynb (+361, -0)
  50. Big-Data-Clusters/CU8/Public/content/diagnose/tsg079-generate-controller-core-dump.ipynb (+359, -0)
  51. Big-Data-Clusters/CU8/Public/content/diagnose/tsg086-run-top-for-all-containers.ipynb (+146, -0)
  52. Big-Data-Clusters/CU8/Public/content/diagnose/tsg087-use-hadoop-fs.ipynb (+493, -0)
  53. Big-Data-Clusters/CU8/Public/content/diagnose/tsg108-controller-failed-to-upgrade.ipynb (+494, -0)
  54. Big-Data-Clusters/CU8/Public/content/diagnose/tsg114-port-forwarding-for-controldb.ipynb (+481, -0)
  55. Big-Data-Clusters/CU8/Public/content/install/readme.md (+37, -0)
  56. Big-Data-Clusters/CU8/Public/content/install/sop010-upgrade-bdc.ipynb (+390, -0)
  57. Big-Data-Clusters/CU8/Public/content/install/sop012-brew-install-odbc-for-sql-server.ipynb (+401, -0)
  58. Big-Data-Clusters/CU8/Public/content/install/sop036-install-kubectl.ipynb (+463, -0)
  59. Big-Data-Clusters/CU8/Public/content/install/sop037-uninstall-kubectl.ipynb (+413, -0)
  60. Big-Data-Clusters/CU8/Public/content/install/sop038-install-az.ipynb (+410, -0)
  61. Big-Data-Clusters/CU8/Public/content/install/sop039-uninstall-az.ipynb (+401, -0)
  62. Big-Data-Clusters/CU8/Public/content/install/sop040-upgrade-pip.ipynb (+403, -0)
  63. Big-Data-Clusters/CU8/Public/content/install/sop054-install-azdata.ipynb (+359, -0)
  64. Big-Data-Clusters/CU8/Public/content/install/sop055-uninstall-azdata.ipynb (+428, -0)
  65. Big-Data-Clusters/CU8/Public/content/install/sop059-install-kubernetes-module.ipynb (+419, -0)
  66. Big-Data-Clusters/CU8/Public/content/install/sop060-uninstall-kubernetes-module.ipynb (+419, -0)
  67. Big-Data-Clusters/CU8/Public/content/install/sop062-install-ipython-sql-module.ipynb (+433, -0)
  68. Big-Data-Clusters/CU8/Public/content/install/sop063-packman-install-azdata.ipynb (+359, -0)
  69. Big-Data-Clusters/CU8/Public/content/install/sop064-packman-uninstall-azdata.ipynb (+358, -0)
  70. Big-Data-Clusters/CU8/Public/content/install/sop069-install-odbc-driver-for-sql-server.ipynb (+63, -0)
  71. Big-Data-Clusters/CU8/Public/content/install/toc.yml (+35, -0)
  72. Big-Data-Clusters/CU8/Public/content/log-analyzers/readme.md (+49, -0)
  73. Big-Data-Clusters/CU8/Public/content/log-analyzers/toc.yml (+47, -0)
  74. Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg030-get-errorlog-from-all-pods.ipynb (+289, -0)
  75. Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg031-get-polybase-logs-for-all-pods.ipynb (+287, -0)
  76. Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg034-get-livy-logs.ipynb (+291, -0)
  77. Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg035-get-sparkhistory-logs.ipynb (+291, -0)
  78. Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg036-get-controller-logs.ipynb (+327, -0)
  79. Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg046-get-knox-logs.ipynb (+328, -0)
  80. Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg073-get-influxdb-logs.ipynb (+285, -0)
  81. Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg076-get-elastic-search-logs.ipynb (+288, -0)
  82. Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg077-get-kibana-logs.ipynb (+285, -0)
  83. Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg088-get-datanode-logs.ipynb (+290, -0)
  84. Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg090-get-nodemanager-logs.ipynb (+288, -0)
  85. Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg092-get-all-supervisord-log-tails.ipynb (+288, -0)
  86. Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg093-get-all-agent-log-tails.ipynb (+262, -0)
  87. Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg094-get-grafana-logs.ipynb (+285, -0)
  88. Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg095-get-namenode-logs.ipynb (+288, -0)
  89. Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg096-get-zookeeper-logs.ipynb (+288, -0)
  90. Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg117-get-approxy-nginx-logs.ipynb (+297, -0)
  91. Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg120-get-all-provisioner-log-tails.ipynb (+249, -0)
  92. Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg121-get-all-supervisor-mssql-logs.ipynb (+291, -0)
  93. Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg122-get-hive-metastore-logs.ipynb (+288, -0)
  94. Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg123-get-hive-logs.ipynb (+288, -0)
  95. Big-Data-Clusters/CU8/Public/content/log-files/readme.md (+19, -0)
  96. Big-Data-Clusters/CU8/Public/content/log-files/toc.yml (+17, -0)
  97. Big-Data-Clusters/CU8/Public/content/log-files/tsg001-copy-logs.ipynb (+359, -0)
  98. Big-Data-Clusters/CU8/Public/content/log-files/tsg061-tail-bdc-container-logs.ipynb (+205, -0)
  99. Big-Data-Clusters/CU8/Public/content/log-files/tsg062-tail-bdc-previous-container-logs.ipynb (+200, -0)
  100. Big-Data-Clusters/CU8/Public/content/log-files/tsg083-run-kubectl-cluster-info-dump.ipynb (+405, -0)

+ 2 - 0
Big-Data-Clusters/CU8/Public/_config.yml

@@ -0,0 +1,2 @@
+title: Operations and Support - SQL Server 2019 Big Data Clusters
+description: A collection of notebooks to help operate and support SQL Server Big Data Clusters.

+ 387 - 0
Big-Data-Clusters/CU8/Public/_data/toc.yml

@@ -0,0 +1,387 @@
+- title: Welcome
+  url: /readme
+  not_numbered: true
+- title: Search
+  search: true
+
+- title: Troubleshooters
+  url: /troubleshooters/readme
+  not_numbered: true
+  expand_sections: true
+  sections:
+  - title: TSG100 - The Big Data Cluster troubleshooter
+    url: troubleshooters/tsg100-troubleshoot-bdc
+  - title: TSG101 - SQL Server troubleshooter
+    url: troubleshooters/tsg101-troubleshoot-sql-server
+  - title: TSG102 - HDFS troubleshooter
+    url: troubleshooters/tsg102-troubleshoot-hdfs
+  - title: TSG103 - Spark troubleshooter
+    url: troubleshooters/tsg103-troubleshoot-spark
+  - title: TSG104 - Control troubleshooter
+    url: troubleshooters/tsg104-troubleshoot-control
+  - title: TSG105 - Gateway troubleshooter
+    url: troubleshooters/tsg105-troubleshoot-gateway
+  - title: TSG106 - App troubleshooter
+    url: troubleshooters/tsg106-troubleshoot-app
+- title: Log Analyzers
+  url: /log-analyzers/readme
+  not_numbered: true
+  expand_sections: true
+  sections:
+  - title: TSG046 - Knox gateway logs
+    url: log-analyzers/tsg046-get-knox-logs
+  - title: TSG036 - Controller logs
+    url: log-analyzers/tsg036-get-controller-logs
+  - title: TSG034 - Livy logs
+    url: log-analyzers/tsg034-get-livy-logs
+  - title: TSG035 - Spark History logs
+    url: log-analyzers/tsg035-get-sparkhistory-logs
+  - title: TSG030 - SQL Server errorlog files
+    url: log-analyzers/tsg030-get-errorlog-from-all-pods
+  - title: TSG031 - SQL Server PolyBase logs
+    url: log-analyzers/tsg031-get-polybase-logs-for-all-pods
+  - title: TSG095 - Hadoop namenode logs
+    url: log-analyzers/tsg095-get-namenode-logs
+  - title: TSG090 - Yarn nodemanager logs
+    url: log-analyzers/tsg090-get-nodemanager-logs
+  - title: TSG088 - Hadoop datanode logs
+    url: log-analyzers/tsg088-get-datanode-logs
+  - title: TSG096 - Zookeeper logs
+    url: log-analyzers/tsg096-get-zookeeper-logs
+  - title: TSG073 - InfluxDB logs
+    url: log-analyzers/tsg073-get-influxdb-logs
+  - title: TSG076 - Elastic Search logs
+    url: log-analyzers/tsg076-get-elastic-search-logs
+  - title: TSG077 - Kibana logs
+    url: log-analyzers/tsg077-get-kibana-logs
+  - title: TSG092 - Supervisord log tail for all containers in BDC
+    url: log-analyzers/tsg092-get-all-supervisord-log-tails
+  - title: TSG093 - Agent log tail for all containers in BDC
+    url: log-analyzers/tsg093-get-all-agent-log-tails
+  - title: TSG094 - Grafana logs
+    url: log-analyzers/tsg094-get-grafana-logs
+  - title: TSG117 - App-Deploy Proxy Nginx Logs
+    url: log-analyzers/tsg117-get-approxy-nginx-logs
+  - title: TSG120 - Provisioner log tail for all containers in BDC
+    url: log-analyzers/tsg120-get-all-provisioner-log-tails
+  - title: TSG121 - Supervisor mssql-server logs
+    url: log-analyzers/tsg121-get-all-supervisor-mssql-logs
+  - title: TSG122 - Hive Metastore logs
+    url: log-analyzers/tsg122-get-hive-metastore-logs
+  - title: TSG123 - Hive logs
+    url: log-analyzers/tsg123-get-hive-logs
+- title: Diagnose
+  url: /diagnose/readme
+  not_numbered: true
+  expand_sections: true
+  sections:
+  - title: TSG027 - Observe cluster deployment
+    url: diagnose/tsg027-observe-bdc-create
+  - title: TSG078 - Is cluster healthy
+    url: diagnose/tsg078-is-cluster-healthy
+  - title: TSG029 - Find dumps in the cluster
+    url: diagnose/tsg029-find-dumps-in-the-cluster
+  - title: TSG032 - CPU and Memory usage for all containers
+    url: diagnose/tsg032-get-cpu-and-memory-for-all-containers
+  - title: TSG060 - Persistent Volume disk space for all BDC PVCs
+    url: diagnose/tsg060-get-disk-space-for-all-pvcs
+  - title: TSG087 - Use hadoop fs CLI on nmnode pod
+    url: diagnose/tsg087-use-hadoop-fs
+  - title: TSG037 - Determine master pool pod hosting primary replica
+    url: diagnose/tsg037-determine-primary-master-replica
+  - title: TSG055 - Time Curl to Sparkhead
+    url: diagnose/tsg055-time-curl-to-sparkhead
+  - title: TSG079 - Generate `controller` core dump
+    url: diagnose/tsg079-generate-controller-core-dump
+  - title: TSG086 - Run `top` in all containers
+    url: diagnose/tsg086-run-top-for-all-containers
+  - title: TSG108 - View the controller upgrade config map
+    url: diagnose/tsg108-controller-failed-to-upgrade
+  - title: TSG114 - Connect to ControlDB using Port Forwarding
+    url: diagnose/tsg114-port-forwarding-for-controldb
+- title: Repair 
+  url: /repair/readme
+  not_numbered: true
+  expand_sections: false
+  sections:
+  - title: TSG024 - Namenode is in safe mode
+    url: repair/tsg024-name-node-is-in-safe-mode
+  - title: TSG041 - Unable to create a new asynchronous I/O context (increase sysctl fs.aio-max-nr)
+    url: repair/tsg041-increase-fs-aio-max-nr
+  - title: TSG048 - Deployment stuck at "Waiting for controller pod to be up"
+    url: repair/tsg048-create-stuck-waiting-for-controller
+  - title: TSG038 - BDC create failures due to - doc is missing key
+    url: repair/tsg038-doc-is-missing-key-error
+  - title: TSG047 - ConfigException - Expected only one object with name
+    url: repair/tsg047-expected-only-one-object-with-name
+  - title: TSG050 - Cluster create hangs with "timeout expired waiting for volumes to attach or mount for pod"
+    url: repair/tsg050-timeout-expired-waiting-for-volumes
+  - title: TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it
+    url: repair/tsg056-kubectl-no-connection-could-be-made
+  - title: TSG057 - Failed when starting controller service. System.TimeoutException
+    url: repair/tsg057-failed-when-starting-controller
+  - title: TSG067 - Failed to complete kube config setup
+    url: repair/tsg067-failed-to-complete-kube-config-setup
+  - title: TSG075 - FailedCreatePodSandBox due to NetworkPlugin cni failed to set up pod
+    url: repair/tsg075-networkplugin-cni-failed-to-setup-pod
+  - title: TSG110 - Azdata returns ApiError
+    url: repair/tsg110-azdata-returns-apierror
+  - title: TSG028 - Restart node manager on all storage pool nodes
+    url: repair/tsg028-restart-nodemanager-in-storage-pool
+  - title: TSG045 - The maximum number of data disks allowed to be attached to a VM of this size (AKS)
+    url: repair/tsg045-max-number-data-disks-allowed
+  - title: TSG109 - Set upgrade timeouts
+    url: repair/tsg109-upgrade-stalled
+  - title: TSG053 - ADS Provided Books must be saved before use
+    url: repair/tsg053-save-book-first
+  - title: SOP016 - Get SID for Active Directory user/group
+    url: repair/sop016-get-sid-for-principal
+  - title: SOP017 - Add app-deploy AD group
+    url: repair/sop017-add-app-deploy-group
+  - title: TSG124 - 'No credentials were supplied' error from azdata login
+    url: repair/tsg124-no-credentials-were-supplied
+  - title: TSG126 - azdata fails with 'accept the license terms to use this product'
+    url: repair/tsg126-accept-license-terms
+- title: Monitor - Big Data Cluster
+  url: /monitor-bdc/readme
+  not_numbered: true
+  expand_sections: true
+  sections:
+  - title: TSG014 - Show BDC endpoints
+    url: monitor-bdc/tsg014-azdata-bdc-endpoint-list
+  - title: TSG012 - Show BDC Status
+    url: monitor-bdc/tsg012-azdata-bdc-status
+  - title: TSG069 - Show Big Data Cluster Gateway status
+    url: monitor-bdc/tsg069-azdata-bdc-gateway-status
+  - title: TSG049 - Show BDC Controller status
+    url: monitor-bdc/tsg049-azdata-bdc-control-status
+  - title: TSG033 - Show BDC SQL status
+    url: monitor-bdc/tsg033-azdata-bdc-sql-status
+  - title: TSG068 - Show BDC HDFS status
+    url: monitor-bdc/tsg068-azdata-bdc-hdfs-status
+  - title: TSG017 - Show BDC Configuration
+    url: monitor-bdc/tsg017-azdata-bdc-config-show
+  - title: TSG004 - Show BDC Apps
+    url: monitor-bdc/tsg004-show-app-list
+  - title: TSG003 - Show BDC Spark sessions
+    url: monitor-bdc/tsg003-show-spark-sessions
+  - title: TSG013 - Show file list in Storage Pool (HDFS)
+    url: monitor-bdc/tsg013-azdata-bdc-hdfs-ls
+  - title: TSG070 - Query SQL master pool
+    url: monitor-bdc/tsg070-use-azdata-sql-query
+- title: Monitor - Kubernetes
+  url: /monitor-k8s/readme
+  not_numbered: true
+  expand_sections: false
+  sections:
+  - title: TSG021 - Get cluster info (Kubernetes)
+    url: monitor-k8s/tsg021-get-k8s-cluster-info
+  - title: TSG008 - Get version information (Kubernetes)
+    url: monitor-k8s/tsg008-get-k8s-version-info
+  - title: TSG081 - Get namespaces (Kubernetes)
+    url: monitor-k8s/tsg081-get-kubernetes-namespaces
+  - title: TSG009 - Get nodes (Kubernetes)
+    url: monitor-k8s/tsg009-get-nodes
+  - title: TSG006 - Get system pod status
+    url: monitor-k8s/tsg006-view-system-pod-status
+  - title: TSG007 - Get BDC pod status
+    url: monitor-k8s/tsg007-view-bdc-pod-status
+  - title: TSG015 - View BDC services (Kubernetes)
+    url: monitor-k8s/tsg015-view-k8s-services-for-bdc
+  - title: TSG097 - Get BDC stateful sets (Kubernetes)
+    url: monitor-k8s/tsg097-get-statefulsets
+  - title: TSG098 - Get BDC replicasets (Kubernetes)
+    url: monitor-k8s/tsg098-get-replicasets
+  - title: TSG099 - Get BDC daemonsets (Kubernetes)
+    url: monitor-k8s/tsg099-get-daemonsets
+  - title: TSG023 - Get all BDC objects (Kubernetes)
+    url: monitor-k8s/tsg023-run-kubectl-get-all
+  - title: TSG063 - Get storage classes (Kubernetes)
+    url: monitor-k8s/tsg063-get-storage-classes
+  - title: TSG072 - Get Persistent Volumes (Kubernetes)
+    url: monitor-k8s/tsg072-get-persistent-volumes
+  - title: TSG064 - Get BDC Persistent Volume Claims
+    url: monitor-k8s/tsg064-get-persistent-volume-claims
+  - title: TSG065 - Get BDC secrets (Kubernetes)
+    url: monitor-k8s/tsg065-get-secrets-for-bdc-namespace
+  - title: TSG066 - Get BDC event (Kubernetes)
+    url: monitor-k8s/tsg066-get-kubernetes-events
+  - title: TSG020 - Describe nodes (Kubernetes)
+    url: monitor-k8s/tsg020-describe-all-nodes
+  - title: TSG016 - Describe BDC pods
+    url: monitor-k8s/tsg016-describe-all-pods-in-bdc-namespace
+  - title: TSG089 - Describe BDC non-running pods
+    url: monitor-k8s/tsg089-describe-non-running-pods-in-bdc
+  - title: TSG010 - Get configuration contexts
+    url: monitor-k8s/tsg010-get-kubernetes-contexts
+  - title: TSG022 - Get external IP address for kubeadm host
+    url: monitor-k8s/tsg022-get-external-ip-of-kubeadm-host
+  - title: TSG042 - Get `node name` and external mounts for `Data` and `Logs` `PVCs`
+    url: monitor-k8s/tsg042-get-hosting-node-and-data-log-mount
+- title: Logs
+  url: /log-files/readme
+  not_numbered: true
+  expand_sections: false
+  sections:
+  - title: TSG001 - Run azdata copy-logs
+    url: log-files/tsg001-copy-logs
+  - title: TSG091 - Get the azdata CLI logs
+    url: log-files/tsg091-get-azdata-logs
+  - title: TSG083 - Run kubectl cluster-info dump
+    url: log-files/tsg083-run-kubectl-cluster-info-dump
+  - title: TSG061 - Get tail of all container logs for pods in BDC namespace
+    url: log-files/tsg061-tail-bdc-container-logs
+  - title: TSG062 - Get tail of all previous container logs for pods in BDC namespace
+    url: log-files/tsg062-tail-bdc-previous-container-logs
+  - title: TSG084 - Internal Query Processor Error
+    url: log-files/tsg084-internal-query-process-error
+- title: Samples 
+  url: /sample/readme
+  not_numbered: true
+  expand_sections: false
+  sections:
+  - title: SAM001a - Query Storage Pool from SQL Server Master Pool (1 of 3) - Load sample data
+    url: sample/sam001a-load-sample-data-into-bdc
+  - title: SAM001b - Query Storage Pool from SQL Server Master Pool (2 of 3) - Convert data to parquet
+    url: sample/sam001b-convert-csv-to-parquet
+  - title: SAM001c - Query Storage Pool from SQL Server Master Pool (3 of 3) - Query HDFS from SQL Server
+    url: sample/sam001c-query-hdfs-in-sql-server
+  - title: SAM002 - Storage Pool (2 of 2) - Query HDFS
+    url: sample/sam002-query-hdfs-in-sql-server
+  - title: SAM003 - Data Pool Example
+    url: sample/sam003-data-pool
+  - title: SAM008 - Spark using azdata
+    url: sample/sam008-spark-using-azdata
+  - title: SAM009 - HDFS using azdata
+    url: sample/sam009-hdfs-using-azdata
+  - title: SAM010 - App using azdata
+    url: sample/sam010-app-using-azdata
+- title: Install 
+  url: /install/readme
+  not_numbered: true
+  expand_sections: false
+  sections:
+  - title: SOP036 - Install kubectl command line interface
+    url: install/sop036-install-kubectl
+  - title: SOP037 - Uninstall kubectl command line interface
+    url: install/sop037-uninstall-kubectl
+  - title: SOP059 - Install Kubernetes Python module
+    url: install/sop059-install-kubernetes-module
+  - title: SOP060 - Uninstall kubernetes module
+    url: install/sop060-uninstall-kubernetes-module
+  - title: SOP062 - Install ipython-sql and pyodbc modules
+    url: install/sop062-install-ipython-sql-module
+  - title: SOP063 - Install azdata CLI (using package manager)
+    url: install/sop063-packman-install-azdata
+  - title: SOP064 - Uninstall azdata CLI (using package manager)
+    url: install/sop064-packman-uninstall-azdata
+  - title: SOP054 - Install azdata CLI (using pip)
+    url: install/sop054-install-azdata
+  - title: SOP055 - Uninstall azdata CLI (using pip)
+    url: install/sop055-uninstall-azdata
+  - title: SOP038 - Install Azure command line interface
+    url: install/sop038-install-az
+  - title: SOP039 - Uninstall Azure command line interface
+    url: install/sop039-uninstall-az
+  - title: SOP040 - Upgrade pip in ADS Python sandbox
+    url: install/sop040-upgrade-pip
+  - title: SOP069 - Install ODBC for SQL Server
+    url: install/sop069-install-odbc-driver-for-sql-server
+  - title: SOP012 - Install unixodbc for Mac
+    url: install/sop012-brew-install-odbc-for-sql-server
+  - title: SOP010 - Upgrade a big data cluster
+    url: install/sop010-upgrade-bdc
+- title: Certificate Management
+  url: /cert-management/readme
+  not_numbered: true
+  expand_sections: false
+  sections:
+  - title: CER001 - Generate a Root CA certificate
+    url: cert-management/cer001-create-root-ca
+  - title: CER002 - Download existing Root CA certificate
+    url: cert-management/cer002-download-existing-root-ca
+  - title: CER003 - Upload existing Root CA certificate
+    url: cert-management/cer003-upload-existing-root-ca
+  - title: CER004 - Download and Upload existing Root CA certificate
+    url: cert-management/cer004-download-upload-existing-root-ca
+  - title: CER005 - Install new Root CA certificate
+    url: cert-management/cer005-install-existing-root-ca
+  - title: CER010 - Install generated Root CA locally
+    url: cert-management/cer010-install-generated-root-ca-locally
+  - title: CER020 - Create Management Proxy certificate
+    url: cert-management/cer020-create-management-service-proxy-cert
+  - title: CER021 - Create Knox certificate
+    url: cert-management/cer021-create-knox-cert
+  - title: CER022 - Create App Proxy certificate
+    url: cert-management/cer022-create-app-proxy-cert
+  - title: CER023 - Create Master certificates
+    url: cert-management/cer023-create-master-certs
+  - title: CER024 - Create Controller certificate
+    url: cert-management/cer024-create-controller-cert
+  - title: CER030 - Sign Management Proxy certificate with generated CA
+    url: cert-management/cer030-sign-service-proxy-generated-cert
+  - title: CER031 - Sign Knox certificate with generated CA
+    url: cert-management/cer031-sign-knox-generated-cert
+  - title: CER032 - Sign App-Proxy certificate with generated CA
+    url: cert-management/cer032-sign-app-proxy-generated-cert
+  - title: CER033 - Sign Master certificates with generated CA
+    url: cert-management/cer033-sign-master-generated-certs
+  - title: CER034 - Sign Controller certificate with cluster Root CA
+    url: cert-management/cer034-sign-controller-generated-cert
+  - title: CER040 - Install signed Management Proxy certificate
+    url: cert-management/cer040-install-service-proxy-cert
+  - title: CER041 - Install signed Knox certificate
+    url: cert-management/cer041-install-knox-cert
+  - title: CER042 - Install signed App-Proxy certificate
+    url: cert-management/cer042-install-app-proxy-cert
+  - title: CER043 - Install signed Master certificates
+    url: cert-management/cer043-install-master-certs
+  - title: CER044 - Install signed Controller certificate
+    url: cert-management/cer044-install-controller-cert
+  - title: CER050 - Wait for BDC to be Healthy
+    url: cert-management/cer050-wait-cluster-healthly
+  - title: CER100 - Configure Cluster with Self Signed Certificates
+    url: cert-management/cer100-create-root-ca-install-certs
+  - title: CER101 - Configure Cluster with Self Signed Certificates using existing Root CA
+    url: cert-management/cer101-use-root-ca-install-certs
+  - title: CER102 - Configure Cluster with Self Signed Certificates using existing Big Data Cluster CA
+    url: cert-management/cer102-use-bdc-ca-install-certs
+- title: Encryption Key Management 
+  url: /tde/readme
+  not_numbered: true
+  expand_sections: false
+  sections:
+
+  - title: SOP124 - List Keys For Encryption At Rest
+    url: tde/sop124-list-keys-encryption-at-rest
+  - title: SOP128 - Enable HDFS Encryption zones in Big Data Clusters
+    url: tde/sop128-enable-encryption-zones
+  - title: SOP125 - Delete Key For Encryption At Rest
+    url: tde/sop125-delete-keys-encryption-at-rest
+  - title: SOP126 - Backup Keys For Encryption At Rest
+    url: tde/sop126-backup-keys-encryption-at-rest
+  - title: SOP127 - Restore Keys For Encryption At Rest
+    url: tde/sop127-restore-keys-encryption-at-rest
+- title: Common 
+  url: /common/readme
+  not_numbered: true
+  expand_sections: false
+  sections:
+  - title: SOP005 - az login
+    url: common/sop005-az-login
+  - title: SOP006 - az logout
+    url: common/sop006-az-logout
+  - title: SOP007 - Version information (azdata, bdc, kubernetes)
+    url: common/sop007-get-key-version-information
+  - title: SOP011 - Set kubernetes configuration context
+    url: common/sop011-set-kubernetes-context
+  - title: SOP013 - Create secret for azdata login (inside cluster)
+    url: common/sop013-create-secret-for-azdata-login
+  - title: SOP014 - Delete secret for azdata login (inside cluster)
+    url: common/sop014-delete-secret-for-azdata-login
+  - title: SOP028 - azdata login
+    url: common/sop028-azdata-login
+  - title: SOP033 - azdata logout
+    url: common/sop033-azdata-logout
+  - title: SOP034 - Wait for BDC to be Healthy
+    url: common/sop034-wait-cluster-healthly
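
Note on the toc.yml above: it drives the generated book's navigation, and each `url` is a book-relative path (no extension) that resolves to a notebook or readme under `content/`. A minimal sketch for sanity-checking the toc against the files in this commit, assuming PyYAML is installed and the repository root is the working directory (the helper itself is illustrative, not part of the commit):

    # check_toc.py (hypothetical helper): report toc entries whose target file
    # is missing under Big-Data-Clusters/CU8/Public/content/.
    from pathlib import Path
    import yaml  # PyYAML

    root = Path("Big-Data-Clusters/CU8/Public")

    def walk(entries):
        """Yield every 'url' in the toc, recursing into nested 'sections'."""
        for entry in entries or []:
            if "url" in entry:
                yield entry["url"]
            yield from walk(entry.get("sections"))

    toc = yaml.safe_load((root / "_data" / "toc.yml").read_text(encoding="utf8"))

    for url in walk(toc):
        base = root / "content" / url.lstrip("/")
        # Each entry resolves to either a notebook (.ipynb) or a readme (.md).
        if not (base.with_suffix(".ipynb").exists() or base.with_suffix(".md").exists()):
            print(f"missing: {url}")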

+ 727 - 0
Big-Data-Clusters/CU8/Public/content/cert-management/cer001-create-root-ca.ipynb

@@ -0,0 +1,727 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "CER001 - Generate a Root CA certificate\n",
+                "=======================================\n",
+                "\n",
+                "If a Certificate Authority certificate for the test environment has\n",
+                "never been generated, generate one using this notebook.\n",
+                "\n",
+                "If a Certificate Authority has been generated in another cluster, and you\n",
+                "want to reuse the same CA for multiple clusters, then use CER002/CER003\n",
+                "to download and upload the already generated Root CA.\n",
+                "\n",
+                "-   [CER002 - Download existing Root CA\n",
+                "    certificate](../cert-management/cer002-download-existing-root-ca.ipynb)\n",
+                "-   [CER003 - Upload existing Root CA\n",
+                "    certificate](../cert-management/cer003-upload-existing-root-ca.ipynb)\n",
+                "\n",
+                "Consider using one Root CA certificate for all non-production clusters\n",
+                "in each environment, as this reduces the number of Root CA certificates\n",
+                "that need to be uploaded to clients connecting to these clusters.\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "import getpass\n",
+                "\n",
+                "common_name = \"SQL Server Big Data Clusters Test CA\"\n",
+                "\n",
+                "country_name = \"US\"\n",
+                "state_or_province_name = \"Illinois\"\n",
+                "locality_name = \"Chicago\"\n",
+                "organization_name = \"Contoso\"\n",
+                "organizational_unit_name = \"Finance\"\n",
+                "email_address = f\"{getpass.getuser().lower()}@contoso.com\"\n",
+                "\n",
+                "days = \"398\" # Max supported validity period in Safari - https://www.thesslstore.com/blog/ssl-certificate-validity-will-be-limited-to-one-year-by-apples-safari-browser/\n",
+                "\n",
+                "test_cert_store_root = \"/var/opt/secrets/test-certificates\""
+            ]
+        },
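
Note on the parameters cell above: these values populate the X.509 subject fields of the Root CA via the OpenSSL configuration file built later in this notebook, and `days = "398"` keeps the certificate within Safari's maximum accepted validity period. As a hedged illustration only (this snippet is hypothetical, not code from the notebook), the same parameters correspond to an OpenSSL subject string like this:

    # Illustrative: the notebook's parameters expressed as an OpenSSL -subj
    # string. The notebook itself writes them into a config file instead.
    subject = (
        f"/C={country_name}"
        f"/ST={state_or_province_name}"
        f"/L={locality_name}"
        f"/O={organization_name}"
        f"/OU={organizational_unit_name}"
        f"/CN={common_name}"
        f"/emailAddress={email_address}"
    )
    print(subject)
    # e.g. /C=US/ST=Illinois/L=Chicago/O=Contoso/OU=Finance/CN=SQL Server Big Data Clusters Test CA/emailAddress=...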
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Common functions\n",
+                "\n",
+                "Define helper functions used in this notebook."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n",
+                "import sys\n",
+                "import os\n",
+                "import re\n",
+                "import json\n",
+                "import platform\n",
+                "import shlex\n",
+                "import shutil\n",
+                "import datetime\n",
+                "\n",
+                "from subprocess import Popen, PIPE\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
+                "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
+                "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
+                "\n",
+                "first_run = True\n",
+                "rules = None\n",
+                "debug_logging = False\n",
+                "\n",
+                "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
+                "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
+                "\n",
+                "    NOTES:\n",
+                "\n",
+                "    1.  Commands that need this kind of ' quoting on Windows e.g.:\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n",
+                "\n",
+                "        Need to actually pass in as '\"':\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n",
+                "\n",
+                "        The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n",
+                "        \n",
+                "            `iter(p.stdout.readline, b'')`\n",
+                "\n",
+                "        The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n",
+                "    \"\"\"\n",
+                "    MAX_RETRIES = 5\n",
+                "    output = \"\"\n",
+                "    retry = False\n",
+                "\n",
+                "    global first_run\n",
+                "    global rules\n",
+                "\n",
+                "    if first_run:\n",
+                "        first_run = False\n",
+                "        rules = load_rules()\n",
+                "\n",
+                "    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n",
+                "    #\n",
+                "    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
+                "    #\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n",
+                "        cmd = cmd.replace(\"\\n\", \" \")\n",
+                "\n",
+                "    # shlex.split is required on bash and for Windows paths with spaces\n",
+                "    #\n",
+                "    cmd_actual = shlex.split(cmd)\n",
+                "\n",
+                "    # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n",
+                "    #\n",
+                "    user_provided_exe_name = cmd_actual[0].lower()\n",
+                "\n",
+                "    # When running python, use the python in the ADS sandbox ({sys.executable})\n",
+                "    #\n",
+                "    if cmd.startswith(\"python \"):\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n",
+                "\n",
+                "        # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n",
+                "        # with:\n",
+                "        #\n",
+                "        #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n",
+                "        #\n",
+                "        # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n",
+                "        #\n",
+                "        if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n",
+                "            os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n",
+                "\n",
+                "    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n",
+                "    #\n",
+                "    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n",
+                "\n",
+                "    # To aid supportability, determine which binary file will actually be executed on the machine\n",
+                "    #\n",
+                "    which_binary = None\n",
+                "\n",
+                "    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n",
+                "    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n",
+                "    # of CURL exists on the machine use that one.  (Unfortunately the curl.exe in System32 is almost\n",
+                "    # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n",
+                "    # look for the 2nd installation of CURL in the path)\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n",
+                "        path = os.getenv('PATH')\n",
+                "        for p in path.split(os.path.pathsep):\n",
+                "            p = os.path.join(p, \"curl.exe\")\n",
+                "            if os.path.exists(p) and os.access(p, os.X_OK):\n",
+                "                if p.lower().find(\"system32\") == -1:\n",
+                "                    cmd_actual[0] = p\n",
+                "                    which_binary = p\n",
+                "                    break\n",
+                "\n",
+                "    # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n",
+                "    # seems to be required for .msi installs of azdata.cmd/az.cmd.  (otherwise Popen returns FileNotFound) \n",
+                "    #\n",
+                "    # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        which_binary = shutil.which(cmd_actual[0])\n",
+                "\n",
+                "    # Display an install HINT, so the user can click on a SOP to install the missing binary\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n",
+                "        print(sys.path)\n",
+                "\n",
+                "        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
+                "            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n",
+                "    else:   \n",
+                "        cmd_actual[0] = which_binary\n",
+                "\n",
+                "    start_time = datetime.datetime.now().replace(microsecond=0)\n",
+                "\n",
+                "    print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n",
+                "    print(f\"       using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n",
+                "    print(f\"       cwd: {os.getcwd()}\")\n",
+                "\n",
+                "    # Command-line tools such as CURL and AZDATA HDFS commands output\n",
+                "    # scrolling progress bars, which cause Jupyter to hang forever. To\n",
+                "    # work around this, use no_output=True.\n",
+                "    #\n",
+                "\n",
+                "\n",
+                "    # Work around an infinite hang when a notebook generates a non-zero return code: break out, and do not wait\n",
+                "    #\n",
+                "    wait = True \n",
+                "\n",
+                "    try:\n",
+                "        if no_output:\n",
+                "            p = Popen(cmd_actual)\n",
+                "        else:\n",
+                "            p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n",
+                "            with p.stdout:\n",
+                "                for line in iter(p.stdout.readline, b''):\n",
+                "                    line = line.decode()\n",
+                "                    if return_output:\n",
+                "                        output = output + line\n",
+                "                    else:\n",
+                "                        if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n",
+                "                            regex = re.compile('  \"(.*)\"\\: \"(.*)\"') \n",
+                "                            match = regex.match(line)\n",
+                "                            if match:\n",
+                "                                if match.group(1).find(\"HTML\") != -1:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n",
+                "                                else:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n",
+                "\n",
+                "                                    wait = False\n",
+                "                                    break # otherwise infinite hang, have not worked out why yet.\n",
+                "                        else:\n",
+                "                            print(line, end='')\n",
+                "                            if rules is not None:\n",
+                "                                apply_expert_rules(line)\n",
+                "\n",
+                "        if wait:\n",
+                "            p.wait()\n",
+                "    except FileNotFoundError as e:\n",
+                "        if install_hint is not None:\n",
+                "            display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n",
+                "\n",
+                "    exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n",
+                "\n",
+                "    if not no_output:\n",
+                "        for line in iter(p.stderr.readline, b''):\n",
+                "            try:\n",
+                "                line_decoded = line.decode()\n",
+                "            except UnicodeDecodeError:\n",
+                "                # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n",
+                "                #\n",
+                "                #   \\xa0\n",
+                "                #\n",
+                "                # For example see this in the response from `az group create`:\n",
+                "                #\n",
+                "                # ERROR: Get Token request returned http error: 400 and server \n",
+                "                # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n",
+                "                # The refresh token has expired due to inactivity.\\xa0The token was \n",
+                "                # issued on 2018-10-25T23:35:11.9832872Z\n",
+                "                #\n",
+                "                # which generates the exception:\n",
+                "                #\n",
+                "                # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n",
+                "                #\n",
+                "                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n",
+                "                print(line)\n",
+                "                line_decoded = \"\"\n",
+                "                pass\n",
+                "            else:\n",
+                "\n",
+                "                # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n",
+                "                # print this empty \"STDERR:\" line as it is confusing.\n",
+                "                #\n",
+                "                if line_decoded == \"\":\n",
+                "                    continue\n",
+                "                \n",
+                "                print(f\"STDERR: {line_decoded}\", end='')\n",
+                "\n",
+                "                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n",
+                "                    exit_code_workaround = 1\n",
+                "\n",
+                "                # inject HINTs to next TSG/SOP based on output in stderr\n",
+                "                #\n",
+                "                if user_provided_exe_name in error_hints:\n",
+                "                    for error_hint in error_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(error_hint[0]) != -1:\n",
+                "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
+                "\n",
+                "                # apply expert rules (to run follow-on notebooks), based on output\n",
+                "                #\n",
+                "                if rules is not None:\n",
+                "                    apply_expert_rules(line_decoded)\n",
+                "\n",
+                "                # Verify if a transient error, if so automatically retry (recursive)\n",
+                "                #\n",
+                "                if user_provided_exe_name in retry_hints:\n",
+                "                    for retry_hint in retry_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(retry_hint) != -1:\n",
+                "                            if retry_count < MAX_RETRIES:\n",
+                "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
+                "                                retry_count = retry_count + 1\n",
+                "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
+                "\n",
+                "                                if return_output:\n",
+                "                                    if base64_decode:\n",
+                "                                        import base64\n",
+                "\n",
+                "                                        return base64.b64decode(output).decode('utf-8')\n",
+                "                                    else:\n",
+                "                                        return output\n",
+                "\n",
+                "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
+                "\n",
+                "    # WORKAROUND: We avoid the infinite hang above in the `azdata notebook run` failure case by inferring success (from stdout output), so\n",
+                "    # don't wait here if success is already known from above.\n",
+                "    #\n",
+                "    if wait: \n",
+                "        if p.returncode != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n",
+                "    else:\n",
+                "        if exit_code_workaround != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n",
+                "\n",
+                "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
+                "\n",
+                "    if return_output:\n",
+                "        if base64_decode:\n",
+                "            import base64\n",
+                "\n",
+                "            return base64.b64decode(output).decode('utf-8')\n",
+                "        else:\n",
+                "            return output\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    \"\"\"Load a json file from disk and return the contents\"\"\"\n",
+                "\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def load_rules():\n",
+                "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
+                "\n",
+                "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
+                "    #\n",
+                "    try:\n",
+                "        j = load_json(\"cer001-create-root-ca.ipynb\")\n",
+                "    except:\n",
+                "        pass # If the user has renamed the book, we can't load ourselves.  NOTE: Is there a way in Jupyter to know your own filename?\n",
+                "    else:\n",
+                "        if \"metadata\" in j and \\\n",
+                "            \"azdata\" in j[\"metadata\"] and \\\n",
+                "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
+                "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "\n",
+                "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
+                "\n",
+                "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n",
+                "\n",
+                "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
+                "\n",
+                "            return rules\n",
+                "\n",
+                "def apply_expert_rules(line):\n",
+                "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n",
+                "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
+                "\n",
+                "    global rules\n",
+                "\n",
+                "    for rule in rules:\n",
+                "        notebook = rule[1]\n",
+                "        cell_type = rule[2]\n",
+                "        output_type = rule[3] # i.e. stream or error\n",
+                "        output_type_name = rule[4] # i.e. ename or name \n",
+                "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
+                "        details_name = rule[6]  # i.e. evalue or text \n",
+                "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
+                "\n",
+                "        if debug_logging:\n",
+                "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n",
+                "\n",
+                "        if re.match(expression, line, re.DOTALL):\n",
+                "\n",
+                "            if debug_logging:\n",
+                "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n",
+                "\n",
+                "            match_found = True\n",
+                "\n",
+                "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
+                "\n",
+                "\n",
+                "\n",
+                "print('Common functions defined successfully.')\n",
+                "\n",
+                "# Hints for binary (transient fault) retry, (known) error and install guide\n",
+                "#\n",
+                "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n",
+                "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n",
+                "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}"
+            ]
+        },
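
Note on the `run` helper above: it is the workhorse for every code cell that follows. It streams stdout as the command executes, prefixes stderr lines with `STDERR:`, retries automatically when stderr matches an entry in `retry_hints`, and renders `error_hints`/`install_hint` entries as clickable Markdown HINTs pointing to follow-on notebooks. Typical call patterns look like the sketch below (the command strings are illustrative examples, not taken from this notebook):

    # Stream output to the notebook; raises SystemExit on a non-zero exit code.
    run("kubectl get pods --all-namespaces")

    # Capture output as a string instead of streaming it.
    nodes = run("kubectl get nodes", return_output=True)
    print(nodes)

    # Suppress streaming for tools that draw progress bars (e.g. curl), which
    # otherwise hang Jupyter.
    run("curl -o /tmp/out.html https://example.com", no_output=True)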
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the Kubernetes namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster using the kubectl command\n",
+                "line interface.\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
+                "    except:\n",
+                "        from IPython.display import Markdown\n",
+                "        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'.  SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Create a temporary directory to stage files"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Create a temporary directory to hold configuration files\n",
+                "\n",
+                "import tempfile\n",
+                "\n",
+                "temp_dir = tempfile.mkdtemp()\n",
+                "\n",
+                "print(f\"Temporary directory created: {temp_dir}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Helper function to save configuration files to disk"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define helper function 'save_file' to save configuration files to the temporary directory created above\n",
+                "import os\n",
+                "import io\n",
+                "\n",
+                "def save_file(filename, contents):\n",
+                "    with io.open(os.path.join(temp_dir, filename), \"w\", encoding='utf8', newline='\\n') as text_file:\n",
+                "        text_file.write(contents)\n",
+                "\n",
+                "    print(\"File saved: \" + os.path.join(temp_dir, filename))\n",
+                "\n",
+                "print(\"Function `save_file` defined successfully.\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Certificate configuration file"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "certificate = f\"\"\"\n",
+                "[ ca ]\n",
+                "default_ca    = CA_default      # The default ca section\n",
+                "\n",
+                "[ CA_default ]\n",
+                "default_days     = 1000         # How long to certify for\n",
+                "default_crl_days = 30           # How long before next CRL\n",
+                "default_md       = sha256       # Use public key default MD\n",
+                "preserve         = no           # Keep passed DN ordering\n",
+                "\n",
+                "x509_extensions  = ca_extensions # The extensions to add to the cert\n",
+                "\n",
+                "email_in_dn      = no            # Don't concat the email in the DN\n",
+                "copy_extensions  = copy          # Required to copy SANs from CSR to cert\n",
+                "\n",
+                "[ req ]\n",
+                "default_bits       = 2048\n",
+                "default_keyfile    = {test_cert_store_root}/cakey.pem\n",
+                "distinguished_name = ca_distinguished_name\n",
+                "x509_extensions    = ca_extensions\n",
+                "string_mask        = utf8only\n",
+                "\n",
+                "[ ca_distinguished_name ]\n",
+                "countryName         = Country Name (2 letter code)\n",
+                "countryName_default = {country_name}\n",
+                "\n",
+                "stateOrProvinceName         = State or Province Name (full name)\n",
+                "stateOrProvinceName_default = {state_or_province_name}\n",
+                "\n",
+                "localityName                = Locality Name (eg, city)\n",
+                "localityName_default        = {locality_name}\n",
+                "\n",
+                "organizationName            = Organization Name (eg, company)\n",
+                "organizationName_default    = {organization_name}\n",
+                "\n",
+                "organizationalUnitName         = Organizational Unit (eg, division)\n",
+                "organizationalUnitName_default = {organizational_unit_name}\n",
+                "\n",
+                "commonName         = Common Name (e.g. server FQDN or YOUR name)\n",
+                "commonName_default = {common_name}\n",
+                "\n",
+                "emailAddress         = Email Address\n",
+                "emailAddress_default = {email_address}\n",
+                "\n",
+                "[ ca_extensions ]\n",
+                "subjectKeyIdentifier   = hash\n",
+                "authorityKeyIdentifier = keyid:always, issuer\n",
+                "basicConstraints       = critical, CA:true\n",
+                "keyUsage               = keyCertSign, cRLSign\n",
+                "\"\"\"\n",
+                "\n",
+                "save_file(\"ca.openssl.cnf\", certificate)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get name of the \u2018Running\u2019 `controller` `pod`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place the name of the 'Running' controller pod in variable `controller`\n",
+                "\n",
+                "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n",
+                "\n",
+                "print(f\"Controller pod name: {controller}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Create folder on controller to hold Test Certificates"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "run(f'kubectl exec {controller} -n {namespace} -c controller -- bash -c \"mkdir -p {test_cert_store_root}\" ')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Copy certificate configuration to `controller` `pod`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import os\n",
+                "\n",
+                "cwd = os.getcwd()\n",
+                "os.chdir(temp_dir) # Work around a kubectl bug on Windows; can't put c:\\ on the kubectl cp cmd line\n",
+                "\n",
+                "run(f'kubectl cp ca.openssl.cnf {controller}:{test_cert_store_root}/ca.openssl.cnf -c controller -n {namespace}')\n",
+                "\n",
+                "os.chdir(cwd)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Generate certificate"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "cmd = f\"openssl req -x509 -config {test_cert_store_root}/ca.openssl.cnf -newkey rsa:2048 -sha256 -nodes -days {days} -out {test_cert_store_root}/cacert.pem -outform PEM -subj '/C={country_name}/ST={state_or_province_name}/L={locality_name}/O={organization_name}/OU={organizational_unit_name}/CN={common_name}'\"\n",
+                "\n",
+                "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')"
+            ]
+        },
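+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Verify the generated certificate (optional)\n",
+                "\n",
+                "A minimal read-only check (a sketch, assuming the previous cell\n",
+                "succeeded): print the subject and validity dates of the Root CA\n",
+                "certificate just created on the `controller` pod."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "# Optional: display the subject and validity dates of the new Root CA certificate\n",
+                "\n",
+                "cmd = f\"openssl x509 -in {test_cert_store_root}/cacert.pem -noout -subject -dates\"\n",
+                "\n",
+                "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')"
+            ]
+        },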
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Clean up temporary directory for staging configuration files"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Delete the temporary directory used to hold configuration files\n",
+                "\n",
+                "import shutil\n",
+                "\n",
+                "shutil.rmtree(temp_dir)\n",
+                "\n",
+                "print(f'Temporary directory deleted: {temp_dir}')"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "Related\n",
+                "-------\n",
+                "\n",
+                "-   [CER002 - Download existing Root CA\n",
+                "    certificate](../cert-management/cer002-download-existing-root-ca.ipynb)\n",
+                "\n",
+                "-   [CER003 - Upload existing Root CA\n",
+                "    certificate](../cert-management/cer003-upload-existing-root-ca.ipynb)\n",
+                "\n",
+                "-   [CER010 - Install generated Root CA\n",
+                "    locally](../cert-management/cer010-install-generated-root-ca-locally.ipynb)"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": true
+        }
+    }
+}

+ 564 - 0
Big-Data-Clusters/CU8/Public/content/cert-management/cer002-download-existing-root-ca.ipynb

@@ -0,0 +1,564 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "CER002 - Download existing Root CA certificate\n",
+                "==============================================\n",
+                "\n",
+                "Use this notebook to download a generated Root CA certificate from a\n",
+                "cluster that installed one using:\n",
+                "\n",
+                "-   [CER001 - Generate a Root CA\n",
+                "    certificate](../cert-management/cer001-create-root-ca.ipynb)\n",
+                "\n",
+                "And then to upload the generated Root CA to another cluster use:\n",
+                "\n",
+                "-   [CER003 - Upload existing Root CA\n",
+                "    certificate](../cert-management/cer003-upload-existing-root-ca.ipynb)\n",
+                "\n",
+                "If needed, use these notebooks to view and set the Kubernetes\n",
+                "configuration context appropriately to enable downloading the Root CA\n",
+                "from a Big Data Cluster in one Kubernetes cluster, and to upload it to a\n",
+                "Big Data Cluster in another Kubernetes cluster.\n",
+                "\n",
+                "-   [TSG010 - Get configuration\n",
+                "    contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb)\n",
+                "-   [SOP011 - Set kubernetes configuration\n",
+                "    context](../common/sop011-set-kubernetes-context.ipynb)\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "local_folder_name = \"mssql-cluster-root-ca\"\n",
+                "\n",
+                "test_cert_store_root = \"/var/opt/secrets/test-certificates\""
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Common functions\n",
+                "\n",
+                "Define helper functions used in this notebook."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n",
+                "import sys\n",
+                "import os\n",
+                "import re\n",
+                "import json\n",
+                "import platform\n",
+                "import shlex\n",
+                "import shutil\n",
+                "import datetime\n",
+                "\n",
+                "from subprocess import Popen, PIPE\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
+                "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
+                "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
+                "\n",
+                "first_run = True\n",
+                "rules = None\n",
+                "debug_logging = False\n",
+                "\n",
+                "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
+                "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
+                "\n",
+                "    NOTES:\n",
+                "\n",
+                "    1.  Commands that need this kind of ' quoting on Windows e.g.:\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n",
+                "\n",
+                "        Need to actually pass in as '\"':\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n",
+                "\n",
+                "        The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n",
+                "        \n",
+                "            `iter(p.stdout.readline, b'')`\n",
+                "\n",
+                "        The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n",
+                "    \"\"\"\n",
+                "    MAX_RETRIES = 5\n",
+                "    output = \"\"\n",
+                "    retry = False\n",
+                "\n",
+                "    global first_run\n",
+                "    global rules\n",
+                "\n",
+                "    if first_run:\n",
+                "        first_run = False\n",
+                "        rules = load_rules()\n",
+                "\n",
+                "    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings with \" \"; otherwise we see:\n",
+                "    #\n",
+                "    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
+                "    #\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n",
+                "        cmd = cmd.replace(\"\\n\", \" \")\n",
+                "\n",
+                "    # shlex.split is required on bash and for Windows paths with spaces\n",
+                "    #\n",
+                "    cmd_actual = shlex.split(cmd)\n",
+                "\n",
+                "    # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n",
+                "    #\n",
+                "    user_provided_exe_name = cmd_actual[0].lower()\n",
+                "\n",
+                "    # When running python, use the python in the ADS sandbox ({sys.executable})\n",
+                "    #\n",
+                "    if cmd.startswith(\"python \"):\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n",
+                "\n",
+                "        # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n",
+                "        # with:\n",
+                "        #\n",
+                "        #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n",
+                "        #\n",
+                "        # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n",
+                "        #\n",
+                "        if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n",
+                "            os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n",
+                "\n",
+                "    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n",
+                "    #\n",
+                "    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n",
+                "\n",
+                "    # To aid supportability, determine which binary file will actually be executed on the machine\n",
+                "    #\n",
+                "    which_binary = None\n",
+                "\n",
+                "    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n",
+                "    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n",
+                "    # of CURL exists on the machine use that one.  (Unfortunately the curl.exe in System32 is almost\n",
+                "    # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n",
+                "    # look for the 2nd installation of CURL in the path)\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n",
+                "        path = os.getenv('PATH')\n",
+                "        for p in path.split(os.path.pathsep):\n",
+                "            p = os.path.join(p, \"curl.exe\")\n",
+                "            if os.path.exists(p) and os.access(p, os.X_OK):\n",
+                "                if p.lower().find(\"system32\") == -1:\n",
+                "                    cmd_actual[0] = p\n",
+                "                    which_binary = p\n",
+                "                    break\n",
+                "\n",
+                "    # Find the path-based location (shutil.which) of the executable that will be run (and display it to aid supportability); this\n",
+                "    # seems to be required for .msi installs of azdata.cmd/az.cmd (otherwise Popen returns FileNotFound).\n",
+                "    #\n",
+                "    # NOTE: Bash needs cmd to be the list of space-separated values, hence shlex.split.\n",
+                "    #\n",
+                "    if which_binary is None:\n",
+                "        which_binary = shutil.which(cmd_actual[0])\n",
+                "\n",
+                "    # Display an install HINT, so the user can click on a SOP to install the missing binary\n",
+                "    #\n",
+                "    if which_binary is None:\n",
+                "        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n",
+                "        print(os.environ.get(\"PATH\", \"\"))\n",
+                "\n",
+                "        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
+                "            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n",
+                "    else:   \n",
+                "        cmd_actual[0] = which_binary\n",
+                "\n",
+                "    start_time = datetime.datetime.now().replace(microsecond=0)\n",
+                "\n",
+                "    print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n",
+                "    print(f\"       using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n",
+                "    print(f\"       cwd: {os.getcwd()}\")\n",
+                "\n",
+                "    # Command-line tools such as CURL and the AZDATA HDFS commands output\n",
+                "    # scrolling progress bars, which cause Jupyter to hang forever; to\n",
+                "    # work around this, use no_output=True\n",
+                "    #\n",
+                "\n",
+                "\n",
+                "    # Work around an infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n",
+                "    #\n",
+                "    wait = True \n",
+                "\n",
+                "    try:\n",
+                "        if no_output:\n",
+                "            p = Popen(cmd_actual)\n",
+                "        else:\n",
+                "            p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n",
+                "            with p.stdout:\n",
+                "                for line in iter(p.stdout.readline, b''):\n",
+                "                    line = line.decode()\n",
+                "                    if return_output:\n",
+                "                        output = output + line\n",
+                "                    else:\n",
+                "                        if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n",
+                "                            regex = re.compile('  \"(.*)\": \"(.*)\"')\n",
+                "                            match = regex.match(line)\n",
+                "                            if match:\n",
+                "                                if match.group(1).find(\"HTML\") != -1:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n",
+                "                                else:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n",
+                "\n",
+                "                                    wait = False\n",
+                "                                    break # otherwise infinite hang, have not worked out why yet.\n",
+                "                        else:\n",
+                "                            print(line, end='')\n",
+                "                            if rules is not None:\n",
+                "                                apply_expert_rules(line)\n",
+                "\n",
+                "        if wait:\n",
+                "            p.wait()\n",
+                "    except FileNotFoundError as e:\n",
+                "        if install_hint is not None:\n",
+                "            display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n",
+                "\n",
+                "    exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n",
+                "\n",
+                "    if not no_output:\n",
+                "        for line in iter(p.stderr.readline, b''):\n",
+                "            try:\n",
+                "                line_decoded = line.decode()\n",
+                "            except UnicodeDecodeError:\n",
+                "                # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n",
+                "                #\n",
+                "                #   \\xa0\n",
+                "                #\n",
+                "                # For example see this in the response from `az group create`:\n",
+                "                #\n",
+                "                # ERROR: Get Token request returned http error: 400 and server \n",
+                "                # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n",
+                "                # The refresh token has expired due to inactivity.\\xa0The token was \n",
+                "                # issued on 2018-10-25T23:35:11.9832872Z\n",
+                "                #\n",
+                "                # which generates the exception:\n",
+                "                #\n",
+                "                # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n",
+                "                #\n",
+                "                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n",
+                "                print(line)\n",
+                "                line_decoded = \"\"\n",
+                "                pass\n",
+                "            else:\n",
+                "\n",
+                "                # azdata emits a single empty line to stderr when doing an hdfs cp; don't\n",
+                "                # print this empty \"STDERR:\" line as it is confusing.\n",
+                "                #\n",
+                "                if line_decoded == \"\":\n",
+                "                    continue\n",
+                "                \n",
+                "                print(f\"STDERR: {line_decoded}\", end='')\n",
+                "\n",
+                "                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n",
+                "                    exit_code_workaround = 1\n",
+                "\n",
+                "                # inject HINTs to next TSG/SOP based on output in stderr\n",
+                "                #\n",
+                "                if user_provided_exe_name in error_hints:\n",
+                "                    for error_hint in error_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(error_hint[0]) != -1:\n",
+                "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
+                "\n",
+                "                # apply expert rules (to run follow-on notebooks), based on output\n",
+                "                #\n",
+                "                if rules is not None:\n",
+                "                    apply_expert_rules(line_decoded)\n",
+                "\n",
+                "                # Verify if a transient error, if so automatically retry (recursive)\n",
+                "                #\n",
+                "                if user_provided_exe_name in retry_hints:\n",
+                "                    for retry_hint in retry_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(retry_hint) != -1:\n",
+                "                            if retry_count < MAX_RETRIES:\n",
+                "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
+                "                                retry_count = retry_count + 1\n",
+                "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
+                "\n",
+                "                                if return_output:\n",
+                "                                    if base64_decode:\n",
+                "                                        import base64\n",
+                "\n",
+                "                                        return base64.b64decode(output).decode('utf-8')\n",
+                "                                    else:\n",
+                "                                        return output\n",
+                "\n",
+                "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
+                "\n",
+                "    # WORKAROUND: We avoid the infinite hang above in the `azdata notebook run` failure case by inferring success (from stdout output), so\n",
+                "    # don't wait here if success is already known from above\n",
+                "    #\n",
+                "    if wait: \n",
+                "        if p.returncode != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n",
+                "    else:\n",
+                "        if exit_code_workaround != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n",
+                "\n",
+                "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
+                "\n",
+                "    if return_output:\n",
+                "        if base64_decode:\n",
+                "            import base64\n",
+                "\n",
+                "            return base64.b64decode(output).decode('utf-8')\n",
+                "        else:\n",
+                "            return output\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    \"\"\"Load a json file from disk and return the contents\"\"\"\n",
+                "\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def load_rules():\n",
+                "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
+                "\n",
+                "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
+                "    #\n",
+                "    try:\n",
+                "        j = load_json(\"cer002-download-existing-root-ca.ipynb\")\n",
+                "    except:\n",
+                "        pass # If the user has renamed the notebook, we can't load ourselves.  NOTE: Is there a way in Jupyter to know your own filename?\n",
+                "    else:\n",
+                "        if \"metadata\" in j and \\\n",
+                "            \"azdata\" in j[\"metadata\"] and \\\n",
+                "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
+                "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "\n",
+                "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
+                "\n",
+                "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n",
+                "\n",
+                "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
+                "\n",
+                "            return rules\n",
+                "\n",
+                "def apply_expert_rules(line):\n",
+                "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n",
+                "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
+                "\n",
+                "    global rules\n",
+                "\n",
+                "    for rule in rules:\n",
+                "        notebook = rule[1]\n",
+                "        cell_type = rule[2]\n",
+                "        output_type = rule[3] # i.e. stream or error\n",
+                "        output_type_name = rule[4] # i.e. ename or name \n",
+                "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
+                "        details_name = rule[6]  # i.e. evalue or text \n",
+                "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
+                "\n",
+                "        if debug_logging:\n",
+                "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n",
+                "\n",
+                "        if re.match(expression, line, re.DOTALL):\n",
+                "\n",
+                "            if debug_logging:\n",
+                "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n",
+                "\n",
+                "            match_found = True\n",
+                "\n",
+                "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
+                "\n",
+                "\n",
+                "\n",
+                "print('Common functions defined successfully.')\n",
+                "\n",
+                "# Hints for binary (transient fault) retry, (known) error and install guide\n",
+                "#\n",
+                "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n",
+                "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n",
+                "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the Kubernetes namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster using the kubectl command\n",
+                "line interface.\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
+                "    except:\n",
+                "        from IPython.display import Markdown\n",
+                "        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'.  SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get name of the \u2018Running\u2019 `controller` `pod`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place the name of the 'Running' controller pod in variable `controller`\n",
+                "\n",
+                "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n",
+                "\n",
+                "print(f\"Controller pod name: {controller}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Create a temporary folder to hold Root CA certificate"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import os\n",
+                "import tempfile\n",
+                "import shutil\n",
+                "\n",
+                "path = os.path.join(tempfile.gettempdir(), local_folder_name)\n",
+                "\n",
+                "if os.path.isdir(path):\n",
+                "    shutil.rmtree(path)\n",
+                "\n",
+                "os.mkdir(path)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Copy Root CA certificate from `controller` `pod`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import os\n",
+                "\n",
+                "cwd = os.getcwd()\n",
+                "os.chdir(path) # Work around a kubectl bug on Windows; can't put c:\\ on the kubectl cp cmd line\n",
+                "\n",
+                "run(f'kubectl cp {controller}:{test_cert_store_root}/cacert.pem cacert.pem -c controller -n {namespace}')\n",
+                "run(f'kubectl cp {controller}:{test_cert_store_root}/cakey.pem cakey.pem -c controller -n {namespace}')\n",
+                "\n",
+                "os.chdir(cwd)"
+            ]
+        },
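+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Verify the downloaded files (optional)\n",
+                "\n",
+                "A minimal local check (a sketch, assuming the `kubectl cp` commands\n",
+                "above succeeded): confirm that both the certificate and the private\n",
+                "key are present and non-empty in the local folder."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "# Optional: confirm the Root CA certificate and key were downloaded\n",
+                "\n",
+                "import os\n",
+                "\n",
+                "for file_name in [\"cacert.pem\", \"cakey.pem\"]:\n",
+                "    full_path = os.path.join(path, file_name)\n",
+                "    if os.path.isfile(full_path) and os.path.getsize(full_path) > 0:\n",
+                "        print(f\"OK: {full_path} ({os.path.getsize(full_path)} bytes)\")\n",
+                "    else:\n",
+                "        raise SystemExit(f\"Expected file not found or empty: {full_path}\")"
+            ]
+        },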
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "Related\n",
+                "-------\n",
+                "\n",
+                "-   [CER001 - Generate a Root CA\n",
+                "    certificate](../cert-management/cer001-create-root-ca.ipynb)\n",
+                "\n",
+                "-   [CER003 - Upload existing Root CA\n",
+                "    certificate](../cert-management/cer003-upload-existing-root-ca.ipynb)\n",
+                "\n",
+                "-   [CER010 - Install generated Root CA\n",
+                "    locally](../cert-management/cer010-install-generated-root-ca-locally.ipynb)"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": true
+        }
+    }
+}

+ 587 - 0
Big-Data-Clusters/CU8/Public/content/cert-management/cer003-upload-existing-root-ca.ipynb

@@ -0,0 +1,587 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "CER003 - Upload existing Root CA certificate\n",
+                "============================================\n",
+                "\n",
+                "Use this notebook to upload to a cluster a Root CA certificate that\n",
+                "was previously downloaded to this machine using:\n",
+                "\n",
+                "-   [CER002 - Download existing Root CA\n",
+                "    certificate](../cert-management/cer002-download-existing-root-ca.ipynb)\n",
+                "\n",
+                "If needed, use these notebooks to view and set the Kubernetes\n",
+                "configuration context appropriately to enable downloading the Root CA\n",
+                "from a Big Data Cluster in one Kubernetes cluster, and to upload it to a\n",
+                "Big Data Cluster in another Kubernetes cluster.\n",
+                "\n",
+                "-   [TSG010 - Get configuration\n",
+                "    contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb)\n",
+                "-   [SOP011 - Set kubernetes configuration\n",
+                "    context](../common/sop011-set-kubernetes-context.ipynb)\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "local_folder_name = \"mssql-cluster-root-ca\"\n",
+                "\n",
+                "test_cert_store_root = \"/var/opt/secrets/test-certificates\""
+            ]
+        },
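+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Check the local Root CA folder exists (optional)\n",
+                "\n",
+                "A minimal pre-check (a sketch): confirm the folder that [CER002 - Download existing Root CA\n",
+                "certificate](../cert-management/cer002-download-existing-root-ca.ipynb)\n",
+                "creates under the system temporary directory is present before\n",
+                "attempting the upload."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "# Optional: verify the locally downloaded Root CA folder exists\n",
+                "\n",
+                "import os\n",
+                "import tempfile\n",
+                "\n",
+                "local_path = os.path.join(tempfile.gettempdir(), local_folder_name)\n",
+                "\n",
+                "if not os.path.isdir(local_path):\n",
+                "    raise SystemExit(f'Folder not found: {local_path}. Run CER002 first to download the Root CA.')\n",
+                "\n",
+                "print(f'Found local Root CA folder: {local_path}')"
+            ]
+        },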
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Common functions\n",
+                "\n",
+                "Define helper functions used in this notebook."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n",
+                "import sys\n",
+                "import os\n",
+                "import re\n",
+                "import json\n",
+                "import platform\n",
+                "import shlex\n",
+                "import shutil\n",
+                "import datetime\n",
+                "\n",
+                "from subprocess import Popen, PIPE\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
+                "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
+                "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
+                "\n",
+                "first_run = True\n",
+                "rules = None\n",
+                "debug_logging = False\n",
+                "\n",
+                "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
+                "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
+                "\n",
+                "    NOTES:\n",
+                "\n",
+                "    1.  Commands that need this kind of ' quoting on Windows e.g.:\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n",
+                "\n",
+                "        Need to actually pass in as '\"':\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n",
+                "\n",
+                "        The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n",
+                "        \n",
+                "            `iter(p.stdout.readline, b'')`\n",
+                "\n",
+                "        The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n",
+                "    \"\"\"\n",
+                "    MAX_RETRIES = 5\n",
+                "    output = \"\"\n",
+                "    retry = False\n",
+                "\n",
+                "    global first_run\n",
+                "    global rules\n",
+                "\n",
+                "    if first_run:\n",
+                "        first_run = False\n",
+                "        rules = load_rules()\n",
+                "\n",
+                "    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings with \" \"; otherwise we see:\n",
+                "    #\n",
+                "    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
+                "    #\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n",
+                "        cmd = cmd.replace(\"\\n\", \" \")\n",
+                "\n",
+                "    # shlex.split is required on bash and for Windows paths with spaces\n",
+                "    #\n",
+                "    cmd_actual = shlex.split(cmd)\n",
+                "\n",
+                "    # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n",
+                "    #\n",
+                "    user_provided_exe_name = cmd_actual[0].lower()\n",
+                "\n",
+                "    # When running python, use the python in the ADS sandbox ({sys.executable})\n",
+                "    #\n",
+                "    if cmd.startswith(\"python \"):\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n",
+                "\n",
+                "        # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n",
+                "        # with:\n",
+                "        #\n",
+                "        #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n",
+                "        #\n",
+                "        # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n",
+                "        #\n",
+                "        if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n",
+                "            os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n",
+                "\n",
+                "    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n",
+                "    #\n",
+                "    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n",
+                "\n",
+                "    # To aid supportability, determine which binary file will actually be executed on the machine\n",
+                "    #\n",
+                "    which_binary = None\n",
+                "\n",
+                "    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n",
+                "    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n",
+                "    # of CURL exists on the machine use that one.  (Unfortunately the curl.exe in System32 is almost\n",
+                "    # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n",
+                "    # look for the 2nd installation of CURL in the path)\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n",
+                "        path = os.getenv('PATH')\n",
+                "        for p in path.split(os.path.pathsep):\n",
+                "            p = os.path.join(p, \"curl.exe\")\n",
+                "            if os.path.exists(p) and os.access(p, os.X_OK):\n",
+                "                if p.lower().find(\"system32\") == -1:\n",
+                "                    cmd_actual[0] = p\n",
+                "                    which_binary = p\n",
+                "                    break\n",
+                "\n",
+                "    # Find the path-based location (shutil.which) of the executable that will be run (and display it to aid supportability); this\n",
+                "    # seems to be required for .msi installs of azdata.cmd/az.cmd (otherwise Popen returns FileNotFound).\n",
+                "    #\n",
+                "    # NOTE: Bash needs cmd to be the list of space-separated values, hence shlex.split.\n",
+                "    #\n",
+                "    if which_binary is None:\n",
+                "        which_binary = shutil.which(cmd_actual[0])\n",
+                "\n",
+                "    # Display an install HINT, so the user can click on a SOP to install the missing binary\n",
+                "    #\n",
+                "    if which_binary is None:\n",
+                "        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n",
+                "        print(os.environ.get(\"PATH\", \"\"))\n",
+                "\n",
+                "        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
+                "            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n",
+                "    else:   \n",
+                "        cmd_actual[0] = which_binary\n",
+                "\n",
+                "    start_time = datetime.datetime.now().replace(microsecond=0)\n",
+                "\n",
+                "    print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n",
+                "    print(f\"       using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n",
+                "    print(f\"       cwd: {os.getcwd()}\")\n",
+                "\n",
+                "    # Command-line tools such as CURL and the AZDATA HDFS commands output\n",
+                "    # scrolling progress bars, which cause Jupyter to hang forever; to\n",
+                "    # work around this, use no_output=True\n",
+                "    #\n",
+                "\n",
+                "\n",
+                "    # Work around an infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n",
+                "    #\n",
+                "    wait = True \n",
+                "\n",
+                "    try:\n",
+                "        if no_output:\n",
+                "            p = Popen(cmd_actual)\n",
+                "        else:\n",
+                "            p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n",
+                "            with p.stdout:\n",
+                "                for line in iter(p.stdout.readline, b''):\n",
+                "                    line = line.decode()\n",
+                "                    if return_output:\n",
+                "                        output = output + line\n",
+                "                    else:\n",
+                "                        if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n",
+                "                            regex = re.compile('  \"(.*)\": \"(.*)\"')\n",
+                "                            match = regex.match(line)\n",
+                "                            if match:\n",
+                "                                if match.group(1).find(\"HTML\") != -1:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n",
+                "                                else:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n",
+                "\n",
+                "                                    wait = False\n",
+                "                                    break # otherwise infinite hang, have not worked out why yet.\n",
+                "                        else:\n",
+                "                            print(line, end='')\n",
+                "                            if rules is not None:\n",
+                "                                apply_expert_rules(line)\n",
+                "\n",
+                "        if wait:\n",
+                "            p.wait()\n",
+                "    except FileNotFoundError as e:\n",
+                "        if install_hint is not None:\n",
+                "            display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n",
+                "\n",
+                "    exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n",
+                "\n",
+                "    if not no_output:\n",
+                "        for line in iter(p.stderr.readline, b''):\n",
+                "            try:\n",
+                "                line_decoded = line.decode()\n",
+                "            except UnicodeDecodeError:\n",
+                "                # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n",
+                "                #\n",
+                "                #   \\xa0\n",
+                "                #\n",
+                "                # For example see this in the response from `az group create`:\n",
+                "                #\n",
+                "                # ERROR: Get Token request returned http error: 400 and server \n",
+                "                # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n",
+                "                # The refresh token has expired due to inactivity.\\xa0The token was \n",
+                "                # issued on 2018-10-25T23:35:11.9832872Z\n",
+                "                #\n",
+                "                # which generates the exception:\n",
+                "                #\n",
+                "                # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n",
+                "                #\n",
+                "                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n",
+                "                print(line)\n",
+                "                line_decoded = \"\"\n",
+                "                pass\n",
+                "            else:\n",
+                "\n",
+                "                # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n",
+                "                # print this empty \"ERR:\" as it confuses.\n",
+                "                #\n",
+                "                if line_decoded == \"\":\n",
+                "                    continue\n",
+                "                \n",
+                "                print(f\"STDERR: {line_decoded}\", end='')\n",
+                "\n",
+                "                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n",
+                "                    exit_code_workaround = 1\n",
+                "\n",
+                "                # inject HINTs to next TSG/SOP based on output in stderr\n",
+                "                #\n",
+                "                if user_provided_exe_name in error_hints:\n",
+                "                    for error_hint in error_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(error_hint[0]) != -1:\n",
+                "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
+                "\n",
+                "                # apply expert rules (to run follow-on notebooks), based on output\n",
+                "                #\n",
+                "                if rules is not None:\n",
+                "                    apply_expert_rules(line_decoded)\n",
+                "\n",
+                "                # Verify if a transient error, if so automatically retry (recursive)\n",
+                "                #\n",
+                "                if user_provided_exe_name in retry_hints:\n",
+                "                    for retry_hint in retry_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(retry_hint) != -1:\n",
+                "                            if retry_count < MAX_RETRIES:\n",
+                "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
+                "                                retry_count = retry_count + 1\n",
+                "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
+                "\n",
+                "                                if return_output:\n",
+                "                                    if base64_decode:\n",
+                "                                        import base64\n",
+                "\n",
+                "                                        return base64.b64decode(output).decode('utf-8')\n",
+                "                                    else:\n",
+                "                                        return output\n",
+                "\n",
+                "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
+                "\n",
+                "    # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n",
+                "    # don't wait here, if success known above\n",
+                "    #\n",
+                "    if wait: \n",
+                "        if p.returncode != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n",
+                "    else:\n",
+                "        if exit_code_workaround !=0 :\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n",
+                "\n",
+                "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
+                "\n",
+                "    if return_output:\n",
+                "        if base64_decode:\n",
+                "            import base64\n",
+                "\n",
+                "            return base64.b64decode(output).decode('utf-8')\n",
+                "        else:\n",
+                "            return output\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    \"\"\"Load a json file from disk and return the contents\"\"\"\n",
+                "\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def load_rules():\n",
+                "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
+                "\n",
+                "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
+                "    #\n",
+                "    try:\n",
+                "        j = load_json(\"cer003-upload-existing-root-ca.ipynb\")\n",
+                "    except:\n",
+                "        pass # If the user has renamed the book, we can't load ourself.  NOTE: Is there a way in Jupyter, to know your own filename?\n",
+                "    else:\n",
+                "        if \"metadata\" in j and \\\n",
+                "            \"azdata\" in j[\"metadata\"] and \\\n",
+                "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
+                "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "\n",
+                "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
+                "\n",
+                "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n",
+                "\n",
+                "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
+                "\n",
+                "            return rules\n",
+                "\n",
+                "def apply_expert_rules(line):\n",
+                "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n",
+                "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
+                "\n",
+                "    global rules\n",
+                "\n",
+                "    for rule in rules:\n",
+                "        notebook = rule[1]\n",
+                "        cell_type = rule[2]\n",
+                "        output_type = rule[3] # i.e. stream or error\n",
+                "        output_type_name = rule[4] # i.e. ename or name \n",
+                "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
+                "        details_name = rule[6]  # i.e. evalue or text \n",
+                "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
+                "\n",
+                "        if debug_logging:\n",
+                "            print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n",
+                "\n",
+                "        if re.match(expression, line, re.DOTALL):\n",
+                "\n",
+                "            if debug_logging:\n",
+                "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n",
+                "\n",
+                "            match_found = True\n",
+                "\n",
+                "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
+                "\n",
+                "\n",
+                "\n",
+                "print('Common functions defined successfully.')\n",
+                "\n",
+                "# Hints for binary (transient fault) retry, (known) error and install guide\n",
+                "#\n",
+                "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n",
+                "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n",
+                "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the Kubernetes namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster use the kubectl command line\n",
+                "interface .\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
+                "    except:\n",
+                "        from IPython.display import Markdown\n",
+                "        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'.  SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get name of the \u2018Running\u2019 `controller` `pod`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place the name  of the 'Running' controller pod in variable `controller`\n",
+                "\n",
+                "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n",
+                "\n",
+                "print(f\"Controller pod name: {controller}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Set temporary folder to hold Root CA certificate"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import os\n",
+                "import tempfile\n",
+                "\n",
+                "path = os.path.join(tempfile.gettempdir(), local_folder_name)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Create folder on `controller` to hold Root CA certificate"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "run(f'kubectl exec {controller} -n {namespace} -c controller -- bash -c \"mkdir -p {test_cert_store_root}\" ')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Copy Root CA certificate to `controller` `pod`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import os\n",
+                "\n",
+                "cwd = os.getcwd()\n",
+                "os.chdir(path) # Workaround kubectl bug on Windows, can't put c:\\ on kubectl cp cmd line \n",
+                "\n",
+                "run(f'kubectl cp cacert.pem {controller}:{test_cert_store_root}/cacert.pem -c controller -n {namespace}')\n",
+                "run(f'kubectl cp cakey.pem {controller}:{test_cert_store_root}/cakey.pem -c controller -n {namespace}')\n",
+                "\n",
+                "os.chdir(cwd)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Delete the temporary folder holding the Root CA certificate"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import shutil\n",
+                "\n",
+                "shutil.rmtree(path)"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "Related\n",
+                "-------\n",
+                "\n",
+                "-   [CER001 - Generate a Root CA\n",
+                "    certificate](../cert-management/cer001-create-root-ca.ipynb)\n",
+                "\n",
+                "-   [CER002 - Download existing Root CA\n",
+                "    certificate](../cert-management/cer002-download-existing-root-ca.ipynb)\n",
+                "\n",
+                "-   [CER010 - Install generated Root CA\n",
+                "    locally](../cert-management/cer010-install-generated-root-ca-locally.ipynb)"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": true
+        }
+    }
+}

+ 707 - 0
Big-Data-Clusters/CU8/Public/content/cert-management/cer004-download-upload-existing-root-ca.ipynb

@@ -0,0 +1,707 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "CER004 - Download and Upload existing Root CA certificate\n",
+                "=========================================================\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "k8s_context_source = ''\n",
+                "k8s_context_destination = ''\n",
+                "\n",
+                "local_folder_name = \"mssql-cluster-root-ca\"\n",
+                "\n",
+                "test_cert_store_root = \"/var/opt/secrets/test-certificates\""
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Common functions\n",
+                "\n",
+                "Define helper functions used in this notebook."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n",
+                "import sys\n",
+                "import os\n",
+                "import re\n",
+                "import json\n",
+                "import platform\n",
+                "import shlex\n",
+                "import shutil\n",
+                "import datetime\n",
+                "\n",
+                "from subprocess import Popen, PIPE\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
+                "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
+                "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
+                "\n",
+                "first_run = True\n",
+                "rules = None\n",
+                "debug_logging = False\n",
+                "\n",
+                "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
+                "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
+                "\n",
+                "    NOTES:\n",
+                "\n",
+                "    1.  Commands that need this kind of ' quoting on Windows e.g.:\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n",
+                "\n",
+                "        Need to actually pass in as '\"':\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n",
+                "\n",
+                "        The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n",
+                "        \n",
+                "            `iter(p.stdout.readline, b'')`\n",
+                "\n",
+                "        The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n",
+                "    \"\"\"\n",
+                "    MAX_RETRIES = 5\n",
+                "    output = \"\"\n",
+                "    retry = False\n",
+                "\n",
+                "    global first_run\n",
+                "    global rules\n",
+                "\n",
+                "    if first_run:\n",
+                "        first_run = False\n",
+                "        rules = load_rules()\n",
+                "\n",
+                "    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n",
+                "    #\n",
+                "    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
+                "    #\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n",
+                "        cmd = cmd.replace(\"\\n\", \" \")\n",
+                "\n",
+                "    # shlex.split is required on bash and for Windows paths with spaces\n",
+                "    #\n",
+                "    cmd_actual = shlex.split(cmd)\n",
+                "\n",
+                "    # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n",
+                "    #\n",
+                "    user_provided_exe_name = cmd_actual[0].lower()\n",
+                "\n",
+                "    # When running python, use the python in the ADS sandbox ({sys.executable})\n",
+                "    #\n",
+                "    if cmd.startswith(\"python \"):\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n",
+                "\n",
+                "        # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n",
+                "        # with:\n",
+                "        #\n",
+                "        #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n",
+                "        #\n",
+                "        # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n",
+                "        #\n",
+                "        if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n",
+                "            os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n",
+                "\n",
+                "    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n",
+                "    #\n",
+                "    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n",
+                "\n",
+                "    # To aid supportability, determine which binary file will actually be executed on the machine\n",
+                "    #\n",
+                "    which_binary = None\n",
+                "\n",
+                "    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n",
+                "    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n",
+                "    # of CURL exists on the machine use that one.  (Unfortunately the curl.exe in System32 is almost\n",
+                "    # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n",
+                "    # look for the 2nd installation of CURL in the path)\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n",
+                "        path = os.getenv('PATH')\n",
+                "        for p in path.split(os.path.pathsep):\n",
+                "            p = os.path.join(p, \"curl.exe\")\n",
+                "            if os.path.exists(p) and os.access(p, os.X_OK):\n",
+                "                if p.lower().find(\"system32\") == -1:\n",
+                "                    cmd_actual[0] = p\n",
+                "                    which_binary = p\n",
+                "                    break\n",
+                "\n",
+                "    # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n",
+                "    # seems to be required for .msi installs of azdata.cmd/az.cmd.  (otherwise Popen returns FileNotFound) \n",
+                "    #\n",
+                "    # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        which_binary = shutil.which(cmd_actual[0])\n",
+                "\n",
+                "    # Display an install HINT, so the user can click on a SOP to install the missing binary\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n",
+                "        print(sys.path)\n",
+                "\n",
+                "        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
+                "            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n",
+                "    else:   \n",
+                "        cmd_actual[0] = which_binary\n",
+                "\n",
+                "    start_time = datetime.datetime.now().replace(microsecond=0)\n",
+                "\n",
+                "    print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n",
+                "    print(f\"       using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n",
+                "    print(f\"       cwd: {os.getcwd()}\")\n",
+                "\n",
+                "    # Command-line tools such as CURL and AZDATA HDFS commands output\n",
+                "    # scrolling progress bars, which causes Jupyter to hang forever, to\n",
+                "    # workaround this, use no_output=True\n",
+                "    #\n",
+                "\n",
+                "\n",
+                "    # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n",
+                "    #\n",
+                "    wait = True \n",
+                "\n",
+                "    try:\n",
+                "        if no_output:\n",
+                "            p = Popen(cmd_actual)\n",
+                "        else:\n",
+                "            p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n",
+                "            with p.stdout:\n",
+                "                for line in iter(p.stdout.readline, b''):\n",
+                "                    line = line.decode()\n",
+                "                    if return_output:\n",
+                "                        output = output + line\n",
+                "                    else:\n",
+                "                        if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n",
+                "                            regex = re.compile('  \"(.*)\"\\: \"(.*)\"') \n",
+                "                            match = regex.match(line)\n",
+                "                            if match:\n",
+                "                                if match.group(1).find(\"HTML\") != -1:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n",
+                "                                else:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n",
+                "\n",
+                "                                    wait = False\n",
+                "                                    break # otherwise infinite hang, have not worked out why yet.\n",
+                "                        else:\n",
+                "                            print(line, end='')\n",
+                "                            if rules is not None:\n",
+                "                                apply_expert_rules(line)\n",
+                "\n",
+                "        if wait:\n",
+                "            p.wait()\n",
+                "    except FileNotFoundError as e:\n",
+                "        if install_hint is not None:\n",
+                "            display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n",
+                "\n",
+                "    exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n",
+                "\n",
+                "    if not no_output:\n",
+                "        for line in iter(p.stderr.readline, b''):\n",
+                "            try:\n",
+                "                line_decoded = line.decode()\n",
+                "            except UnicodeDecodeError:\n",
+                "                # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n",
+                "                #\n",
+                "                #   \\xa0\n",
+                "                #\n",
+                "                # For example see this in the response from `az group create`:\n",
+                "                #\n",
+                "                # ERROR: Get Token request returned http error: 400 and server \n",
+                "                # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n",
+                "                # The refresh token has expired due to inactivity.\\xa0The token was \n",
+                "                # issued on 2018-10-25T23:35:11.9832872Z\n",
+                "                #\n",
+                "                # which generates the exception:\n",
+                "                #\n",
+                "                # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n",
+                "                #\n",
+                "                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n",
+                "                print(line)\n",
+                "                line_decoded = \"\"\n",
+                "                pass\n",
+                "            else:\n",
+                "\n",
+                "                # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n",
+                "                # print this empty \"ERR:\" as it confuses.\n",
+                "                #\n",
+                "                if line_decoded == \"\":\n",
+                "                    continue\n",
+                "                \n",
+                "                print(f\"STDERR: {line_decoded}\", end='')\n",
+                "\n",
+                "                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n",
+                "                    exit_code_workaround = 1\n",
+                "\n",
+                "                # inject HINTs to next TSG/SOP based on output in stderr\n",
+                "                #\n",
+                "                if user_provided_exe_name in error_hints:\n",
+                "                    for error_hint in error_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(error_hint[0]) != -1:\n",
+                "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
+                "\n",
+                "                # apply expert rules (to run follow-on notebooks), based on output\n",
+                "                #\n",
+                "                if rules is not None:\n",
+                "                    apply_expert_rules(line_decoded)\n",
+                "\n",
+                "                # Verify if a transient error, if so automatically retry (recursive)\n",
+                "                #\n",
+                "                if user_provided_exe_name in retry_hints:\n",
+                "                    for retry_hint in retry_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(retry_hint) != -1:\n",
+                "                            if retry_count < MAX_RETRIES:\n",
+                "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
+                "                                retry_count = retry_count + 1\n",
+                "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
+                "\n",
+                "                                if return_output:\n",
+                "                                    if base64_decode:\n",
+                "                                        import base64\n",
+                "\n",
+                "                                        return base64.b64decode(output).decode('utf-8')\n",
+                "                                    else:\n",
+                "                                        return output\n",
+                "\n",
+                "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
+                "\n",
+                "    # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n",
+                "    # don't wait here, if success known above\n",
+                "    #\n",
+                "    if wait: \n",
+                "        if p.returncode != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n",
+                "    else:\n",
+                "        if exit_code_workaround !=0 :\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n",
+                "\n",
+                "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
+                "\n",
+                "    if return_output:\n",
+                "        if base64_decode:\n",
+                "            import base64\n",
+                "\n",
+                "            return base64.b64decode(output).decode('utf-8')\n",
+                "        else:\n",
+                "            return output\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    \"\"\"Load a json file from disk and return the contents\"\"\"\n",
+                "\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def load_rules():\n",
+                "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
+                "\n",
+                "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
+                "    #\n",
+                "    try:\n",
+                "        j = load_json(\"cer004-download-upload-existing-root-ca.ipynb\")\n",
+                "    except:\n",
+                "        pass # If the user has renamed the book, we can't load ourself.  NOTE: Is there a way in Jupyter, to know your own filename?\n",
+                "    else:\n",
+                "        if \"metadata\" in j and \\\n",
+                "            \"azdata\" in j[\"metadata\"] and \\\n",
+                "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
+                "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "\n",
+                "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
+                "\n",
+                "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n",
+                "\n",
+                "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
+                "\n",
+                "            return rules\n",
+                "\n",
+                "def apply_expert_rules(line):\n",
+                "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n",
+                "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
+                "\n",
+                "    global rules\n",
+                "\n",
+                "    for rule in rules:\n",
+                "        notebook = rule[1]\n",
+                "        cell_type = rule[2]\n",
+                "        output_type = rule[3] # i.e. stream or error\n",
+                "        output_type_name = rule[4] # i.e. ename or name \n",
+                "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
+                "        details_name = rule[6]  # i.e. evalue or text \n",
+                "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
+                "\n",
+                "        if debug_logging:\n",
+                "            print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n",
+                "\n",
+                "        if re.match(expression, line, re.DOTALL):\n",
+                "\n",
+                "            if debug_logging:\n",
+                "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n",
+                "\n",
+                "            match_found = True\n",
+                "\n",
+                "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
+                "\n",
+                "\n",
+                "\n",
+                "print('Common functions defined successfully.')\n",
+                "\n",
+                "# Hints for binary (transient fault) retry, (known) error and install guide\n",
+                "#\n",
+                "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n",
+                "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n",
+                "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Set Kubernetes context to source cluster"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "run(f'kubectl config use-context {k8s_context_source}')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the Kubernetes namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster use the kubectl command line\n",
+                "interface .\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
+                "    except:\n",
+                "        from IPython.display import Markdown\n",
+                "        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'.  SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get name of the \u2018Running\u2019 `controller` `pod`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place the name  of the 'Running' controller pod in variable `controller`\n",
+                "\n",
+                "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n",
+                "\n",
+                "print(f\"Controller pod name: {controller}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Create a temporary folder to hold Root CA certificate"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import os\n",
+                "import tempfile\n",
+                "import shutil\n",
+                "\n",
+                "path = os.path.join(tempfile.gettempdir(), local_folder_name)\n",
+                "\n",
+                "if os.path.isdir(path):\n",
+                "  shutil.rmtree(path)\n",
+                "\n",
+                "os.mkdir(path)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Copy Root CA certificate from `controller` `pod`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import os\n",
+                "\n",
+                "cwd = os.getcwd()\n",
+                "os.chdir(path) # Workaround kubectl bug on Windows, can't put c:\\ on kubectl cp cmd line \n",
+                "\n",
+                "run(f'kubectl cp {controller}:{test_cert_store_root}/cacert.pem cacert.pem -c controller -n {namespace}')\n",
+                "run(f'kubectl cp {controller}:{test_cert_store_root}/cakey.pem cakey.pem -c controller -n {namespace}')\n",
+                "\n",
+                "os.chdir(cwd)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Set Kubernetes context to destination cluster"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "run(f'kubectl config use-context {k8s_context_destination}')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the Kubernetes namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster use the kubectl command line\n",
+                "interface .\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
+                "    except:\n",
+                "        from IPython.display import Markdown\n",
+                "        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'.  SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get name of the \u2018Running\u2019 `controller` `pod`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place the name  of the 'Running' controller pod in variable `controller`\n",
+                "\n",
+                "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n",
+                "\n",
+                "print(f\"Controller pod name: {controller}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Create folder on `controller` to hold Root CA certificate"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "run(f'kubectl exec {controller} -n {namespace} -c controller -- bash -c \"mkdir -p {test_cert_store_root}\" ')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Copy Root CA certificate to `controller` `pod`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import os\n",
+                "\n",
+                "cwd = os.getcwd()\n",
+                "os.chdir(path) # Workaround kubectl bug on Windows, can't put c:\\ on kubectl cp cmd line \n",
+                "\n",
+                "run(f'kubectl cp cacert.pem {controller}:{test_cert_store_root}/cacert.pem -c controller -n {namespace}')\n",
+                "run(f'kubectl cp cakey.pem {controller}:{test_cert_store_root}/cakey.pem -c controller -n {namespace}')\n",
+                "\n",
+                "os.chdir(cwd)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Delete the temporary folder holding the Root CA certificate"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import shutil\n",
+                "\n",
+                "shutil.rmtree(path)"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "Related\n",
+                "-------\n",
+                "\n",
+                "-   [CER001 - Generate a Root CA\n",
+                "    certificate](../cert-management/cer001-create-root-ca.ipynb)\n",
+                "\n",
+                "-   [CER002 - Download existing Root CA\n",
+                "    certificate](../cert-management/cer002-download-existing-root-ca.ipynb)\n",
+                "\n",
+                "-   [CER010 - Install generated Root CA\n",
+                "    locally](../cert-management/cer010-install-generated-root-ca-locally.ipynb)"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": true
+        }
+    }
+}

+ 888 - 0
Big-Data-Clusters/CU8/Public/content/cert-management/cer005-install-existing-root-ca.ipynb

@@ -0,0 +1,888 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "CER005 - Install new Root CA certificate\n",
+                "========================================\n",
+                "\n",
+                "Use this notebook to install a new Root CA certificate to a cluster.\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "local_certificate_dir = \"mssql-cluster-root-ca\"\n",
+                "\n",
+                "ca_certificate_file_name = \"cacert.pem\"\n",
+                "target_ca_certificate_file_name = \"cacert.pem\"\n",
+                "\n",
+                "test_cert_store_root = \"/var/opt/secrets/test-certificates\"\n",
+                "timeout = 600  # amount of time to wait before cluster is healthy:  default to 10 minutes\n",
+                "check_interval = 10  # amount of time between health checks - default 10 seconds\n",
+                "min_pod_count = 10  # minimum number of healthy pods required to assert health"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Common functions\n",
+                "\n",
+                "Define helper functions used in this notebook."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n",
+                "import sys\n",
+                "import os\n",
+                "import re\n",
+                "import json\n",
+                "import platform\n",
+                "import shlex\n",
+                "import shutil\n",
+                "import datetime\n",
+                "\n",
+                "from subprocess import Popen, PIPE\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
+                "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
+                "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
+                "\n",
+                "first_run = True\n",
+                "rules = None\n",
+                "debug_logging = False\n",
+                "\n",
+                "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
+                "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
+                "\n",
+                "    NOTES:\n",
+                "\n",
+                "    1.  Commands that need this kind of ' quoting on Windows e.g.:\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n",
+                "\n",
+                "        Need to actually pass in as '\"':\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n",
+                "\n",
+                "        The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n",
+                "        \n",
+                "            `iter(p.stdout.readline, b'')`\n",
+                "\n",
+                "        The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n",
+                "    \"\"\"\n",
+                "    MAX_RETRIES = 5\n",
+                "    output = \"\"\n",
+                "    retry = False\n",
+                "\n",
+                "    global first_run\n",
+                "    global rules\n",
+                "\n",
+                "    if first_run:\n",
+                "        first_run = False\n",
+                "        rules = load_rules()\n",
+                "\n",
+                "    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n",
+                "    #\n",
+                "    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
+                "    #\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n",
+                "        cmd = cmd.replace(\"\\n\", \" \")\n",
+                "\n",
+                "    # shlex.split is required on bash and for Windows paths with spaces\n",
+                "    #\n",
+                "    cmd_actual = shlex.split(cmd)\n",
+                "\n",
+                "    # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n",
+                "    #\n",
+                "    user_provided_exe_name = cmd_actual[0].lower()\n",
+                "\n",
+                "    # When running python, use the python in the ADS sandbox ({sys.executable})\n",
+                "    #\n",
+                "    if cmd.startswith(\"python \"):\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n",
+                "\n",
+                "        # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n",
+                "        # with:\n",
+                "        #\n",
+                "        #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n",
+                "        #\n",
+                "        # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n",
+                "        #\n",
+                "        if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n",
+                "            os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n",
+                "\n",
+                "    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n",
+                "    #\n",
+                "    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n",
+                "\n",
+                "    # To aid supportability, determine which binary file will actually be executed on the machine\n",
+                "    #\n",
+                "    which_binary = None\n",
+                "\n",
+                "    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n",
+                "    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n",
+                "    # of CURL exists on the machine use that one.  (Unfortunately the curl.exe in System32 is almost\n",
+                "    # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n",
+                "    # look for the 2nd installation of CURL in the path)\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n",
+                "        path = os.getenv('PATH')\n",
+                "        for p in path.split(os.path.pathsep):\n",
+                "            p = os.path.join(p, \"curl.exe\")\n",
+                "            if os.path.exists(p) and os.access(p, os.X_OK):\n",
+                "                if p.lower().find(\"system32\") == -1:\n",
+                "                    cmd_actual[0] = p\n",
+                "                    which_binary = p\n",
+                "                    break\n",
+                "\n",
+                "    # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n",
+                "    # seems to be required for .msi installs of azdata.cmd/az.cmd.  (otherwise Popen returns FileNotFound) \n",
+                "    #\n",
+                "    # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        which_binary = shutil.which(cmd_actual[0])\n",
+                "\n",
+                "    # Display an install HINT, so the user can click on a SOP to install the missing binary\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n",
+                "        print(sys.path)\n",
+                "\n",
+                "        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
+                "            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n",
+                "    else:   \n",
+                "        cmd_actual[0] = which_binary\n",
+                "\n",
+                "    start_time = datetime.datetime.now().replace(microsecond=0)\n",
+                "\n",
+                "    print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n",
+                "    print(f\"       using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n",
+                "    print(f\"       cwd: {os.getcwd()}\")\n",
+                "\n",
+                "    # Command-line tools such as CURL and AZDATA HDFS commands output\n",
+                "    # scrolling progress bars, which causes Jupyter to hang forever, to\n",
+                "    # workaround this, use no_output=True\n",
+                "    #\n",
+                "\n",
+                "\n",
+                "    # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n",
+                "    #\n",
+                "    wait = True \n",
+                "\n",
+                "    try:\n",
+                "        if no_output:\n",
+                "            p = Popen(cmd_actual)\n",
+                "        else:\n",
+                "            p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n",
+                "            with p.stdout:\n",
+                "                for line in iter(p.stdout.readline, b''):\n",
+                "                    line = line.decode()\n",
+                "                    if return_output:\n",
+                "                        output = output + line\n",
+                "                    else:\n",
+                "                        if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n",
+                "                            regex = re.compile('  \"(.*)\"\\: \"(.*)\"') \n",
+                "                            match = regex.match(line)\n",
+                "                            if match:\n",
+                "                                if match.group(1).find(\"HTML\") != -1:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n",
+                "                                else:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n",
+                "\n",
+                "                                    wait = False\n",
+                "                                    break # otherwise infinite hang, have not worked out why yet.\n",
+                "                        else:\n",
+                "                            print(line, end='')\n",
+                "                            if rules is not None:\n",
+                "                                apply_expert_rules(line)\n",
+                "\n",
+                "        if wait:\n",
+                "            p.wait()\n",
+                "    except FileNotFoundError as e:\n",
+                "        if install_hint is not None:\n",
+                "            display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n",
+                "\n",
+                "    exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n",
+                "\n",
+                "    if not no_output:\n",
+                "        for line in iter(p.stderr.readline, b''):\n",
+                "            try:\n",
+                "                line_decoded = line.decode()\n",
+                "            except UnicodeDecodeError:\n",
+                "                # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n",
+                "                #\n",
+                "                #   \\xa0\n",
+                "                #\n",
+                "                # For example see this in the response from `az group create`:\n",
+                "                #\n",
+                "                # ERROR: Get Token request returned http error: 400 and server \n",
+                "                # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n",
+                "                # The refresh token has expired due to inactivity.\\xa0The token was \n",
+                "                # issued on 2018-10-25T23:35:11.9832872Z\n",
+                "                #\n",
+                "                # which generates the exception:\n",
+                "                #\n",
+                "                # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n",
+                "                #\n",
+                "                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n",
+                "                print(line)\n",
+                "                line_decoded = \"\"\n",
+                "                pass\n",
+                "            else:\n",
+                "\n",
+                "                # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n",
+                "                # print this empty \"ERR:\" as it confuses.\n",
+                "                #\n",
+                "                if line_decoded == \"\":\n",
+                "                    continue\n",
+                "                \n",
+                "                print(f\"STDERR: {line_decoded}\", end='')\n",
+                "\n",
+                "                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n",
+                "                    exit_code_workaround = 1\n",
+                "\n",
+                "                # inject HINTs to next TSG/SOP based on output in stderr\n",
+                "                #\n",
+                "                if user_provided_exe_name in error_hints:\n",
+                "                    for error_hint in error_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(error_hint[0]) != -1:\n",
+                "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
+                "\n",
+                "                # apply expert rules (to run follow-on notebooks), based on output\n",
+                "                #\n",
+                "                if rules is not None:\n",
+                "                    apply_expert_rules(line_decoded)\n",
+                "\n",
+                "                # Verify if a transient error, if so automatically retry (recursive)\n",
+                "                #\n",
+                "                if user_provided_exe_name in retry_hints:\n",
+                "                    for retry_hint in retry_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(retry_hint) != -1:\n",
+                "                            if retry_count < MAX_RETRIES:\n",
+                "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
+                "                                retry_count = retry_count + 1\n",
+                "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
+                "\n",
+                "                                if return_output:\n",
+                "                                    if base64_decode:\n",
+                "                                        import base64\n",
+                "\n",
+                "                                        return base64.b64decode(output).decode('utf-8')\n",
+                "                                    else:\n",
+                "                                        return output\n",
+                "\n",
+                "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
+                "\n",
+                "    # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n",
+                "    # don't wait here, if success known above\n",
+                "    #\n",
+                "    if wait: \n",
+                "        if p.returncode != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n",
+                "    else:\n",
+                "        if exit_code_workaround !=0 :\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n",
+                "\n",
+                "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
+                "\n",
+                "    if return_output:\n",
+                "        if base64_decode:\n",
+                "            import base64\n",
+                "\n",
+                "            return base64.b64decode(output).decode('utf-8')\n",
+                "        else:\n",
+                "            return output\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    \"\"\"Load a json file from disk and return the contents\"\"\"\n",
+                "\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def load_rules():\n",
+                "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
+                "\n",
+                "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
+                "    #\n",
+                "    try:\n",
+                "        j = load_json(\"cer005-install-existing-root-ca.ipynb\")\n",
+                "    except:\n",
+                "        pass # If the user has renamed the book, we can't load ourself.  NOTE: Is there a way in Jupyter, to know your own filename?\n",
+                "    else:\n",
+                "        if \"metadata\" in j and \\\n",
+                "            \"azdata\" in j[\"metadata\"] and \\\n",
+                "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
+                "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "\n",
+                "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
+                "\n",
+                "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n",
+                "\n",
+                "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
+                "\n",
+                "            return rules\n",
+                "\n",
+                "def apply_expert_rules(line):\n",
+                "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n",
+                "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
+                "\n",
+                "    global rules\n",
+                "\n",
+                "    for rule in rules:\n",
+                "        notebook = rule[1]\n",
+                "        cell_type = rule[2]\n",
+                "        output_type = rule[3] # i.e. stream or error\n",
+                "        output_type_name = rule[4] # i.e. ename or name \n",
+                "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
+                "        details_name = rule[6]  # i.e. evalue or text \n",
+                "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
+                "\n",
+                "        if debug_logging:\n",
+                "            print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n",
+                "\n",
+                "        if re.match(expression, line, re.DOTALL):\n",
+                "\n",
+                "            if debug_logging:\n",
+                "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n",
+                "\n",
+                "            match_found = True\n",
+                "\n",
+                "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
+                "\n",
+                "\n",
+                "\n",
+                "print('Common functions defined successfully.')\n",
+                "\n",
+                "# Hints for binary (transient fault) retry, (known) error and install guide\n",
+                "#\n",
+                "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n",
+                "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n",
+                "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Instantiate Kubernetes client"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Instantiate the Python Kubernetes client into 'api' variable\n",
+                "\n",
+                "import os\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "try:\n",
+                "    from kubernetes import client, config\n",
+                "    from kubernetes.stream import stream\n",
+                "\n",
+                "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n",
+                "        config.load_incluster_config()\n",
+                "    else:\n",
+                "        try:\n",
+                "            config.load_kube_config()\n",
+                "        except:\n",
+                "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n",
+                "            raise\n",
+                "    api = client.CoreV1Api()\n",
+                "\n",
+                "    print('Kubernetes client instantiated')\n",
+                "except ImportError:\n",
+                "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n",
+                "    raise"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Helper functions for waiting for the cluster to become healthy"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "import threading\n",
+                "import time\n",
+                "import sys\n",
+                "import os\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "isRunning = True\n",
+                "\n",
+                "def all_containers_ready(pod):\n",
+                "    \"\"\"helper method returns true if all the containers within the given pod are ready\n",
+                "\n",
+                "    Arguments:\n",
+                "        pod {v1Pod} -- Metadata retrieved from the api call to.\n",
+                "    \"\"\"\n",
+                "         \n",
+                "    return all(map(lambda c: c.ready is True, pod.status.container_statuses))\n",
+                "\n",
+                "\n",
+                "def pod_is_ready(pod):\n",
+                "    \"\"\"tests that the pod, and all containers are ready\n",
+                "\n",
+                "    Arguments:\n",
+                "        pod {v1Pod} -- Metadata retrieved from api call.\n",
+                "    \"\"\"\n",
+                "\n",
+                "    return \"job-name\" in pod.metadata.labels or (pod.status.phase == \"Running\" and all_containers_ready(pod))\n",
+                "\n",
+                "\n",
+                "def waitReady():\n",
+                "    \"\"\"Waits for all pods, and containers to become ready.\n",
+                "    \"\"\"\n",
+                "    while isRunning:\n",
+                "        try:\n",
+                "            time.sleep(check_interval)\n",
+                "            pods = get_pods()\n",
+                "            allReady = len(pods.items) >= min_pod_count and all(map(pod_is_ready, pods.items))\n",
+                "\n",
+                "            if allReady:\n",
+                "                return True\n",
+                "            else:\n",
+                "                display(Markdown(get_pod_failures(pods)))\n",
+                "                display(Markdown(f\"cluster not healthy, rechecking in {check_interval} seconds.\"))\n",
+                "        except Exception as ex:\n",
+                "            last_error_message = str(ex)\n",
+                "            display(Markdown(last_error_message))\n",
+                "            time.sleep(check_interval)\n",
+                "\n",
+                "def get_pod_failures(pods=None):\n",
+                "    \"\"\"Returns a status message for any pods that are not ready.\n",
+                "    \"\"\"\n",
+                "    results = \"\"\n",
+                "    if not pods:\n",
+                "        pods = get_pods()\n",
+                "\n",
+                "    for pod in pods.items:\n",
+                "        if \"job-name\" not in pod.metadata.labels:\n",
+                "            if pod.status and pod.status.container_statuses:\n",
+                "                for container in filter(lambda c: c.ready is False, pod.status.container_statuses):\n",
+                "                    results = results + \"Container {0} in Pod {1} is not ready. Reported status: {2} <br/>\".format(container.name, pod.metadata.name, container.state)       \n",
+                "            else:\n",
+                "                results = results + \"Pod {0} is not ready.  <br/>\".format(pod.metadata.name)\n",
+                "    return results\n",
+                "\n",
+                "\n",
+                "def get_pods():\n",
+                "    \"\"\"Returns a list of pods by namespace, or all namespaces if no namespace is specified\n",
+                "    \"\"\"\n",
+                "    pods = None\n",
+                "    if namespace is not None:\n",
+                "        display(Markdown(f'Checking namespace {namespace}'))\n",
+                "        pods = api.list_namespaced_pod(namespace, _request_timeout=30) \n",
+                "    else:\n",
+                "        display(Markdown('Checking all namespaces'))\n",
+                "        pods = api.list_pod_for_all_namespaces(_request_timeout=30)\n",
+                "    return pods\n",
+                "\n",
+                "def wait_for_cluster_healthy():\n",
+                "    isRunning = True\n",
+                "    mt = threading.Thread(target=waitReady)\n",
+                "    mt.start()\n",
+                "    mt.join(timeout=timeout)\n",
+                "\n",
+                "    if mt.isAlive():\n",
+                "      raise SystemExit(\"Timeout waiting for all cluster to be healthy.\")\n",
+                "      \n",
+                "    isRunning = False"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the Kubernetes namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster use the kubectl command line\n",
+                "interface .\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
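+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "As a minimal sketch (assuming a hypothetical namespace name of\n",
+                "`mssql-cluster`), the namespace can also be pinned from Python in this\n",
+                "notebook session before running the next cell, which reads the same\n",
+                "environment variable:\n",
+                "\n",
+                "    import os\n",
+                "    os.environ[\"AZDATA_NAMESPACE\"] = \"mssql-cluster\"  # hypothetical namespace name"
+            ]
+        },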
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
+                "    except:\n",
+                "        from IPython.display import Markdown\n",
+                "        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'.  SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get name of the \u2018Running\u2019 `controller` `pod`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place the name  of the 'Running' controller pod in variable `controller`\n",
+                "\n",
+                "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n",
+                "\n",
+                "print(f\"Controller pod name: {controller}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Add Root CA certificate to config map"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import os\n",
+                "import tempfile\n",
+                "from pathlib import Path\n",
+                "\n",
+                "cwd = os.getcwd()\n",
+                "path = os.path.join(tempfile.gettempdir(), local_certificate_dir)\n",
+                "os.chdir(path)\n",
+                "\n",
+                "bdc_local_cert_name = \"cluster-ca-certificate.crt\"\n",
+                "\n",
+                "run(f'kubectl cp {controller}:/var/run/configmaps/cluster/..data/cluster-ca-certificate.crt {bdc_local_cert_name} -c controller -n {namespace}')\n",
+                "\n",
+                "f = open(bdc_local_cert_name, \"r\")\n",
+                "currentCaCertficates = f.read()\n",
+                "currentCaCertficates = re.sub('(\\n)', r'\\\\r\\\\n', currentCaCertficates)\n",
+                "\n",
+                "f = open(ca_certificate_file_name, \"r\")\n",
+                "rootCA = f.read()\n",
+                "rootCA = re.sub('(\\n)', r'\\\\r\\\\n', rootCA)\n",
+                "configmap_certificate_name = Path(ca_certificate_file_name).stem + \".crt\"\n",
+                "\n",
+                "patchCmd = f'kubectl patch configmap cluster-configmap -n {namespace} --type merge -p \\'{{\"data\":{{\"cluster-ca-certificate.crt\":\"{currentCaCertficates}{rootCA}\", \"{configmap_certificate_name}\":\"{rootCA}\"}}}}\\''\n",
+                "\n",
+                "run(patchCmd)\n",
+                "\n",
+                "os.chdir(cwd)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Restart pods to pick up new CA certificate."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "pods = run(f'kubectl get pods -n {namespace} --selector role!=master-pool,role!=controller,app!=clustertest --output=jsonpath={{.items[*].metadata.name}}', return_output=True)\n",
+                "\n",
+                "for pod in pods.split(' '):\n",
+                "    run(f'kubectl delete pod {pod} -n {namespace}')\n",
+                "    wait_for_cluster_healthy()"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the name of the `master` `pods`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place the name of the master pods in variable `pods`\n",
+                "\n",
+                "podNames = run(f'kubectl get pod --selector=app=master -n {namespace} -o jsonpath={{.items[*].metadata.name}}', return_output=True)\n",
+                "pods = podNames.split(\" \")\n",
+                "\n",
+                "print(f\"Master pod names: {pods}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Restart Pods"
+            ]
+        },
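+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "The next cell restarts the `master` pods so they pick up the new Root\n",
+                "CA. With a single master pod (non-HA) the pod is simply deleted. In an\n",
+                "HA (availability group) configuration, the secondary replicas are\n",
+                "deleted first, the primary is failed over to a synchronized secondary\n",
+                "using the failover REST endpoint on the controller service, and the\n",
+                "old primary is then deleted, waiting for all replicas to\n",
+                "re-synchronize between each step."
+            ]
+        },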
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import threading\n",
+                "import time\n",
+                "\n",
+                "if len(pods) == 1:\n",
+                "  # One master pod indicates non-HA environment, just delete it\n",
+                "  run(f'kubectl delete pod {pods[0]} -n {namespace}')\n",
+                "else:\n",
+                "  # HA setup, delete secondaries before primary\n",
+                "  timeout_s = 300\n",
+                "  check_interval_s = 10\n",
+                "\n",
+                "  master_primary_svc_ip = run(f'kubectl get service master-p-svc -n {namespace} -o jsonpath={{.spec.clusterIP}}', return_output=True) \n",
+                "  master_password = run(f'kubectl exec -it master-0 -c mssql-server -n {namespace} -- cat /var/run/secrets/credentials/pool/mssql-system-password', return_output=True) \n",
+                "\n",
+                "  def run_query(cmd):\n",
+                "    run(f\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- /opt/mssql-tools/bin/sqlcmd -S {master_primary_svc_ip} -U system -P {master_password} -h -1 -q \\\"SET NOCOUNT ON; {cmd}\\\" -o /tmp/res.csv\")\n",
+                "    res=run(f\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- head -n -2 /tmp/res.csv \", return_output=True)\n",
+                "    return res\n",
+                "\n",
+                "  def get_number_of_unsynchronized_replicas():\n",
+                "    cmd = 'select count(*) from sys.dm_hadr_database_replica_states where synchronization_state <> 2'\n",
+                "    res = run_query(cmd)\n",
+                "    return int(res.split()[0])\n",
+                "\n",
+                "  def get_primary_replica():\n",
+                "    cmd = 'select distinct replica_server_name from sys.dm_hadr_database_replica_states s join sys.availability_replicas r on s.replica_id = r.replica_id where is_primary_replica = 1'\n",
+                "    res = run_query(cmd)\n",
+                "    return res.split()[0]\n",
+                "\n",
+                "  def get_secondary_replicas():\n",
+                "    cmd = 'select distinct replica_server_name from sys.dm_hadr_database_replica_states s join sys.availability_replicas r on s.replica_id = r.replica_id where is_primary_replica = 0'\n",
+                "    res = run_query(cmd)\n",
+                "    rows = res.strip().split(\"\\n\")\n",
+                "    res = []\n",
+                "    for row in rows:\n",
+                "      res.append(row.strip())\n",
+                "    return res\n",
+                "\n",
+                "  def all_replicas_syncrhonized():\n",
+                "    while True:\n",
+                "      time.sleep(check_interval_s)\n",
+                "      unsynchronized_replicas_cnt=get_number_of_unsynchronized_replicas()\n",
+                "      if unsynchronized_replicas_cnt == 0:\n",
+                "        return True\n",
+                "\n",
+                "  def wait_for_replicas_to_synchronize():\n",
+                "    mt = threading.Thread(target=all_replicas_syncrhonized)\n",
+                "    mt.start()\n",
+                "    mt.join(timeout=timeout_s)\n",
+                "\n",
+                "    if mt.isAlive():\n",
+                "      raise SystemExit(\"Timeout waiting for all replicas to be synchronized.\")\n",
+                "\n",
+                "  secondary_replicas = get_secondary_replicas()\n",
+                "  for replica in secondary_replicas:\n",
+                "    wait_for_replicas_to_synchronize()\n",
+                "    run(f'kubectl delete pod {replica} -n {namespace}')\n",
+                "\n",
+                "  primary_replica = get_primary_replica() \n",
+                "  wait_for_replicas_to_synchronize()\n",
+                "\n",
+                "  key = \"/var/run/secrets/certificates/sqlha/mssql-ha-operator-controller-client/mssql-ha-operator-controller-client-privatekey.pem\"\n",
+                "  cert = \"/var/run/secrets/certificates/sqlha/mssql-ha-operator-controller-client/mssql-ha-operator-controller-client-certificate.pem\"\n",
+                "  content_type_header = \"Content-Type: application/json\"\n",
+                "  authorization_header = \"Authorization: Certificate\"\n",
+                "  data = f'{{\"TargetReplicaName\":\"{secondary_replicas[0]}\",\"ForceFailover\":\"false\"}}'\n",
+                "  request_url = f'https://controller-svc:443/internal/api/v1/bdc/services/sql/resources/master/availabilitygroups/containedag/failover'\n",
+                "\n",
+                "  manual_failover_api_command = f\"curl -sS --key {key} --cert  {cert} -X POST --header '{content_type_header}'  --header '{authorization_header}' --data '{data}' {request_url}\"\n",
+                "\n",
+                "  operator_pod = run(f'kubectl get pod --selector=app=mssql-operator -n {namespace} -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
+                "\n",
+                "  run(f'kubectl exec {operator_pod} -c mssql-ha-operator -n {namespace} -- {manual_failover_api_command}')\n",
+                " \n",
+                "  wait_for_replicas_to_synchronize()\n",
+                "  \n",
+                "  run(f'kubectl delete pod {primary_replica} -n {namespace}')\n",
+                "  wait_for_replicas_to_synchronize()\n",
+                "\n",
+                "  run(f\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- rm -rf /tmp/res.csv \")\n"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get name of the \u2018Running\u2019 `controller` `pod`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place the name  of the 'Running' controller pod in variable `controller`\n",
+                "\n",
+                "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n",
+                "\n",
+                "print(f\"Controller pod name: {controller}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Restart controller pod."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "run(f'kubectl delete pod {controller} -n {namespace}')\n",
+                "wait_for_cluster_healthy()"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Update ca certificates on clustertest pod"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "pods = api.list_namespaced_pod(namespace, _request_timeout=30)\n",
+                "\n",
+                "if any(pod.metadata.name == \"clustertest\" for pod in pods.items):\n",
+                "    run(f'kubectl exec clustertest -c mssql-test -n {namespace} -- update-ca-certificates')"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "Related\n",
+                "-------\n",
+                "\n",
+                "-   [CER001 - Generate a Root CA\n",
+                "    certificate](../cert-management/cer001-create-root-ca.ipynb)\n",
+                "\n",
+                "-   [CER010 - Install generated Root CA\n",
+                "    locally](../cert-management/cer010-install-generated-root-ca-locally.ipynb)"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": true,
+            "timeout": "3600"
+        }
+    }
+}

+ 612 - 0
Big-Data-Clusters/CU8/Public/content/cert-management/cer010-install-generated-root-ca-locally.ipynb

@@ -0,0 +1,612 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "CER010 - Install generated Root CA locally\n",
+                "==========================================\n",
+                "\n",
+                "This notebook will copy locally (from a Big Data Cluster) the generated\n",
+                "Root CA certificate that was installed using either:\n",
+                "\n",
+                "-   [CER001 - Generate a Root CA\n",
+                "    certificate](../cert-management/cer001-create-root-ca.ipynb)\n",
+                "-   [CER003 - Upload existing Root CA\n",
+                "    certificate](../cert-management/cer003-upload-existing-root-ca.ipynb)\n",
+                "\n",
+                "And then install the Root CA certificate into this machine\u2019s local\n",
+                "certificate store.\n",
+                "\n",
+                "NOTE: A Security Dialog popup will appear, accept this dialog to install\n",
+                "the certificate into the local certificate store.\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "test_cert_store_root = \"/var/opt/secrets/test-certificates\""
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Common functions\n",
+                "\n",
+                "Define helper functions used in this notebook."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n",
+                "import sys\n",
+                "import os\n",
+                "import re\n",
+                "import json\n",
+                "import platform\n",
+                "import shlex\n",
+                "import shutil\n",
+                "import datetime\n",
+                "\n",
+                "from subprocess import Popen, PIPE\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
+                "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
+                "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
+                "\n",
+                "first_run = True\n",
+                "rules = None\n",
+                "debug_logging = False\n",
+                "\n",
+                "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
+                "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
+                "\n",
+                "    NOTES:\n",
+                "\n",
+                "    1.  Commands that need this kind of ' quoting on Windows e.g.:\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n",
+                "\n",
+                "        Need to actually pass in as '\"':\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n",
+                "\n",
+                "        The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n",
+                "        \n",
+                "            `iter(p.stdout.readline, b'')`\n",
+                "\n",
+                "        The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n",
+                "    \"\"\"\n",
+                "    MAX_RETRIES = 5\n",
+                "    output = \"\"\n",
+                "    retry = False\n",
+                "\n",
+                "    global first_run\n",
+                "    global rules\n",
+                "\n",
+                "    if first_run:\n",
+                "        first_run = False\n",
+                "        rules = load_rules()\n",
+                "\n",
+                "    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n",
+                "    #\n",
+                "    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
+                "    #\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n",
+                "        cmd = cmd.replace(\"\\n\", \" \")\n",
+                "\n",
+                "    # shlex.split is required on bash and for Windows paths with spaces\n",
+                "    #\n",
+                "    cmd_actual = shlex.split(cmd)\n",
+                "\n",
+                "    # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n",
+                "    #\n",
+                "    user_provided_exe_name = cmd_actual[0].lower()\n",
+                "\n",
+                "    # When running python, use the python in the ADS sandbox ({sys.executable})\n",
+                "    #\n",
+                "    if cmd.startswith(\"python \"):\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n",
+                "\n",
+                "        # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n",
+                "        # with:\n",
+                "        #\n",
+                "        #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n",
+                "        #\n",
+                "        # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n",
+                "        #\n",
+                "        if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n",
+                "            os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n",
+                "\n",
+                "    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n",
+                "    #\n",
+                "    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n",
+                "\n",
+                "    # To aid supportability, determine which binary file will actually be executed on the machine\n",
+                "    #\n",
+                "    which_binary = None\n",
+                "\n",
+                "    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n",
+                "    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n",
+                "    # of CURL exists on the machine use that one.  (Unfortunately the curl.exe in System32 is almost\n",
+                "    # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n",
+                "    # look for the 2nd installation of CURL in the path)\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n",
+                "        path = os.getenv('PATH')\n",
+                "        for p in path.split(os.path.pathsep):\n",
+                "            p = os.path.join(p, \"curl.exe\")\n",
+                "            if os.path.exists(p) and os.access(p, os.X_OK):\n",
+                "                if p.lower().find(\"system32\") == -1:\n",
+                "                    cmd_actual[0] = p\n",
+                "                    which_binary = p\n",
+                "                    break\n",
+                "\n",
+                "    # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n",
+                "    # seems to be required for .msi installs of azdata.cmd/az.cmd.  (otherwise Popen returns FileNotFound) \n",
+                "    #\n",
+                "    # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        which_binary = shutil.which(cmd_actual[0])\n",
+                "\n",
+                "    # Display an install HINT, so the user can click on a SOP to install the missing binary\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n",
+                "        print(sys.path)\n",
+                "\n",
+                "        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
+                "            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n",
+                "    else:   \n",
+                "        cmd_actual[0] = which_binary\n",
+                "\n",
+                "    start_time = datetime.datetime.now().replace(microsecond=0)\n",
+                "\n",
+                "    print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n",
+                "    print(f\"       using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n",
+                "    print(f\"       cwd: {os.getcwd()}\")\n",
+                "\n",
+                "    # Command-line tools such as CURL and AZDATA HDFS commands output\n",
+                "    # scrolling progress bars, which causes Jupyter to hang forever, to\n",
+                "    # workaround this, use no_output=True\n",
+                "    #\n",
+                "\n",
+                "\n",
+                "    # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n",
+                "    #\n",
+                "    wait = True \n",
+                "\n",
+                "    try:\n",
+                "        if no_output:\n",
+                "            p = Popen(cmd_actual)\n",
+                "        else:\n",
+                "            p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n",
+                "            with p.stdout:\n",
+                "                for line in iter(p.stdout.readline, b''):\n",
+                "                    line = line.decode()\n",
+                "                    if return_output:\n",
+                "                        output = output + line\n",
+                "                    else:\n",
+                "                        if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n",
+                "                            regex = re.compile('  \"(.*)\"\\: \"(.*)\"') \n",
+                "                            match = regex.match(line)\n",
+                "                            if match:\n",
+                "                                if match.group(1).find(\"HTML\") != -1:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n",
+                "                                else:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n",
+                "\n",
+                "                                    wait = False\n",
+                "                                    break # otherwise infinite hang, have not worked out why yet.\n",
+                "                        else:\n",
+                "                            print(line, end='')\n",
+                "                            if rules is not None:\n",
+                "                                apply_expert_rules(line)\n",
+                "\n",
+                "        if wait:\n",
+                "            p.wait()\n",
+                "    except FileNotFoundError as e:\n",
+                "        if install_hint is not None:\n",
+                "            display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n",
+                "\n",
+                "    exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n",
+                "\n",
+                "    if not no_output:\n",
+                "        for line in iter(p.stderr.readline, b''):\n",
+                "            try:\n",
+                "                line_decoded = line.decode()\n",
+                "            except UnicodeDecodeError:\n",
+                "                # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n",
+                "                #\n",
+                "                #   \\xa0\n",
+                "                #\n",
+                "                # For example see this in the response from `az group create`:\n",
+                "                #\n",
+                "                # ERROR: Get Token request returned http error: 400 and server \n",
+                "                # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n",
+                "                # The refresh token has expired due to inactivity.\\xa0The token was \n",
+                "                # issued on 2018-10-25T23:35:11.9832872Z\n",
+                "                #\n",
+                "                # which generates the exception:\n",
+                "                #\n",
+                "                # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n",
+                "                #\n",
+                "                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n",
+                "                print(line)\n",
+                "                line_decoded = \"\"\n",
+                "                pass\n",
+                "            else:\n",
+                "\n",
+                "                # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n",
+                "                # print this empty \"ERR:\" as it confuses.\n",
+                "                #\n",
+                "                if line_decoded == \"\":\n",
+                "                    continue\n",
+                "                \n",
+                "                print(f\"STDERR: {line_decoded}\", end='')\n",
+                "\n",
+                "                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n",
+                "                    exit_code_workaround = 1\n",
+                "\n",
+                "                # inject HINTs to next TSG/SOP based on output in stderr\n",
+                "                #\n",
+                "                if user_provided_exe_name in error_hints:\n",
+                "                    for error_hint in error_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(error_hint[0]) != -1:\n",
+                "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
+                "\n",
+                "                # apply expert rules (to run follow-on notebooks), based on output\n",
+                "                #\n",
+                "                if rules is not None:\n",
+                "                    apply_expert_rules(line_decoded)\n",
+                "\n",
+                "                # Verify if a transient error, if so automatically retry (recursive)\n",
+                "                #\n",
+                "                if user_provided_exe_name in retry_hints:\n",
+                "                    for retry_hint in retry_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(retry_hint) != -1:\n",
+                "                            if retry_count < MAX_RETRIES:\n",
+                "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
+                "                                retry_count = retry_count + 1\n",
+                "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
+                "\n",
+                "                                if return_output:\n",
+                "                                    if base64_decode:\n",
+                "                                        import base64\n",
+                "\n",
+                "                                        return base64.b64decode(output).decode('utf-8')\n",
+                "                                    else:\n",
+                "                                        return output\n",
+                "\n",
+                "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
+                "\n",
+                "    # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n",
+                "    # don't wait here, if success known above\n",
+                "    #\n",
+                "    if wait: \n",
+                "        if p.returncode != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n",
+                "    else:\n",
+                "        if exit_code_workaround !=0 :\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n",
+                "\n",
+                "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
+                "\n",
+                "    if return_output:\n",
+                "        if base64_decode:\n",
+                "            import base64\n",
+                "\n",
+                "            return base64.b64decode(output).decode('utf-8')\n",
+                "        else:\n",
+                "            return output\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    \"\"\"Load a json file from disk and return the contents\"\"\"\n",
+                "\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def load_rules():\n",
+                "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
+                "\n",
+                "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
+                "    #\n",
+                "    try:\n",
+                "        j = load_json(\"cer010-install-generated-root-ca-locally.ipynb\")\n",
+                "    except:\n",
+                "        pass # If the user has renamed the book, we can't load ourself.  NOTE: Is there a way in Jupyter, to know your own filename?\n",
+                "    else:\n",
+                "        if \"metadata\" in j and \\\n",
+                "            \"azdata\" in j[\"metadata\"] and \\\n",
+                "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
+                "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "\n",
+                "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
+                "\n",
+                "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n",
+                "\n",
+                "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
+                "\n",
+                "            return rules\n",
+                "\n",
+                "def apply_expert_rules(line):\n",
+                "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n",
+                "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
+                "\n",
+                "    global rules\n",
+                "\n",
+                "    for rule in rules:\n",
+                "        notebook = rule[1]\n",
+                "        cell_type = rule[2]\n",
+                "        output_type = rule[3] # i.e. stream or error\n",
+                "        output_type_name = rule[4] # i.e. ename or name \n",
+                "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
+                "        details_name = rule[6]  # i.e. evalue or text \n",
+                "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
+                "\n",
+                "        if debug_logging:\n",
+                "            print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n",
+                "\n",
+                "        if re.match(expression, line, re.DOTALL):\n",
+                "\n",
+                "            if debug_logging:\n",
+                "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n",
+                "\n",
+                "            match_found = True\n",
+                "\n",
+                "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
+                "\n",
+                "\n",
+                "\n",
+                "print('Common functions defined successfully.')\n",
+                "\n",
+                "# Hints for binary (transient fault) retry, (known) error and install guide\n",
+                "#\n",
+                "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n",
+                "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n",
+                "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the Kubernetes namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster use the kubectl command line\n",
+                "interface .\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
+                "    except:\n",
+                "        from IPython.display import Markdown\n",
+                "        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'.  SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Create a temporary directory to stage files"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Create a temporary directory to hold configuration files\n",
+                "\n",
+                "import tempfile\n",
+                "\n",
+                "temp_dir = tempfile.mkdtemp()\n",
+                "\n",
+                "print(f\"Temporary directory created: {temp_dir}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get name of the \u2018Running\u2019 `controller` `pod`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place the name  of the 'Running' controller pod in variable `controller`\n",
+                "\n",
+                "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n",
+                "\n",
+                "print(f\"Controller pod name: {controller}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Copy certficates local"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import os\n",
+                "\n",
+                "cwd = os.getcwd()\n",
+                "os.chdir(temp_dir) # Workaround kubectl bug on Windows, can't put c:\\ on kubectl cp cmd line \n",
+                "\n",
+                "run(f'kubectl cp {controller}:{test_cert_store_root}/cacert.pem cacert.crt -c controller -n {namespace}')\n",
+                "\n",
+                "# Verify the cacert.cert file is actually there (there is a bug in earlier versions of kubectl)\n",
+                "#\n",
+                "file_exists = os.path.isfile('cacert.crt')\n",
+                "\n",
+                "if not file_exists:\n",
+                "  raise SystemExit(\"File `cacert.crt` does not exist (after `kubectl cp`). This can happen if running older versions of `kubectl`, such as the v1.13 release, run `kubectl version` and upgrade if running an older version of `kubectl`.  `kubectl` v1.18 does work.\")\n",
+                "\n",
+                "os.chdir(cwd)\n",
+                "\n",
+                "print(f'Certificates copied locally to: {temp_dir}')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Install the Root CA\n",
+                "\n",
+                "Documented here:\n",
+                "\n",
+                "-   https://docs.microsoft.com/en-us/windows-hardware/drivers/install/using-certmgr-to-install-test-certificates-on-a-test-computer\n",
+                "\n",
+                "TODO: Add Mac (and Linux) support here!"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "run(f'powershell -Command \"Import-Certificate -FilePath {os.path.join(temp_dir, \"cacert.crt\")} -CertStoreLocation cert:\\\\CurrentUser\\\\Root\"')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Clean up temporary directory for staging configuration files"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Delete the temporary directory used to hold configuration files\n",
+                "\n",
+                "import shutil\n",
+                "\n",
+                "shutil.rmtree(temp_dir)\n",
+                "\n",
+                "print(f'Temporary directory deleted: {temp_dir}')"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "Related\n",
+                "-------\n",
+                "\n",
+                "-   [CER020 - Create Management Proxy\n",
+                "    certificate](../cert-management/cer020-create-management-service-proxy-cert.ipynb)\n",
+                "\n",
+                "-   [CER021 - Create Knox\n",
+                "    certificate](../cert-management/cer021-create-knox-cert.ipynb)\n",
+                "\n",
+                "-   [CER022 - Create App Proxy\n",
+                "    certificate](../cert-management/cer022-create-app-proxy-cert.ipynb)"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": true
+        }
+    }
+}

File diff content is too large to display
+ 402 - 0
Big-Data-Clusters/CU8/Public/content/cert-management/cer020-create-management-service-proxy-cert.ipynb


File diff content is too large to display
+ 402 - 0
Big-Data-Clusters/CU8/Public/content/cert-management/cer021-create-knox-cert.ipynb


File diff content is too large to display
+ 402 - 0
Big-Data-Clusters/CU8/Public/content/cert-management/cer022-create-app-proxy-cert.ipynb


File diff content is too large to display
+ 403 - 0
Big-Data-Clusters/CU8/Public/content/cert-management/cer023-create-master-certs.ipynb


File diff content is too large to display
+ 407 - 0
Big-Data-Clusters/CU8/Public/content/cert-management/cer024-create-controller-cert.ipynb


+ 818 - 0
Big-Data-Clusters/CU8/Public/content/cert-management/cer030-sign-service-proxy-generated-cert.ipynb

@@ -0,0 +1,818 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "CER030 - Sign Management Proxy certificate with generated CA\n",
+                "============================================================\n",
+                "\n",
+                "This notebook signs the certificate created using:\n",
+                "\n",
+                "-   [CER020 - Create Management Proxy\n",
+                "    certificate](../cert-management/cer020-create-management-service-proxy-cert.ipynb)\n",
+                "\n",
+                "with the Big Data Cluster CA or the generated Root CA Certificate,\n",
+                "created using either:\n",
+                "\n",
+                "-   [CER001 - Generate a Root CA\n",
+                "    certificate](../cert-management/cer001-create-root-ca.ipynb)\n",
+                "-   [CER003 - Upload existing Root CA\n",
+                "    certificate](../cert-management/cer003-upload-existing-root-ca.ipynb)\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "import getpass\n",
+                "\n",
+                "app_name = \"mgmtproxy\"\n",
+                "scaledset_name = \"mgmtproxy\"\n",
+                "container_name = \"service-proxy\"\n",
+                "prefix_keyfile_name = \"service-proxy\"\n",
+                "common_name = \"mgmtproxy-svc\"\n",
+                "\n",
+                "country_name = \"US\"\n",
+                "state_or_province_name = \"Illinois\"\n",
+                "locality_name = \"Chicago\"\n",
+                "organization_name = \"Contoso\"\n",
+                "organizational_unit_name = \"Finance\"\n",
+                "email_address = f\"{getpass.getuser().lower()}@contoso.com\"\n",
+                "\n",
+                "ssl_configuration_file = \"ca.openssl.cnf\"\n",
+                "\n",
+                "days = \"398\" # the number of days to certify the certificate for\n",
+                "\n",
+                "certificate_filename = \"cacert.pem\"\n",
+                "private_key_filename = \"cakey.pem\"\n",
+                "use_bdc_ca = False\n",
+                "\n",
+                "test_cert_store_root = \"/var/opt/secrets/test-certificates\""
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Common functions\n",
+                "\n",
+                "Define helper functions used in this notebook."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n",
+                "import sys\n",
+                "import os\n",
+                "import re\n",
+                "import json\n",
+                "import platform\n",
+                "import shlex\n",
+                "import shutil\n",
+                "import datetime\n",
+                "\n",
+                "from subprocess import Popen, PIPE\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
+                "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
+                "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
+                "\n",
+                "first_run = True\n",
+                "rules = None\n",
+                "debug_logging = False\n",
+                "\n",
+                "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
+                "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
+                "\n",
+                "    NOTES:\n",
+                "\n",
+                "    1.  Commands that need this kind of ' quoting on Windows e.g.:\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n",
+                "\n",
+                "        Need to actually pass in as '\"':\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n",
+                "\n",
+                "        The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n",
+                "        \n",
+                "            `iter(p.stdout.readline, b'')`\n",
+                "\n",
+                "        The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n",
+                "    \"\"\"\n",
+                "    MAX_RETRIES = 5\n",
+                "    output = \"\"\n",
+                "    retry = False\n",
+                "\n",
+                "    global first_run\n",
+                "    global rules\n",
+                "\n",
+                "    if first_run:\n",
+                "        first_run = False\n",
+                "        rules = load_rules()\n",
+                "\n",
+                "    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n",
+                "    #\n",
+                "    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
+                "    #\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n",
+                "        cmd = cmd.replace(\"\\n\", \" \")\n",
+                "\n",
+                "    # shlex.split is required on bash and for Windows paths with spaces\n",
+                "    #\n",
+                "    cmd_actual = shlex.split(cmd)\n",
+                "\n",
+                "    # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n",
+                "    #\n",
+                "    user_provided_exe_name = cmd_actual[0].lower()\n",
+                "\n",
+                "    # When running python, use the python in the ADS sandbox ({sys.executable})\n",
+                "    #\n",
+                "    if cmd.startswith(\"python \"):\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n",
+                "\n",
+                "        # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n",
+                "        # with:\n",
+                "        #\n",
+                "        #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n",
+                "        #\n",
+                "        # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n",
+                "        #\n",
+                "        if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n",
+                "            os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n",
+                "\n",
+                "    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n",
+                "    #\n",
+                "    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n",
+                "\n",
+                "    # To aid supportability, determine which binary file will actually be executed on the machine\n",
+                "    #\n",
+                "    which_binary = None\n",
+                "\n",
+                "    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n",
+                "    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n",
+                "    # of CURL exists on the machine use that one.  (Unfortunately the curl.exe in System32 is almost\n",
+                "    # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n",
+                "    # look for the 2nd installation of CURL in the path)\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n",
+                "        path = os.getenv('PATH')\n",
+                "        for p in path.split(os.path.pathsep):\n",
+                "            p = os.path.join(p, \"curl.exe\")\n",
+                "            if os.path.exists(p) and os.access(p, os.X_OK):\n",
+                "                if p.lower().find(\"system32\") == -1:\n",
+                "                    cmd_actual[0] = p\n",
+                "                    which_binary = p\n",
+                "                    break\n",
+                "\n",
+                "    # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n",
+                "    # seems to be required for .msi installs of azdata.cmd/az.cmd.  (otherwise Popen returns FileNotFound) \n",
+                "    #\n",
+                "    # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        which_binary = shutil.which(cmd_actual[0])\n",
+                "\n",
+                "    # Display an install HINT, so the user can click on a SOP to install the missing binary\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n",
+                "        print(sys.path)\n",
+                "\n",
+                "        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
+                "            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n",
+                "    else:   \n",
+                "        cmd_actual[0] = which_binary\n",
+                "\n",
+                "    start_time = datetime.datetime.now().replace(microsecond=0)\n",
+                "\n",
+                "    print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n",
+                "    print(f\"       using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n",
+                "    print(f\"       cwd: {os.getcwd()}\")\n",
+                "\n",
+                "    # Command-line tools such as CURL and AZDATA HDFS commands output\n",
+                "    # scrolling progress bars, which causes Jupyter to hang forever, to\n",
+                "    # workaround this, use no_output=True\n",
+                "    #\n",
+                "\n",
+                "\n",
+                "    # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n",
+                "    #\n",
+                "    wait = True \n",
+                "\n",
+                "    try:\n",
+                "        if no_output:\n",
+                "            p = Popen(cmd_actual)\n",
+                "        else:\n",
+                "            p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n",
+                "            with p.stdout:\n",
+                "                for line in iter(p.stdout.readline, b''):\n",
+                "                    line = line.decode()\n",
+                "                    if return_output:\n",
+                "                        output = output + line\n",
+                "                    else:\n",
+                "                        if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n",
+                "                            regex = re.compile('  \"(.*)\"\\: \"(.*)\"') \n",
+                "                            match = regex.match(line)\n",
+                "                            if match:\n",
+                "                                if match.group(1).find(\"HTML\") != -1:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n",
+                "                                else:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n",
+                "\n",
+                "                                    wait = False\n",
+                "                                    break # otherwise infinite hang, have not worked out why yet.\n",
+                "                        else:\n",
+                "                            print(line, end='')\n",
+                "                            if rules is not None:\n",
+                "                                apply_expert_rules(line)\n",
+                "\n",
+                "        if wait:\n",
+                "            p.wait()\n",
+                "    except FileNotFoundError as e:\n",
+                "        if install_hint is not None:\n",
+                "            display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n",
+                "\n",
+                "    exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n",
+                "\n",
+                "    if not no_output:\n",
+                "        for line in iter(p.stderr.readline, b''):\n",
+                "            try:\n",
+                "                line_decoded = line.decode()\n",
+                "            except UnicodeDecodeError:\n",
+                "                # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n",
+                "                #\n",
+                "                #   \\xa0\n",
+                "                #\n",
+                "                # For example see this in the response from `az group create`:\n",
+                "                #\n",
+                "                # ERROR: Get Token request returned http error: 400 and server \n",
+                "                # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n",
+                "                # The refresh token has expired due to inactivity.\\xa0The token was \n",
+                "                # issued on 2018-10-25T23:35:11.9832872Z\n",
+                "                #\n",
+                "                # which generates the exception:\n",
+                "                #\n",
+                "                # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n",
+                "                #\n",
+                "                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n",
+                "                print(line)\n",
+                "                line_decoded = \"\"\n",
+                "                pass\n",
+                "            else:\n",
+                "\n",
+                "                # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n",
+                "                # print this empty \"ERR:\" as it confuses.\n",
+                "                #\n",
+                "                if line_decoded == \"\":\n",
+                "                    continue\n",
+                "                \n",
+                "                print(f\"STDERR: {line_decoded}\", end='')\n",
+                "\n",
+                "                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n",
+                "                    exit_code_workaround = 1\n",
+                "\n",
+                "                # inject HINTs to next TSG/SOP based on output in stderr\n",
+                "                #\n",
+                "                if user_provided_exe_name in error_hints:\n",
+                "                    for error_hint in error_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(error_hint[0]) != -1:\n",
+                "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
+                "\n",
+                "                # apply expert rules (to run follow-on notebooks), based on output\n",
+                "                #\n",
+                "                if rules is not None:\n",
+                "                    apply_expert_rules(line_decoded)\n",
+                "\n",
+                "                # Verify if a transient error, if so automatically retry (recursive)\n",
+                "                #\n",
+                "                if user_provided_exe_name in retry_hints:\n",
+                "                    for retry_hint in retry_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(retry_hint) != -1:\n",
+                "                            if retry_count < MAX_RETRIES:\n",
+                "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
+                "                                retry_count = retry_count + 1\n",
+                "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
+                "\n",
+                "                                if return_output:\n",
+                "                                    if base64_decode:\n",
+                "                                        import base64\n",
+                "\n",
+                "                                        return base64.b64decode(output).decode('utf-8')\n",
+                "                                    else:\n",
+                "                                        return output\n",
+                "\n",
+                "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
+                "\n",
+                "    # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n",
+                "    # don't wait here, if success known above\n",
+                "    #\n",
+                "    if wait: \n",
+                "        if p.returncode != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n",
+                "    else:\n",
+                "        if exit_code_workaround !=0 :\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n",
+                "\n",
+                "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
+                "\n",
+                "    if return_output:\n",
+                "        if base64_decode:\n",
+                "            import base64\n",
+                "\n",
+                "            return base64.b64decode(output).decode('utf-8')\n",
+                "        else:\n",
+                "            return output\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    \"\"\"Load a json file from disk and return the contents\"\"\"\n",
+                "\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def load_rules():\n",
+                "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
+                "\n",
+                "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
+                "    #\n",
+                "    try:\n",
+                "        j = load_json(\"cer030-sign-service-proxy-generated-cert.ipynb\")\n",
+                "    except:\n",
+                "        pass # If the user has renamed the book, we can't load ourself.  NOTE: Is there a way in Jupyter, to know your own filename?\n",
+                "    else:\n",
+                "        if \"metadata\" in j and \\\n",
+                "            \"azdata\" in j[\"metadata\"] and \\\n",
+                "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
+                "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "\n",
+                "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
+                "\n",
+                "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n",
+                "\n",
+                "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
+                "\n",
+                "            return rules\n",
+                "\n",
+                "def apply_expert_rules(line):\n",
+                "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n",
+                "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
+                "\n",
+                "    global rules\n",
+                "\n",
+                "    for rule in rules:\n",
+                "        notebook = rule[1]\n",
+                "        cell_type = rule[2]\n",
+                "        output_type = rule[3] # i.e. stream or error\n",
+                "        output_type_name = rule[4] # i.e. ename or name \n",
+                "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
+                "        details_name = rule[6]  # i.e. evalue or text \n",
+                "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
+                "\n",
+                "        if debug_logging:\n",
+                "            print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n",
+                "\n",
+                "        if re.match(expression, line, re.DOTALL):\n",
+                "\n",
+                "            if debug_logging:\n",
+                "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n",
+                "\n",
+                "            match_found = True\n",
+                "\n",
+                "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
+                "\n",
+                "\n",
+                "\n",
+                "print('Common functions defined successfully.')\n",
+                "\n",
+                "# Hints for binary (transient fault) retry, (known) error and install guide\n",
+                "#\n",
+                "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n",
+                "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n",
+                "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the Kubernetes namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster use the kubectl command line\n",
+                "interface .\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
+                "    except:\n",
+                "        from IPython.display import Markdown\n",
+                "        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'.  SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Create a temporary directory to stage files"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Create a temporary directory to hold configuration files\n",
+                "\n",
+                "import tempfile\n",
+                "\n",
+                "temp_dir = tempfile.mkdtemp()\n",
+                "\n",
+                "print(f\"Temporary directory created: {temp_dir}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Helper function to save configuration files to disk"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define helper function 'save_file' to save configuration files to the temporary directory created above\n",
+                "import os\n",
+                "import io\n",
+                "\n",
+                "def save_file(filename, contents):\n",
+                "    with io.open(os.path.join(temp_dir, filename), \"w\", encoding='utf8', newline='\\n') as text_file:\n",
+                "      text_file.write(contents)\n",
+                "\n",
+                "      print(\"File saved: \" + os.path.join(temp_dir, filename))\n",
+                "\n",
+                "print(\"Function `save_file` defined successfully.\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get name of the \u2018Running\u2019 `controller` `pod`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place the name  of the 'Running' controller pod in variable `controller`\n",
+                "\n",
+                "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n",
+                "\n",
+                "print(f\"Controller pod name: {controller}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Create Signing Request configuration file"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "certificate = f\"\"\"\n",
+                "[ ca ]\n",
+                "default_ca    = CA_default      # The default ca section\n",
+                "\n",
+                "[ CA_default ]\n",
+                "default_days     = 1000         # How long to certify for\n",
+                "default_crl_days = 30           # How long before next CRL\n",
+                "default_md       = sha256       # Use public key default MD\n",
+                "preserve         = no           # Keep passed DN ordering\n",
+                "\n",
+                "x509_extensions = ca_extensions # The extensions to add to the cert\n",
+                "\n",
+                "email_in_dn     = no            # Don't concat the email in the DN\n",
+                "copy_extensions = copy          # Required to copy SANs from CSR to cert\n",
+                "\n",
+                "base_dir      = {test_cert_store_root}\n",
+                "certificate   = $base_dir/{certificate_filename}   # The CA certifcate\n",
+                "private_key   = $base_dir/{private_key_filename}    # The CA private key\n",
+                "new_certs_dir = $base_dir              # Location for new certs after signing\n",
+                "database      = $base_dir/index.txt    # Database index file\n",
+                "serial        = $base_dir/serial.txt   # The current serial number\n",
+                "\n",
+                "unique_subject = no  # Set to 'no' to allow creation of\n",
+                "                     # several certificates with same subject.\n",
+                "\n",
+                "[ req ]\n",
+                "default_bits       = 2048\n",
+                "default_keyfile    = {test_cert_store_root}/{private_key_filename}\n",
+                "distinguished_name = ca_distinguished_name\n",
+                "x509_extensions    = ca_extensions\n",
+                "string_mask        = utf8only\n",
+                "\n",
+                "[ ca_distinguished_name ]\n",
+                "countryName         = Country Name (2 letter code)\n",
+                "countryName_default = {country_name}\n",
+                "\n",
+                "stateOrProvinceName         = State or Province Name (full name)\n",
+                "stateOrProvinceName_default = {state_or_province_name}\n",
+                "\n",
+                "localityName                = Locality Name (eg, city)\n",
+                "localityName_default        = {locality_name}\n",
+                "\n",
+                "organizationName            = Organization Name (eg, company)\n",
+                "organizationName_default    = {organization_name}\n",
+                "\n",
+                "organizationalUnitName         = Organizational Unit (eg, division)\n",
+                "organizationalUnitName_default = {organizational_unit_name}\n",
+                "\n",
+                "commonName         = Common Name (e.g. server FQDN or YOUR name)\n",
+                "commonName_default = {common_name}\n",
+                "\n",
+                "emailAddress         = Email Address\n",
+                "emailAddress_default = {email_address}\n",
+                "\n",
+                "[ ca_extensions ]\n",
+                "subjectKeyIdentifier   = hash\n",
+                "authorityKeyIdentifier = keyid:always, issuer\n",
+                "basicConstraints       = critical, CA:true\n",
+                "keyUsage               = keyCertSign, cRLSign\n",
+                "\n",
+                "[ signing_policy ]\n",
+                "countryName            = optional\n",
+                "stateOrProvinceName    = optional\n",
+                "localityName           = optional\n",
+                "organizationName       = optional\n",
+                "organizationalUnitName = optional\n",
+                "commonName             = supplied\n",
+                "emailAddress           = optional\n",
+                "\n",
+                "[ signing_req ]\n",
+                "subjectKeyIdentifier   = hash\n",
+                "authorityKeyIdentifier = keyid,issuer\n",
+                "basicConstraints       = CA:FALSE\n",
+                "keyUsage               = digitalSignature, keyEncipherment\n",
+                "\"\"\"\n",
+                "\n",
+                "save_file(ssl_configuration_file, certificate)\n"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Copy certificate configuration to `controller` `pod`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import os\n",
+                "\n",
+                "cwd = os.getcwd()\n",
+                "os.chdir(temp_dir) # Use chdir to workaround kubectl bug on Windows, which incorrectly processes 'c:\\' on kubectl cp cmd line \n",
+                "\n",
+                "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"mkdir -p {test_cert_store_root}/{app_name}\"')\n",
+                "\n",
+                "run(f'kubectl cp {ssl_configuration_file} {controller}:{test_cert_store_root}/{app_name}/{ssl_configuration_file} -c controller -n {namespace}')\n",
+                "\n",
+                "os.chdir(cwd)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Obtain CA certificate and extract the private key from pfx if Big Data Cluster CA is used"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "if use_bdc_ca:\n",
+                "\n",
+                "  cmd = f\"cp /var/run/secrets/certificates/rootca/cluster-ca-certificate.crt {test_cert_store_root}/{certificate_filename}\"\n",
+                "\n",
+                "  run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')\n",
+                "\n",
+                "  cmd = f\"NO_PASSWORD= openssl pkcs12 -in /var/run/secrets/certificates/rootca/cluster-ca-certificate.p12 -out {test_cert_store_root}/{private_key_filename} -nocerts -nodes -password env:NO_PASSWORD\"\n",
+                "\n",
+                "  run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')"
+            ]
+        },
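+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Check the extracted private key (optional)\n",
+                "\n",
+                "A minimal sketch, only applicable when `use_bdc_ca` is set: use\n",
+                "`openssl rsa -check` to confirm the private key extracted from the\n",
+                "pfx above is well-formed before it is used for signing."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "if use_bdc_ca:\n",
+                "\n",
+                "  # Optional sanity check on the key extracted from the pfx above\n",
+                "  cmd = f\"openssl rsa -in {test_cert_store_root}/{private_key_filename} -check -noout\"\n",
+                "\n",
+                "  run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')"
+            ]
+        },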
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Set next serial number"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "run(f'kubectl exec {controller} -n {namespace} -c controller -- bash -c \"test -f {test_cert_store_root}/index.txt || touch {test_cert_store_root}/index.txt\"')\n",
+                "run(f\"\"\"kubectl exec {controller} -n {namespace} -c controller -- bash -c \"test -f {test_cert_store_root}/serial.txt || echo '00' > {test_cert_store_root}/serial.txt\" \"\"\")\n",
+                "\n",
+                "current_serial_number = run(f\"\"\"kubectl exec {controller} -n {namespace} -c controller -- bash -c \"cat {test_cert_store_root}/serial.txt\" \"\"\", return_output=True)\n",
+                "\n",
+                "# The serial number is hex\n",
+                "new_serial_number = int(f\"0x{current_serial_number}\", 0) + 1\n",
+                "\n",
+                "run(f\"\"\"kubectl exec {controller} -n {namespace} -c controller -- bash -c \"echo '{new_serial_number:02X}' > {test_cert_store_root}/serial.txt\" \"\"\")"
+            ]
+        },
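+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "The cell above parses the serial number as hex and writes it back\n",
+                "with a minimum width of two digits. A minimal local sketch of that\n",
+                "arithmetic, using a hypothetical current value of '0A':"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "# Illustration only - mirrors the hex increment performed on the pod\n",
+                "example_current = \"0A\"  # a hypothetical serial.txt value\n",
+                "\n",
+                "# int(..., 0) honours the 0x prefix, so the value is parsed as hex\n",
+                "example_next = int(f\"0x{example_current}\", 0) + 1\n",
+                "\n",
+                "print(f\"{example_next:02X}\")  # prints: 0B"
+            ]
+        },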
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Create private key and certificate signing request\n",
+                "\n",
+                "Use openssl ca to create a private key and signing request. See:\n",
+                "\n",
+                "-   https://www.openssl.org/docs/man1.0.2/man1/ca.html"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "cmd = f\"openssl ca -notext -batch -config {test_cert_store_root}/{app_name}/ca.openssl.cnf -policy signing_policy -extensions signing_req -out {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -infiles {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-signingrequest.csr\"\n",
+                "\n",
+                "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')"
+            ]
+        },
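+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Verify the signed certificate against the CA (optional)\n",
+                "\n",
+                "A minimal sketch, assuming the CA certificate used for signing is at\n",
+                "`{test_cert_store_root}/{certificate_filename}` on the `controller`\n",
+                "pod: `openssl verify` confirms the new certificate chains to that\n",
+                "CA."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "# Optional chain check against the CA certificate used for signing\n",
+                "cmd = f\"openssl verify -CAfile {test_cert_store_root}/{certificate_filename} {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem\"\n",
+                "\n",
+                "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')"
+            ]
+        },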
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Display certificate\n",
+                "\n",
+                "Use openssl x509 to display the certificate, so it can be visually\n",
+                "verified to be correct."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "cmd = f\"openssl x509 -in {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -text -noout\"\n",
+                "\n",
+                "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Clean up temporary directory for staging configuration files"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Delete the temporary directory used to hold configuration files\n",
+                "\n",
+                "import shutil\n",
+                "\n",
+                "shutil.rmtree(temp_dir)\n",
+                "\n",
+                "print(f'Temporary directory deleted: {temp_dir}')"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "Related\n",
+                "-------\n",
+                "\n",
+                "-   [CER031 - Sign Knox certificate with generated\n",
+                "    CA](../cert-management/cer031-sign-knox-generated-cert.ipynb)\n",
+                "\n",
+                "-   [CER020 - Create Management Proxy\n",
+                "    certificate](../cert-management/cer020-create-management-service-proxy-cert.ipynb)\n",
+                "\n",
+                "-   [CER040 - Install signed Management Proxy\n",
+                "    certificate](../cert-management/cer040-install-service-proxy-cert.ipynb)"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": true
+        }
+    }
+}

+ 818 - 0
Big-Data-Clusters/CU8/Public/content/cert-management/cer031-sign-knox-generated-cert.ipynb

@@ -0,0 +1,818 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "CER031 - Sign Knox certificate with generated CA\n",
+                "================================================\n",
+                "\n",
+                "This notebook signs the certificate created using:\n",
+                "\n",
+                "-   [CER021 - Create Knox\n",
+                "    certificate](../cert-management/cer021-create-knox-cert.ipynb)\n",
+                "\n",
+                "with the Big Data Cluster CA or the generated Root CA Certificate,\n",
+                "created using either:\n",
+                "\n",
+                "-   [CER001 - Generate a Root CA\n",
+                "    certificate](../cert-management/cer001-create-root-ca.ipynb)\n",
+                "-   [CER003 - Upload existing Root CA\n",
+                "    certificate](../cert-management/cer003-upload-existing-root-ca.ipynb)\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "import getpass\n",
+                "\n",
+                "app_name = \"gateway\"\n",
+                "scaledset_name = \"gateway/pods/gateway-0\"\n",
+                "container_name = \"knox\"\n",
+                "prefix_keyfile_name = \"knox\"\n",
+                "common_name = \"gateway-svc\"\n",
+                "\n",
+                "country_name = \"US\"\n",
+                "state_or_province_name = \"Illinois\"\n",
+                "locality_name = \"Chicago\"\n",
+                "organization_name = \"Contoso\"\n",
+                "organizational_unit_name = \"Finance\"\n",
+                "email_address = f\"{getpass.getuser().lower()}@contoso.com\"\n",
+                "\n",
+                "ssl_configuration_file = \"ca.openssl.cnf\"\n",
+                "\n",
+                "days = \"398\" # the number of days to certify the certificate for\n",
+                "\n",
+                "certificate_filename = \"cacert.pem\"\n",
+                "private_key_filename = \"cakey.pem\"\n",
+                "use_bdc_ca = False\n",
+                "\n",
+                "test_cert_store_root = \"/var/opt/secrets/test-certificates\""
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Common functions\n",
+                "\n",
+                "Define helper functions used in this notebook."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n",
+                "import sys\n",
+                "import os\n",
+                "import re\n",
+                "import json\n",
+                "import platform\n",
+                "import shlex\n",
+                "import shutil\n",
+                "import datetime\n",
+                "\n",
+                "from subprocess import Popen, PIPE\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
+                "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
+                "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
+                "\n",
+                "first_run = True\n",
+                "rules = None\n",
+                "debug_logging = False\n",
+                "\n",
+                "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
+                "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
+                "\n",
+                "    NOTES:\n",
+                "\n",
+                "    1.  Commands that need this kind of ' quoting on Windows e.g.:\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n",
+                "\n",
+                "        Need to actually pass in as '\"':\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n",
+                "\n",
+                "        The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n",
+                "        \n",
+                "            `iter(p.stdout.readline, b'')`\n",
+                "\n",
+                "        The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n",
+                "    \"\"\"\n",
+                "    MAX_RETRIES = 5\n",
+                "    output = \"\"\n",
+                "    retry = False\n",
+                "\n",
+                "    global first_run\n",
+                "    global rules\n",
+                "\n",
+                "    if first_run:\n",
+                "        first_run = False\n",
+                "        rules = load_rules()\n",
+                "\n",
+                "    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n",
+                "    #\n",
+                "    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
+                "    #\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n",
+                "        cmd = cmd.replace(\"\\n\", \" \")\n",
+                "\n",
+                "    # shlex.split is required on bash and for Windows paths with spaces\n",
+                "    #\n",
+                "    cmd_actual = shlex.split(cmd)\n",
+                "\n",
+                "    # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n",
+                "    #\n",
+                "    user_provided_exe_name = cmd_actual[0].lower()\n",
+                "\n",
+                "    # When running python, use the python in the ADS sandbox ({sys.executable})\n",
+                "    #\n",
+                "    if cmd.startswith(\"python \"):\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n",
+                "\n",
+                "        # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n",
+                "        # with:\n",
+                "        #\n",
+                "        #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n",
+                "        #\n",
+                "        # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n",
+                "        #\n",
+                "        if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n",
+                "            os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n",
+                "\n",
+                "    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n",
+                "    #\n",
+                "    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n",
+                "\n",
+                "    # To aid supportability, determine which binary file will actually be executed on the machine\n",
+                "    #\n",
+                "    which_binary = None\n",
+                "\n",
+                "    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n",
+                "    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n",
+                "    # of CURL exists on the machine use that one.  (Unfortunately the curl.exe in System32 is almost\n",
+                "    # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n",
+                "    # look for the 2nd installation of CURL in the path)\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n",
+                "        path = os.getenv('PATH')\n",
+                "        for p in path.split(os.path.pathsep):\n",
+                "            p = os.path.join(p, \"curl.exe\")\n",
+                "            if os.path.exists(p) and os.access(p, os.X_OK):\n",
+                "                if p.lower().find(\"system32\") == -1:\n",
+                "                    cmd_actual[0] = p\n",
+                "                    which_binary = p\n",
+                "                    break\n",
+                "\n",
+                "    # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n",
+                "    # seems to be required for .msi installs of azdata.cmd/az.cmd.  (otherwise Popen returns FileNotFound) \n",
+                "    #\n",
+                "    # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        which_binary = shutil.which(cmd_actual[0])\n",
+                "\n",
+                "    # Display an install HINT, so the user can click on a SOP to install the missing binary\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n",
+                "        print(sys.path)\n",
+                "\n",
+                "        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
+                "            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n",
+                "    else:   \n",
+                "        cmd_actual[0] = which_binary\n",
+                "\n",
+                "    start_time = datetime.datetime.now().replace(microsecond=0)\n",
+                "\n",
+                "    print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n",
+                "    print(f\"       using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n",
+                "    print(f\"       cwd: {os.getcwd()}\")\n",
+                "\n",
+                "    # Command-line tools such as CURL and AZDATA HDFS commands output\n",
+                "    # scrolling progress bars, which causes Jupyter to hang forever, to\n",
+                "    # workaround this, use no_output=True\n",
+                "    #\n",
+                "\n",
+                "\n",
+                "    # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n",
+                "    #\n",
+                "    wait = True \n",
+                "\n",
+                "    try:\n",
+                "        if no_output:\n",
+                "            p = Popen(cmd_actual)\n",
+                "        else:\n",
+                "            p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n",
+                "            with p.stdout:\n",
+                "                for line in iter(p.stdout.readline, b''):\n",
+                "                    line = line.decode()\n",
+                "                    if return_output:\n",
+                "                        output = output + line\n",
+                "                    else:\n",
+                "                        if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n",
+                "                            regex = re.compile('  \"(.*)\"\\: \"(.*)\"') \n",
+                "                            match = regex.match(line)\n",
+                "                            if match:\n",
+                "                                if match.group(1).find(\"HTML\") != -1:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n",
+                "                                else:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n",
+                "\n",
+                "                                    wait = False\n",
+                "                                    break # otherwise infinite hang, have not worked out why yet.\n",
+                "                        else:\n",
+                "                            print(line, end='')\n",
+                "                            if rules is not None:\n",
+                "                                apply_expert_rules(line)\n",
+                "\n",
+                "        if wait:\n",
+                "            p.wait()\n",
+                "    except FileNotFoundError as e:\n",
+                "        if install_hint is not None:\n",
+                "            display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n",
+                "\n",
+                "    exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n",
+                "\n",
+                "    if not no_output:\n",
+                "        for line in iter(p.stderr.readline, b''):\n",
+                "            try:\n",
+                "                line_decoded = line.decode()\n",
+                "            except UnicodeDecodeError:\n",
+                "                # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n",
+                "                #\n",
+                "                #   \\xa0\n",
+                "                #\n",
+                "                # For example see this in the response from `az group create`:\n",
+                "                #\n",
+                "                # ERROR: Get Token request returned http error: 400 and server \n",
+                "                # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n",
+                "                # The refresh token has expired due to inactivity.\\xa0The token was \n",
+                "                # issued on 2018-10-25T23:35:11.9832872Z\n",
+                "                #\n",
+                "                # which generates the exception:\n",
+                "                #\n",
+                "                # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n",
+                "                #\n",
+                "                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n",
+                "                print(line)\n",
+                "                line_decoded = \"\"\n",
+                "                pass\n",
+                "            else:\n",
+                "\n",
+                "                # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n",
+                "                # print this empty \"ERR:\" as it confuses.\n",
+                "                #\n",
+                "                if line_decoded == \"\":\n",
+                "                    continue\n",
+                "                \n",
+                "                print(f\"STDERR: {line_decoded}\", end='')\n",
+                "\n",
+                "                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n",
+                "                    exit_code_workaround = 1\n",
+                "\n",
+                "                # inject HINTs to next TSG/SOP based on output in stderr\n",
+                "                #\n",
+                "                if user_provided_exe_name in error_hints:\n",
+                "                    for error_hint in error_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(error_hint[0]) != -1:\n",
+                "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
+                "\n",
+                "                # apply expert rules (to run follow-on notebooks), based on output\n",
+                "                #\n",
+                "                if rules is not None:\n",
+                "                    apply_expert_rules(line_decoded)\n",
+                "\n",
+                "                # Verify if a transient error, if so automatically retry (recursive)\n",
+                "                #\n",
+                "                if user_provided_exe_name in retry_hints:\n",
+                "                    for retry_hint in retry_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(retry_hint) != -1:\n",
+                "                            if retry_count < MAX_RETRIES:\n",
+                "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
+                "                                retry_count = retry_count + 1\n",
+                "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
+                "\n",
+                "                                if return_output:\n",
+                "                                    if base64_decode:\n",
+                "                                        import base64\n",
+                "\n",
+                "                                        return base64.b64decode(output).decode('utf-8')\n",
+                "                                    else:\n",
+                "                                        return output\n",
+                "\n",
+                "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
+                "\n",
+                "    # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n",
+                "    # don't wait here, if success known above\n",
+                "    #\n",
+                "    if wait: \n",
+                "        if p.returncode != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n",
+                "    else:\n",
+                "        if exit_code_workaround !=0 :\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n",
+                "\n",
+                "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
+                "\n",
+                "    if return_output:\n",
+                "        if base64_decode:\n",
+                "            import base64\n",
+                "\n",
+                "            return base64.b64decode(output).decode('utf-8')\n",
+                "        else:\n",
+                "            return output\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    \"\"\"Load a json file from disk and return the contents\"\"\"\n",
+                "\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def load_rules():\n",
+                "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
+                "\n",
+                "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
+                "    #\n",
+                "    try:\n",
+                "        j = load_json(\"cer031-sign-knox-generated-cert.ipynb\")\n",
+                "    except:\n",
+                "        pass # If the user has renamed the book, we can't load ourself.  NOTE: Is there a way in Jupyter, to know your own filename?\n",
+                "    else:\n",
+                "        if \"metadata\" in j and \\\n",
+                "            \"azdata\" in j[\"metadata\"] and \\\n",
+                "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
+                "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "\n",
+                "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
+                "\n",
+                "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n",
+                "\n",
+                "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
+                "\n",
+                "            return rules\n",
+                "\n",
+                "def apply_expert_rules(line):\n",
+                "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n",
+                "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
+                "\n",
+                "    global rules\n",
+                "\n",
+                "    for rule in rules:\n",
+                "        notebook = rule[1]\n",
+                "        cell_type = rule[2]\n",
+                "        output_type = rule[3] # i.e. stream or error\n",
+                "        output_type_name = rule[4] # i.e. ename or name \n",
+                "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
+                "        details_name = rule[6]  # i.e. evalue or text \n",
+                "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
+                "\n",
+                "        if debug_logging:\n",
+                "            print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n",
+                "\n",
+                "        if re.match(expression, line, re.DOTALL):\n",
+                "\n",
+                "            if debug_logging:\n",
+                "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n",
+                "\n",
+                "            match_found = True\n",
+                "\n",
+                "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
+                "\n",
+                "\n",
+                "\n",
+                "print('Common functions defined successfully.')\n",
+                "\n",
+                "# Hints for binary (transient fault) retry, (known) error and install guide\n",
+                "#\n",
+                "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n",
+                "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n",
+                "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the Kubernetes namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster use the kubectl command line\n",
+                "interface .\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
+                "    except:\n",
+                "        from IPython.display import Markdown\n",
+                "        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'.  SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')"
+            ]
+        },
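+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "A minimal sketch of the override described above, using a\n",
+                "hypothetical namespace name. Uncomment and run before the lookup\n",
+                "cell (or export `AZDATA_NAMESPACE` before starting Azure Data\n",
+                "Studio):"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "# Illustration only - 'mssql-cluster' is a hypothetical namespace name\n",
+                "#\n",
+                "# import os\n",
+                "# os.environ[\"AZDATA_NAMESPACE\"] = \"mssql-cluster\""
+            ]
+        },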
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Create a temporary directory to stage files"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Create a temporary directory to hold configuration files\n",
+                "\n",
+                "import tempfile\n",
+                "\n",
+                "temp_dir = tempfile.mkdtemp()\n",
+                "\n",
+                "print(f\"Temporary directory created: {temp_dir}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Helper function to save configuration files to disk"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define helper function 'save_file' to save configuration files to the temporary directory created above\n",
+                "import os\n",
+                "import io\n",
+                "\n",
+                "def save_file(filename, contents):\n",
+                "    with io.open(os.path.join(temp_dir, filename), \"w\", encoding='utf8', newline='\\n') as text_file:\n",
+                "      text_file.write(contents)\n",
+                "\n",
+                "      print(\"File saved: \" + os.path.join(temp_dir, filename))\n",
+                "\n",
+                "print(\"Function `save_file` defined successfully.\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get name of the \u2018Running\u2019 `controller` `pod`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place the name  of the 'Running' controller pod in variable `controller`\n",
+                "\n",
+                "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n",
+                "\n",
+                "print(f\"Controller pod name: {controller}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Create Signing Request configuration file"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "certificate = f\"\"\"\n",
+                "[ ca ]\n",
+                "default_ca    = CA_default      # The default ca section\n",
+                "\n",
+                "[ CA_default ]\n",
+                "default_days     = 1000         # How long to certify for\n",
+                "default_crl_days = 30           # How long before next CRL\n",
+                "default_md       = sha256       # Use public key default MD\n",
+                "preserve         = no           # Keep passed DN ordering\n",
+                "\n",
+                "x509_extensions = ca_extensions # The extensions to add to the cert\n",
+                "\n",
+                "email_in_dn     = no            # Don't concat the email in the DN\n",
+                "copy_extensions = copy          # Required to copy SANs from CSR to cert\n",
+                "\n",
+                "base_dir      = {test_cert_store_root}\n",
+                "certificate   = $base_dir/{certificate_filename}   # The CA certifcate\n",
+                "private_key   = $base_dir/{private_key_filename}    # The CA private key\n",
+                "new_certs_dir = $base_dir              # Location for new certs after signing\n",
+                "database      = $base_dir/index.txt    # Database index file\n",
+                "serial        = $base_dir/serial.txt   # The current serial number\n",
+                "\n",
+                "unique_subject = no  # Set to 'no' to allow creation of\n",
+                "                     # several certificates with same subject.\n",
+                "\n",
+                "[ req ]\n",
+                "default_bits       = 2048\n",
+                "default_keyfile    = {test_cert_store_root}/{private_key_filename}\n",
+                "distinguished_name = ca_distinguished_name\n",
+                "x509_extensions    = ca_extensions\n",
+                "string_mask        = utf8only\n",
+                "\n",
+                "[ ca_distinguished_name ]\n",
+                "countryName         = Country Name (2 letter code)\n",
+                "countryName_default = {country_name}\n",
+                "\n",
+                "stateOrProvinceName         = State or Province Name (full name)\n",
+                "stateOrProvinceName_default = {state_or_province_name}\n",
+                "\n",
+                "localityName                = Locality Name (eg, city)\n",
+                "localityName_default        = {locality_name}\n",
+                "\n",
+                "organizationName            = Organization Name (eg, company)\n",
+                "organizationName_default    = {organization_name}\n",
+                "\n",
+                "organizationalUnitName         = Organizational Unit (eg, division)\n",
+                "organizationalUnitName_default = {organizational_unit_name}\n",
+                "\n",
+                "commonName         = Common Name (e.g. server FQDN or YOUR name)\n",
+                "commonName_default = {common_name}\n",
+                "\n",
+                "emailAddress         = Email Address\n",
+                "emailAddress_default = {email_address}\n",
+                "\n",
+                "[ ca_extensions ]\n",
+                "subjectKeyIdentifier   = hash\n",
+                "authorityKeyIdentifier = keyid:always, issuer\n",
+                "basicConstraints       = critical, CA:true\n",
+                "keyUsage               = keyCertSign, cRLSign\n",
+                "\n",
+                "[ signing_policy ]\n",
+                "countryName            = optional\n",
+                "stateOrProvinceName    = optional\n",
+                "localityName           = optional\n",
+                "organizationName       = optional\n",
+                "organizationalUnitName = optional\n",
+                "commonName             = supplied\n",
+                "emailAddress           = optional\n",
+                "\n",
+                "[ signing_req ]\n",
+                "subjectKeyIdentifier   = hash\n",
+                "authorityKeyIdentifier = keyid,issuer\n",
+                "basicConstraints       = CA:FALSE\n",
+                "keyUsage               = digitalSignature, keyEncipherment\n",
+                "\"\"\"\n",
+                "\n",
+                "save_file(ssl_configuration_file, certificate)\n"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Copy certificate configuration to `controller` `pod`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import os\n",
+                "\n",
+                "cwd = os.getcwd()\n",
+                "os.chdir(temp_dir) # Use chdir to workaround kubectl bug on Windows, which incorrectly processes 'c:\\' on kubectl cp cmd line \n",
+                "\n",
+                "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"mkdir -p {test_cert_store_root}/{app_name}\"')\n",
+                "\n",
+                "run(f'kubectl cp {ssl_configuration_file} {controller}:{test_cert_store_root}/{app_name}/{ssl_configuration_file} -c controller -n {namespace}')\n",
+                "\n",
+                "os.chdir(cwd)"
+            ]
+        },
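+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Inspect the signing request (optional)\n",
+                "\n",
+                "A minimal sketch, assuming the CSR created by CER021 is at the path\n",
+                "signed below: `openssl req` displays the requested subject and\n",
+                "extensions, including any SANs that `copy_extensions = copy` will\n",
+                "carry into the signed certificate."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "# Optional look at the CSR before signing it\n",
+                "cmd = f\"openssl req -in {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-signingrequest.csr -text -noout\"\n",
+                "\n",
+                "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')"
+            ]
+        },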
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Obtain CA certificate and extract the private key from pfx if Big Data Cluster CA is used"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "if use_bdc_ca:\n",
+                "\n",
+                "  cmd = f\"cp /var/run/secrets/certificates/rootca/cluster-ca-certificate.crt {test_cert_store_root}/{certificate_filename}\"\n",
+                "\n",
+                "  run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')\n",
+                "\n",
+                "  cmd = f\"NO_PASSWORD= openssl pkcs12 -in /var/run/secrets/certificates/rootca/cluster-ca-certificate.p12 -out {test_cert_store_root}/{private_key_filename} -nocerts -nodes -password env:NO_PASSWORD\"\n",
+                "\n",
+                "  run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Set next serial number"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "run(f'kubectl exec {controller} -n {namespace} -c controller -- bash -c \"test -f {test_cert_store_root}/index.txt || touch {test_cert_store_root}/index.txt\"')\n",
+                "run(f\"\"\"kubectl exec {controller} -n {namespace} -c controller -- bash -c \"test -f {test_cert_store_root}/serial.txt || echo '00' > {test_cert_store_root}/serial.txt\" \"\"\")\n",
+                "\n",
+                "current_serial_number = run(f\"\"\"kubectl exec {controller} -n {namespace} -c controller -- bash -c \"cat {test_cert_store_root}/serial.txt\" \"\"\", return_output=True)\n",
+                "\n",
+                "# The serial number is hex\n",
+                "new_serial_number = int(f\"0x{current_serial_number}\", 0) + 1\n",
+                "\n",
+                "run(f\"\"\"kubectl exec {controller} -n {namespace} -c controller -- bash -c \"echo '{new_serial_number:02X}' > {test_cert_store_root}/serial.txt\" \"\"\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Create private key and certificate signing request\n",
+                "\n",
+                "Use openssl ca to create a private key and signing request. See:\n",
+                "\n",
+                "-   https://www.openssl.org/docs/man1.0.2/man1/ca.html"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "cmd = f\"openssl ca -notext -batch -config {test_cert_store_root}/{app_name}/ca.openssl.cnf -policy signing_policy -extensions signing_req -out {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -infiles {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-signingrequest.csr\"\n",
+                "\n",
+                "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Display certificate\n",
+                "\n",
+                "Use openssl x509 to display the certificate, so it can be visually\n",
+                "verified to be correct."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "cmd = f\"openssl x509 -in {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -text -noout\"\n",
+                "\n",
+                "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Clean up temporary directory for staging configuration files"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Delete the temporary directory used to hold configuration files\n",
+                "\n",
+                "import shutil\n",
+                "\n",
+                "shutil.rmtree(temp_dir)\n",
+                "\n",
+                "print(f'Temporary directory deleted: {temp_dir}')"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "Related\n",
+                "-------\n",
+                "\n",
+                "-   [CER032 - Sign App-Proxy certificate with generated\n",
+                "    CA](../cert-management/cer032-sign-app-proxy-generated-cert.ipynb)\n",
+                "\n",
+                "-   [CER021 - Create Knox\n",
+                "    certificate](../cert-management/cer021-create-knox-cert.ipynb)\n",
+                "\n",
+                "-   [CER041 - Install signed Knox\n",
+                "    certificate](../cert-management/cer041-install-knox-cert.ipynb)"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": true
+        }
+    }
+}

+ 818 - 0
Big-Data-Clusters/CU8/Public/content/cert-management/cer032-sign-app-proxy-generated-cert.ipynb

@@ -0,0 +1,818 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "CER032 - Sign App-Proxy certificate with generated CA\n",
+                "=====================================================\n",
+                "\n",
+                "This notebook signs the certificate created using:\n",
+                "\n",
+                "-   [CER022 - Create App Proxy\n",
+                "    certificate](../cert-management/cer022-create-app-proxy-cert.ipynb)\n",
+                "\n",
+                "with the Big Data Cluster CA or the generated Root CA Certificate,\n",
+                "created using either:\n",
+                "\n",
+                "-   [CER001 - Generate a Root CA\n",
+                "    certificate](../cert-management/cer001-create-root-ca.ipynb)\n",
+                "-   [CER003 - Upload existing Root CA\n",
+                "    certificate](../cert-management/cer003-upload-existing-root-ca.ipynb)\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "import getpass\n",
+                "\n",
+                "app_name = \"app-proxy\"\n",
+                "scaledset_name = \"appproxy\"\n",
+                "container_name = \"app-service-proxy\"\n",
+                "prefix_keyfile_name = \"service-proxy\"\n",
+                "common_name = \"appproxy-svc\"\n",
+                "\n",
+                "country_name = \"US\"\n",
+                "state_or_province_name = \"Illinois\"\n",
+                "locality_name = \"Chicago\"\n",
+                "organization_name = \"Contoso\"\n",
+                "organizational_unit_name = \"Finance\"\n",
+                "email_address = f\"{getpass.getuser().lower()}@contoso.com\"\n",
+                "\n",
+                "ssl_configuration_file = \"ca.openssl.cnf\"\n",
+                "\n",
+                "days = \"398\" # the number of days to certify the certificate for\n",
+                "\n",
+                "certificate_filename = \"cacert.pem\"\n",
+                "private_key_filename = \"cakey.pem\"\n",
+                "use_bdc_ca = False\n",
+                "\n",
+                "test_cert_store_root = \"/var/opt/secrets/test-certificates\""
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Common functions\n",
+                "\n",
+                "Define helper functions used in this notebook."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n",
+                "import sys\n",
+                "import os\n",
+                "import re\n",
+                "import json\n",
+                "import platform\n",
+                "import shlex\n",
+                "import shutil\n",
+                "import datetime\n",
+                "\n",
+                "from subprocess import Popen, PIPE\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
+                "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
+                "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
+                "\n",
+                "first_run = True\n",
+                "rules = None\n",
+                "debug_logging = False\n",
+                "\n",
+                "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
+                "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
+                "\n",
+                "    NOTES:\n",
+                "\n",
+                "    1.  Commands that need this kind of ' quoting on Windows e.g.:\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n",
+                "\n",
+                "        Need to actually pass in as '\"':\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n",
+                "\n",
+                "        The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n",
+                "        \n",
+                "            `iter(p.stdout.readline, b'')`\n",
+                "\n",
+                "        The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n",
+                "    \"\"\"\n",
+                "    MAX_RETRIES = 5\n",
+                "    output = \"\"\n",
+                "    retry = False\n",
+                "\n",
+                "    global first_run\n",
+                "    global rules\n",
+                "\n",
+                "    if first_run:\n",
+                "        first_run = False\n",
+                "        rules = load_rules()\n",
+                "\n",
+                "    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n",
+                "    #\n",
+                "    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
+                "    #\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n",
+                "        cmd = cmd.replace(\"\\n\", \" \")\n",
+                "\n",
+                "    # shlex.split is required on bash and for Windows paths with spaces\n",
+                "    #\n",
+                "    cmd_actual = shlex.split(cmd)\n",
+                "\n",
+                "    # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n",
+                "    #\n",
+                "    user_provided_exe_name = cmd_actual[0].lower()\n",
+                "\n",
+                "    # When running python, use the python in the ADS sandbox ({sys.executable})\n",
+                "    #\n",
+                "    if cmd.startswith(\"python \"):\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n",
+                "\n",
+                "        # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n",
+                "        # with:\n",
+                "        #\n",
+                "        #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n",
+                "        #\n",
+                "        # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n",
+                "        #\n",
+                "        if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n",
+                "            os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n",
+                "\n",
+                "    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n",
+                "    #\n",
+                "    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n",
+                "\n",
+                "    # To aid supportability, determine which binary file will actually be executed on the machine\n",
+                "    #\n",
+                "    which_binary = None\n",
+                "\n",
+                "    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n",
+                "    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n",
+                "    # of CURL exists on the machine use that one.  (Unfortunately the curl.exe in System32 is almost\n",
+                "    # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n",
+                "    # look for the 2nd installation of CURL in the path)\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n",
+                "        path = os.getenv('PATH')\n",
+                "        for p in path.split(os.path.pathsep):\n",
+                "            p = os.path.join(p, \"curl.exe\")\n",
+                "            if os.path.exists(p) and os.access(p, os.X_OK):\n",
+                "                if p.lower().find(\"system32\") == -1:\n",
+                "                    cmd_actual[0] = p\n",
+                "                    which_binary = p\n",
+                "                    break\n",
+                "\n",
+                "    # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n",
+                "    # seems to be required for .msi installs of azdata.cmd/az.cmd.  (otherwise Popen returns FileNotFound) \n",
+                "    #\n",
+                "    # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        which_binary = shutil.which(cmd_actual[0])\n",
+                "\n",
+                "    # Display an install HINT, so the user can click on a SOP to install the missing binary\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n",
+                "        print(sys.path)\n",
+                "\n",
+                "        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
+                "            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n",
+                "    else:   \n",
+                "        cmd_actual[0] = which_binary\n",
+                "\n",
+                "    start_time = datetime.datetime.now().replace(microsecond=0)\n",
+                "\n",
+                "    print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n",
+                "    print(f\"       using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n",
+                "    print(f\"       cwd: {os.getcwd()}\")\n",
+                "\n",
+                "    # Command-line tools such as CURL and AZDATA HDFS commands output\n",
+                "    # scrolling progress bars, which causes Jupyter to hang forever, to\n",
+                "    # workaround this, use no_output=True\n",
+                "    #\n",
+                "\n",
+                "\n",
+                "    # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n",
+                "    #\n",
+                "    wait = True \n",
+                "\n",
+                "    try:\n",
+                "        if no_output:\n",
+                "            p = Popen(cmd_actual)\n",
+                "        else:\n",
+                "            p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n",
+                "            with p.stdout:\n",
+                "                for line in iter(p.stdout.readline, b''):\n",
+                "                    line = line.decode()\n",
+                "                    if return_output:\n",
+                "                        output = output + line\n",
+                "                    else:\n",
+                "                        if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n",
+                "                            regex = re.compile('  \"(.*)\"\\: \"(.*)\"') \n",
+                "                            match = regex.match(line)\n",
+                "                            if match:\n",
+                "                                if match.group(1).find(\"HTML\") != -1:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n",
+                "                                else:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n",
+                "\n",
+                "                                    wait = False\n",
+                "                                    break # otherwise infinite hang, have not worked out why yet.\n",
+                "                        else:\n",
+                "                            print(line, end='')\n",
+                "                            if rules is not None:\n",
+                "                                apply_expert_rules(line)\n",
+                "\n",
+                "        if wait:\n",
+                "            p.wait()\n",
+                "    except FileNotFoundError as e:\n",
+                "        if install_hint is not None:\n",
+                "            display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n",
+                "\n",
+                "    exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n",
+                "\n",
+                "    if not no_output:\n",
+                "        for line in iter(p.stderr.readline, b''):\n",
+                "            try:\n",
+                "                line_decoded = line.decode()\n",
+                "            except UnicodeDecodeError:\n",
+                "                # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n",
+                "                #\n",
+                "                #   \\xa0\n",
+                "                #\n",
+                "                # For example see this in the response from `az group create`:\n",
+                "                #\n",
+                "                # ERROR: Get Token request returned http error: 400 and server \n",
+                "                # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n",
+                "                # The refresh token has expired due to inactivity.\\xa0The token was \n",
+                "                # issued on 2018-10-25T23:35:11.9832872Z\n",
+                "                #\n",
+                "                # which generates the exception:\n",
+                "                #\n",
+                "                # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n",
+                "                #\n",
+                "                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n",
+                "                print(line)\n",
+                "                line_decoded = \"\"\n",
+                "                pass\n",
+                "            else:\n",
+                "\n",
+                "                # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n",
+                "                # print this empty \"ERR:\" as it confuses.\n",
+                "                #\n",
+                "                if line_decoded == \"\":\n",
+                "                    continue\n",
+                "                \n",
+                "                print(f\"STDERR: {line_decoded}\", end='')\n",
+                "\n",
+                "                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n",
+                "                    exit_code_workaround = 1\n",
+                "\n",
+                "                # inject HINTs to next TSG/SOP based on output in stderr\n",
+                "                #\n",
+                "                if user_provided_exe_name in error_hints:\n",
+                "                    for error_hint in error_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(error_hint[0]) != -1:\n",
+                "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
+                "\n",
+                "                # apply expert rules (to run follow-on notebooks), based on output\n",
+                "                #\n",
+                "                if rules is not None:\n",
+                "                    apply_expert_rules(line_decoded)\n",
+                "\n",
+                "                # Verify if a transient error, if so automatically retry (recursive)\n",
+                "                #\n",
+                "                if user_provided_exe_name in retry_hints:\n",
+                "                    for retry_hint in retry_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(retry_hint) != -1:\n",
+                "                            if retry_count < MAX_RETRIES:\n",
+                "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
+                "                                retry_count = retry_count + 1\n",
+                "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
+                "\n",
+                "                                if return_output:\n",
+                "                                    if base64_decode:\n",
+                "                                        import base64\n",
+                "\n",
+                "                                        return base64.b64decode(output).decode('utf-8')\n",
+                "                                    else:\n",
+                "                                        return output\n",
+                "\n",
+                "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
+                "\n",
+                "    # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n",
+                "    # don't wait here, if success known above\n",
+                "    #\n",
+                "    if wait: \n",
+                "        if p.returncode != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n",
+                "    else:\n",
+                "        if exit_code_workaround !=0 :\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n",
+                "\n",
+                "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
+                "\n",
+                "    if return_output:\n",
+                "        if base64_decode:\n",
+                "            import base64\n",
+                "\n",
+                "            return base64.b64decode(output).decode('utf-8')\n",
+                "        else:\n",
+                "            return output\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    \"\"\"Load a json file from disk and return the contents\"\"\"\n",
+                "\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def load_rules():\n",
+                "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
+                "\n",
+                "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
+                "    #\n",
+                "    try:\n",
+                "        j = load_json(\"cer032-sign-app-proxy-generated-cert.ipynb\")\n",
+                "    except:\n",
+                "        pass # If the user has renamed the book, we can't load ourself.  NOTE: Is there a way in Jupyter, to know your own filename?\n",
+                "    else:\n",
+                "        if \"metadata\" in j and \\\n",
+                "            \"azdata\" in j[\"metadata\"] and \\\n",
+                "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
+                "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "\n",
+                "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
+                "\n",
+                "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n",
+                "\n",
+                "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
+                "\n",
+                "            return rules\n",
+                "\n",
+                "def apply_expert_rules(line):\n",
+                "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n",
+                "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
+                "\n",
+                "    global rules\n",
+                "\n",
+                "    for rule in rules:\n",
+                "        notebook = rule[1]\n",
+                "        cell_type = rule[2]\n",
+                "        output_type = rule[3] # i.e. stream or error\n",
+                "        output_type_name = rule[4] # i.e. ename or name \n",
+                "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
+                "        details_name = rule[6]  # i.e. evalue or text \n",
+                "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
+                "\n",
+                "        if debug_logging:\n",
+                "            print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n",
+                "\n",
+                "        if re.match(expression, line, re.DOTALL):\n",
+                "\n",
+                "            if debug_logging:\n",
+                "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n",
+                "\n",
+                "            match_found = True\n",
+                "\n",
+                "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
+                "\n",
+                "\n",
+                "\n",
+                "print('Common functions defined successfully.')\n",
+                "\n",
+                "# Hints for binary (transient fault) retry, (known) error and install guide\n",
+                "#\n",
+                "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n",
+                "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n",
+                "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}"
+            ]
+        },
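+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "As an orientation aid, here is a minimal, illustrative use of the `run` helper defined above (the command chosen is arbitrary); it streams stdout, prints START/SUCCESS banners, and can optionally capture the output:\n",
+                "\n",
+                "```python\n",
+                "# Illustrative smoke test of the `run` helper\n",
+                "run('kubectl version --client')\n",
+                "\n",
+                "# Capture the output as a string instead of streaming it\n",
+                "client_version = run('kubectl version --client', return_output=True)\n",
+                "```"
+            ]
+        },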
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the Kubernetes namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster use the kubectl command line\n",
+                "interface .\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
+                "    except:\n",
+                "        from IPython.display import Markdown\n",
+                "        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'.  SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')"
+            ]
+        },
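+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "A minimal sketch (illustrative, not part of the standard flow) of how to target a specific cluster when several namespaces carry the MSSQL_CLUSTER label; the index `1` below is purely an example:\n",
+                "\n",
+                "```python\n",
+                "# List all labelled namespaces, then pick one by index (1 is illustrative)\n",
+                "namespaces = run('kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={.items[*].metadata.name}', return_output=True).split()\n",
+                "namespace = namespaces[1]  # adjust the index to the intended Big Data Cluster\n",
+                "```"
+            ]
+        },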
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Create a temporary directory to stage files"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Create a temporary directory to hold configuration files\n",
+                "\n",
+                "import tempfile\n",
+                "\n",
+                "temp_dir = tempfile.mkdtemp()\n",
+                "\n",
+                "print(f\"Temporary directory created: {temp_dir}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Helper function to save configuration files to disk"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define helper function 'save_file' to save configuration files to the temporary directory created above\n",
+                "import os\n",
+                "import io\n",
+                "\n",
+                "def save_file(filename, contents):\n",
+                "    with io.open(os.path.join(temp_dir, filename), \"w\", encoding='utf8', newline='\\n') as text_file:\n",
+                "      text_file.write(contents)\n",
+                "\n",
+                "      print(\"File saved: \" + os.path.join(temp_dir, filename))\n",
+                "\n",
+                "print(\"Function `save_file` defined successfully.\")"
+            ]
+        },
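+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "As an illustrative usage example (the filename and contents below are hypothetical), `save_file` stages a file into the temporary directory and prints its full path:\n",
+                "\n",
+                "```python\n",
+                "# Write a small sample file into temp_dir\n",
+                "save_file(\"example.cnf\", \"# sample contents\\n\")\n",
+                "```"
+            ]
+        },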
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get name of the \u2018Running\u2019 `controller` `pod`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place the name  of the 'Running' controller pod in variable `controller`\n",
+                "\n",
+                "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n",
+                "\n",
+                "print(f\"Controller pod name: {controller}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Create Signing Request configuration file"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "certificate = f\"\"\"\n",
+                "[ ca ]\n",
+                "default_ca    = CA_default      # The default ca section\n",
+                "\n",
+                "[ CA_default ]\n",
+                "default_days     = 1000         # How long to certify for\n",
+                "default_crl_days = 30           # How long before next CRL\n",
+                "default_md       = sha256       # Use public key default MD\n",
+                "preserve         = no           # Keep passed DN ordering\n",
+                "\n",
+                "x509_extensions = ca_extensions # The extensions to add to the cert\n",
+                "\n",
+                "email_in_dn     = no            # Don't concat the email in the DN\n",
+                "copy_extensions = copy          # Required to copy SANs from CSR to cert\n",
+                "\n",
+                "base_dir      = {test_cert_store_root}\n",
+                "certificate   = $base_dir/{certificate_filename}   # The CA certifcate\n",
+                "private_key   = $base_dir/{private_key_filename}    # The CA private key\n",
+                "new_certs_dir = $base_dir              # Location for new certs after signing\n",
+                "database      = $base_dir/index.txt    # Database index file\n",
+                "serial        = $base_dir/serial.txt   # The current serial number\n",
+                "\n",
+                "unique_subject = no  # Set to 'no' to allow creation of\n",
+                "                     # several certificates with same subject.\n",
+                "\n",
+                "[ req ]\n",
+                "default_bits       = 2048\n",
+                "default_keyfile    = {test_cert_store_root}/{private_key_filename}\n",
+                "distinguished_name = ca_distinguished_name\n",
+                "x509_extensions    = ca_extensions\n",
+                "string_mask        = utf8only\n",
+                "\n",
+                "[ ca_distinguished_name ]\n",
+                "countryName         = Country Name (2 letter code)\n",
+                "countryName_default = {country_name}\n",
+                "\n",
+                "stateOrProvinceName         = State or Province Name (full name)\n",
+                "stateOrProvinceName_default = {state_or_province_name}\n",
+                "\n",
+                "localityName                = Locality Name (eg, city)\n",
+                "localityName_default        = {locality_name}\n",
+                "\n",
+                "organizationName            = Organization Name (eg, company)\n",
+                "organizationName_default    = {organization_name}\n",
+                "\n",
+                "organizationalUnitName         = Organizational Unit (eg, division)\n",
+                "organizationalUnitName_default = {organizational_unit_name}\n",
+                "\n",
+                "commonName         = Common Name (e.g. server FQDN or YOUR name)\n",
+                "commonName_default = {common_name}\n",
+                "\n",
+                "emailAddress         = Email Address\n",
+                "emailAddress_default = {email_address}\n",
+                "\n",
+                "[ ca_extensions ]\n",
+                "subjectKeyIdentifier   = hash\n",
+                "authorityKeyIdentifier = keyid:always, issuer\n",
+                "basicConstraints       = critical, CA:true\n",
+                "keyUsage               = keyCertSign, cRLSign\n",
+                "\n",
+                "[ signing_policy ]\n",
+                "countryName            = optional\n",
+                "stateOrProvinceName    = optional\n",
+                "localityName           = optional\n",
+                "organizationName       = optional\n",
+                "organizationalUnitName = optional\n",
+                "commonName             = supplied\n",
+                "emailAddress           = optional\n",
+                "\n",
+                "[ signing_req ]\n",
+                "subjectKeyIdentifier   = hash\n",
+                "authorityKeyIdentifier = keyid,issuer\n",
+                "basicConstraints       = CA:FALSE\n",
+                "keyUsage               = digitalSignature, keyEncipherment\n",
+                "\"\"\"\n",
+                "\n",
+                "save_file(ssl_configuration_file, certificate)\n"
+            ]
+        },
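+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "Because `copy_extensions = copy` carries the Subject Alternative Names from the CSR into the signed certificate, it can be useful to confirm the SANs present in the CSR before signing. A minimal sketch (the `grep` filter is illustrative):\n",
+                "\n",
+                "```python\n",
+                "# Show only the SAN section of the signing request on the controller pod\n",
+                "cmd = f\"openssl req -in {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-signingrequest.csr -text -noout | grep -A1 'Subject Alternative Name'\"\n",
+                "\n",
+                "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')\n",
+                "```"
+            ]
+        },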
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Copy certificate configuration to `controller` `pod`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import os\n",
+                "\n",
+                "cwd = os.getcwd()\n",
+                "os.chdir(temp_dir) # Use chdir to workaround kubectl bug on Windows, which incorrectly processes 'c:\\' on kubectl cp cmd line \n",
+                "\n",
+                "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"mkdir -p {test_cert_store_root}/{app_name}\"')\n",
+                "\n",
+                "run(f'kubectl cp {ssl_configuration_file} {controller}:{test_cert_store_root}/{app_name}/{ssl_configuration_file} -c controller -n {namespace}')\n",
+                "\n",
+                "os.chdir(cwd)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Obtain CA certificate and extract the private key from pfx if Big Data Cluster CA is used"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "if use_bdc_ca:\n",
+                "\n",
+                "  cmd = f\"cp /var/run/secrets/certificates/rootca/cluster-ca-certificate.crt {test_cert_store_root}/{certificate_filename}\"\n",
+                "\n",
+                "  run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')\n",
+                "\n",
+                "  cmd = f\"NO_PASSWORD= openssl pkcs12 -in /var/run/secrets/certificates/rootca/cluster-ca-certificate.p12 -out {test_cert_store_root}/{private_key_filename} -nocerts -nodes -password env:NO_PASSWORD\"\n",
+                "\n",
+                "  run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')"
+            ]
+        },
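+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "When `use_bdc_ca` is set, an optional sanity check (a sketch, assuming an RSA key; not required by this notebook) is to compare the modulus of the CA certificate with that of the extracted private key; the two digests should match:\n",
+                "\n",
+                "```python\n",
+                "# Digest of the CA certificate's public-key modulus\n",
+                "cmd = f\"openssl x509 -noout -modulus -in {test_cert_store_root}/{certificate_filename} | openssl md5\"\n",
+                "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')\n",
+                "\n",
+                "# Digest of the extracted private key's modulus (should match the digest above)\n",
+                "cmd = f\"openssl rsa -noout -modulus -in {test_cert_store_root}/{private_key_filename} | openssl md5\"\n",
+                "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')\n",
+                "```"
+            ]
+        },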
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Set next serial number"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "run(f'kubectl exec {controller} -n {namespace} -c controller -- bash -c \"test -f {test_cert_store_root}/index.txt || touch {test_cert_store_root}/index.txt\"')\n",
+                "run(f\"\"\"kubectl exec {controller} -n {namespace} -c controller -- bash -c \"test -f {test_cert_store_root}/serial.txt || echo '00' > {test_cert_store_root}/serial.txt\" \"\"\")\n",
+                "\n",
+                "current_serial_number = run(f\"\"\"kubectl exec {controller} -n {namespace} -c controller -- bash -c \"cat {test_cert_store_root}/serial.txt\" \"\"\", return_output=True)\n",
+                "\n",
+                "# The serial number is hex\n",
+                "new_serial_number = int(f\"0x{current_serial_number}\", 0) + 1\n",
+                "\n",
+                "run(f\"\"\"kubectl exec {controller} -n {namespace} -c controller -- bash -c \"echo '{new_serial_number:02X}' > {test_cert_store_root}/serial.txt\" \"\"\")"
+            ]
+        },
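+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "To make the hexadecimal serial arithmetic above concrete, a small worked example (the starting value '0A' is illustrative):\n",
+                "\n",
+                "```python\n",
+                "# '0A' hex is 10 decimal; incrementing gives 11, which is written back as '0B'\n",
+                "current_serial_number = \"0A\"\n",
+                "new_serial_number = int(f\"0x{current_serial_number}\", 0) + 1\n",
+                "print(f\"{new_serial_number:02X}\")  # prints: 0B\n",
+                "```"
+            ]
+        },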
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Create private key and certificate signing request\n",
+                "\n",
+                "Use openssl ca to create a private key and signing request. See:\n",
+                "\n",
+                "-   https://www.openssl.org/docs/man1.0.2/man1/ca.html"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "cmd = f\"openssl ca -notext -batch -config {test_cert_store_root}/{app_name}/ca.openssl.cnf -policy signing_policy -extensions signing_req -out {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -infiles {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-signingrequest.csr\"\n",
+                "\n",
+                "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Display certificate\n",
+                "\n",
+                "Use openssl x509 to display the certificate, so it can be visually\n",
+                "verified to be correct."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "cmd = f\"openssl x509 -in {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -text -noout\"\n",
+                "\n",
+                "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')"
+            ]
+        },
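+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "If only the validity window needs checking, a narrower sketch using `openssl x509 -noout -dates` prints just the notBefore/notAfter fields:\n",
+                "\n",
+                "```python\n",
+                "# Print only the validity dates of the signed certificate\n",
+                "cmd = f\"openssl x509 -in {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -noout -dates\"\n",
+                "\n",
+                "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')\n",
+                "```"
+            ]
+        },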
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Clean up temporary directory for staging configuration files"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Delete the temporary directory used to hold configuration files\n",
+                "\n",
+                "import shutil\n",
+                "\n",
+                "shutil.rmtree(temp_dir)\n",
+                "\n",
+                "print(f'Temporary directory deleted: {temp_dir}')"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "Related\n",
+                "-------\n",
+                "\n",
+                "-   [CER040 - Install signed Management Proxy\n",
+                "    certificate](../cert-management/cer040-install-service-proxy-cert.ipynb)\n",
+                "\n",
+                "-   [CER022 - Create App Proxy\n",
+                "    certificate](../cert-management/cer022-create-app-proxy-cert.ipynb)\n",
+                "\n",
+                "-   [CER042 - Install signed App-Proxy\n",
+                "    certificate](../cert-management/cer042-install-app-proxy-cert.ipynb)"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": true
+        }
+    }
+}

+ 817 - 0
Big-Data-Clusters/CU8/Public/content/cert-management/cer033-sign-master-generated-certs.ipynb

@@ -0,0 +1,817 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "CER033 - Sign Master certificates with generated CA\n",
+                "===================================================\n",
+                "\n",
+                "This notebook signs the certificates created using:\n",
+                "\n",
+                "-   [CER023 - Create Master\n",
+                "    certificates](../cert-management/cer023-create-master-certs.ipynb)\n",
+                "\n",
+                "with the Big Data Cluster CA or the generated Root CA Certificate,\n",
+                "created using either:\n",
+                "\n",
+                "-   [CER001 - Generate a Root CA\n",
+                "    certificate](../cert-management/cer001-create-root-ca.ipynb)\n",
+                "-   [CER003 - Upload existing Root CA\n",
+                "    certificate](../cert-management/cer003-upload-existing-root-ca.ipynb)\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "import getpass\n",
+                "\n",
+                "app_name = \"master\"\n",
+                "scaledset_name = \"master\"\n",
+                "container_name = \"mssql-server\"\n",
+                "prefix_keyfile_name = \"sql\"\n",
+                "common_name = \"master-svc\"\n",
+                "\n",
+                "country_name = \"US\"\n",
+                "state_or_province_name = \"Illinois\"\n",
+                "locality_name = \"Chicago\"\n",
+                "organization_name = \"Contoso\"\n",
+                "organizational_unit_name = \"Finance\"\n",
+                "email_address = f\"{getpass.getuser().lower()}@contoso.com\"\n",
+                "\n",
+                "ssl_configuration_file = \"ca.openssl.cnf\"\n",
+                "\n",
+                "days = \"398\" # the number of days to certify the certificate for\n",
+                "\n",
+                "certificate_filename = \"cacert.pem\"\n",
+                "private_key_filename = \"cakey.pem\"\n",
+                "use_bdc_ca = False\n",
+                "\n",
+                "test_cert_store_root = \"/var/opt/secrets/test-certificates\""
+            ]
+        },
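+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "The `use_bdc_ca` parameter selects which CA signs the master certificates. A minimal illustrative override, for when the Big Data Cluster's own CA should be used instead of the generated Root CA:\n",
+                "\n",
+                "```python\n",
+                "# Illustrative: sign with the cluster's built-in CA rather than the generated Root CA\n",
+                "use_bdc_ca = True\n",
+                "```"
+            ]
+        },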
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Common functions\n",
+                "\n",
+                "Define helper functions used in this notebook."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n",
+                "import sys\n",
+                "import os\n",
+                "import re\n",
+                "import json\n",
+                "import platform\n",
+                "import shlex\n",
+                "import shutil\n",
+                "import datetime\n",
+                "\n",
+                "from subprocess import Popen, PIPE\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
+                "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
+                "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
+                "\n",
+                "first_run = True\n",
+                "rules = None\n",
+                "debug_logging = False\n",
+                "\n",
+                "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
+                "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
+                "\n",
+                "    NOTES:\n",
+                "\n",
+                "    1.  Commands that need this kind of ' quoting on Windows e.g.:\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n",
+                "\n",
+                "        Need to actually pass in as '\"':\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n",
+                "\n",
+                "        The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n",
+                "        \n",
+                "            `iter(p.stdout.readline, b'')`\n",
+                "\n",
+                "        The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n",
+                "    \"\"\"\n",
+                "    MAX_RETRIES = 5\n",
+                "    output = \"\"\n",
+                "    retry = False\n",
+                "\n",
+                "    global first_run\n",
+                "    global rules\n",
+                "\n",
+                "    if first_run:\n",
+                "        first_run = False\n",
+                "        rules = load_rules()\n",
+                "\n",
+                "    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n",
+                "    #\n",
+                "    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
+                "    #\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n",
+                "        cmd = cmd.replace(\"\\n\", \" \")\n",
+                "\n",
+                "    # shlex.split is required on bash and for Windows paths with spaces\n",
+                "    #\n",
+                "    cmd_actual = shlex.split(cmd)\n",
+                "\n",
+                "    # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n",
+                "    #\n",
+                "    user_provided_exe_name = cmd_actual[0].lower()\n",
+                "\n",
+                "    # When running python, use the python in the ADS sandbox ({sys.executable})\n",
+                "    #\n",
+                "    if cmd.startswith(\"python \"):\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n",
+                "\n",
+                "        # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n",
+                "        # with:\n",
+                "        #\n",
+                "        #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n",
+                "        #\n",
+                "        # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n",
+                "        #\n",
+                "        if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n",
+                "            os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n",
+                "\n",
+                "    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n",
+                "    #\n",
+                "    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n",
+                "\n",
+                "    # To aid supportability, determine which binary file will actually be executed on the machine\n",
+                "    #\n",
+                "    which_binary = None\n",
+                "\n",
+                "    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n",
+                "    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n",
+                "    # of CURL exists on the machine use that one.  (Unfortunately the curl.exe in System32 is almost\n",
+                "    # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n",
+                "    # look for the 2nd installation of CURL in the path)\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n",
+                "        path = os.getenv('PATH')\n",
+                "        for p in path.split(os.path.pathsep):\n",
+                "            p = os.path.join(p, \"curl.exe\")\n",
+                "            if os.path.exists(p) and os.access(p, os.X_OK):\n",
+                "                if p.lower().find(\"system32\") == -1:\n",
+                "                    cmd_actual[0] = p\n",
+                "                    which_binary = p\n",
+                "                    break\n",
+                "\n",
+                "    # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n",
+                "    # seems to be required for .msi installs of azdata.cmd/az.cmd.  (otherwise Popen returns FileNotFound) \n",
+                "    #\n",
+                "    # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        which_binary = shutil.which(cmd_actual[0])\n",
+                "\n",
+                "    # Display an install HINT, so the user can click on a SOP to install the missing binary\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n",
+                "        print(sys.path)\n",
+                "\n",
+                "        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
+                "            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n",
+                "    else:   \n",
+                "        cmd_actual[0] = which_binary\n",
+                "\n",
+                "    start_time = datetime.datetime.now().replace(microsecond=0)\n",
+                "\n",
+                "    print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n",
+                "    print(f\"       using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n",
+                "    print(f\"       cwd: {os.getcwd()}\")\n",
+                "\n",
+                "    # Command-line tools such as CURL and AZDATA HDFS commands output\n",
+                "    # scrolling progress bars, which causes Jupyter to hang forever, to\n",
+                "    # workaround this, use no_output=True\n",
+                "    #\n",
+                "\n",
+                "\n",
+                "    # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n",
+                "    #\n",
+                "    wait = True \n",
+                "\n",
+                "    try:\n",
+                "        if no_output:\n",
+                "            p = Popen(cmd_actual)\n",
+                "        else:\n",
+                "            p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n",
+                "            with p.stdout:\n",
+                "                for line in iter(p.stdout.readline, b''):\n",
+                "                    line = line.decode()\n",
+                "                    if return_output:\n",
+                "                        output = output + line\n",
+                "                    else:\n",
+                "                        if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n",
+                "                            regex = re.compile('  \"(.*)\"\\: \"(.*)\"') \n",
+                "                            match = regex.match(line)\n",
+                "                            if match:\n",
+                "                                if match.group(1).find(\"HTML\") != -1:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n",
+                "                                else:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n",
+                "\n",
+                "                                    wait = False\n",
+                "                                    break # otherwise infinite hang, have not worked out why yet.\n",
+                "                        else:\n",
+                "                            print(line, end='')\n",
+                "                            if rules is not None:\n",
+                "                                apply_expert_rules(line)\n",
+                "\n",
+                "        if wait:\n",
+                "            p.wait()\n",
+                "    except FileNotFoundError as e:\n",
+                "        if install_hint is not None:\n",
+                "            display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n",
+                "\n",
+                "    exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n",
+                "\n",
+                "    if not no_output:\n",
+                "        for line in iter(p.stderr.readline, b''):\n",
+                "            try:\n",
+                "                line_decoded = line.decode()\n",
+                "            except UnicodeDecodeError:\n",
+                "                # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n",
+                "                #\n",
+                "                #   \\xa0\n",
+                "                #\n",
+                "                # For example see this in the response from `az group create`:\n",
+                "                #\n",
+                "                # ERROR: Get Token request returned http error: 400 and server \n",
+                "                # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n",
+                "                # The refresh token has expired due to inactivity.\\xa0The token was \n",
+                "                # issued on 2018-10-25T23:35:11.9832872Z\n",
+                "                #\n",
+                "                # which generates the exception:\n",
+                "                #\n",
+                "                # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n",
+                "                #\n",
+                "                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n",
+                "                print(line)\n",
+                "                line_decoded = \"\"\n",
+                "                pass\n",
+                "            else:\n",
+                "\n",
+                "                # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n",
+                "                # print this empty \"ERR:\" as it confuses.\n",
+                "                #\n",
+                "                if line_decoded == \"\":\n",
+                "                    continue\n",
+                "                \n",
+                "                print(f\"STDERR: {line_decoded}\", end='')\n",
+                "\n",
+                "                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n",
+                "                    exit_code_workaround = 1\n",
+                "\n",
+                "                # inject HINTs to next TSG/SOP based on output in stderr\n",
+                "                #\n",
+                "                if user_provided_exe_name in error_hints:\n",
+                "                    for error_hint in error_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(error_hint[0]) != -1:\n",
+                "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
+                "\n",
+                "                # apply expert rules (to run follow-on notebooks), based on output\n",
+                "                #\n",
+                "                if rules is not None:\n",
+                "                    apply_expert_rules(line_decoded)\n",
+                "\n",
+                "                # Verify if a transient error, if so automatically retry (recursive)\n",
+                "                #\n",
+                "                if user_provided_exe_name in retry_hints:\n",
+                "                    for retry_hint in retry_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(retry_hint) != -1:\n",
+                "                            if retry_count < MAX_RETRIES:\n",
+                "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
+                "                                retry_count = retry_count + 1\n",
+                "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
+                "\n",
+                "                                if return_output:\n",
+                "                                    if base64_decode:\n",
+                "                                        import base64\n",
+                "\n",
+                "                                        return base64.b64decode(output).decode('utf-8')\n",
+                "                                    else:\n",
+                "                                        return output\n",
+                "\n",
+                "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
+                "\n",
+                "    # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n",
+                "    # don't wait here, if success known above\n",
+                "    #\n",
+                "    if wait: \n",
+                "        if p.returncode != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n",
+                "    else:\n",
+                "        if exit_code_workaround !=0 :\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n",
+                "\n",
+                "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
+                "\n",
+                "    if return_output:\n",
+                "        if base64_decode:\n",
+                "            import base64\n",
+                "\n",
+                "            return base64.b64decode(output).decode('utf-8')\n",
+                "        else:\n",
+                "            return output\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    \"\"\"Load a json file from disk and return the contents\"\"\"\n",
+                "\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def load_rules():\n",
+                "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
+                "\n",
+                "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
+                "    #\n",
+                "    try:\n",
+                "        j = load_json(\"cer033-sign-master-generated-certs.ipynb\")\n",
+                "    except:\n",
+                "        pass # If the user has renamed the book, we can't load ourself.  NOTE: Is there a way in Jupyter, to know your own filename?\n",
+                "    else:\n",
+                "        if \"metadata\" in j and \\\n",
+                "            \"azdata\" in j[\"metadata\"] and \\\n",
+                "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
+                "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "\n",
+                "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
+                "\n",
+                "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n",
+                "\n",
+                "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
+                "\n",
+                "            return rules\n",
+                "\n",
+                "def apply_expert_rules(line):\n",
+                "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n",
+                "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
+                "\n",
+                "    global rules\n",
+                "\n",
+                "    for rule in rules:\n",
+                "        notebook = rule[1]\n",
+                "        cell_type = rule[2]\n",
+                "        output_type = rule[3] # i.e. stream or error\n",
+                "        output_type_name = rule[4] # i.e. ename or name \n",
+                "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
+                "        details_name = rule[6]  # i.e. evalue or text \n",
+                "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
+                "\n",
+                "        if debug_logging:\n",
+                "            print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n",
+                "\n",
+                "        if re.match(expression, line, re.DOTALL):\n",
+                "\n",
+                "            if debug_logging:\n",
+                "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n",
+                "\n",
+                "            match_found = True\n",
+                "\n",
+                "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
+                "\n",
+                "\n",
+                "\n",
+                "print('Common functions defined successfully.')\n",
+                "\n",
+                "# Hints for binary (transient fault) retry, (known) error and install guide\n",
+                "#\n",
+                "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n",
+                "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n",
+                "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the Kubernetes namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster use the kubectl command line\n",
+                "interface .\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
+                "    except:\n",
+                "        from IPython.display import Markdown\n",
+                "        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'.  SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Create a temporary directory to stage files"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Create a temporary directory to hold configuration files\n",
+                "\n",
+                "import tempfile\n",
+                "\n",
+                "temp_dir = tempfile.mkdtemp()\n",
+                "\n",
+                "print(f\"Temporary directory created: {temp_dir}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Helper function to save configuration files to disk"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define helper function 'save_file' to save configuration files to the temporary directory created above\n",
+                "import os\n",
+                "import io\n",
+                "\n",
+                "def save_file(filename, contents):\n",
+                "    with io.open(os.path.join(temp_dir, filename), \"w\", encoding='utf8', newline='\\n') as text_file:\n",
+                "      text_file.write(contents)\n",
+                "\n",
+                "      print(\"File saved: \" + os.path.join(temp_dir, filename))\n",
+                "\n",
+                "print(\"Function `save_file` defined successfully.\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get name of the \u2018Running\u2019 `controller` `pod`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place the name  of the 'Running' controller pod in variable `controller`\n",
+                "\n",
+                "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n",
+                "\n",
+                "print(f\"Controller pod name: {controller}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Create Signing Request configuration file"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "certificate = f\"\"\"\n",
+                "[ ca ]\n",
+                "default_ca    = CA_default      # The default ca section\n",
+                "\n",
+                "[ CA_default ]\n",
+                "default_days     = 1000         # How long to certify for\n",
+                "default_crl_days = 30           # How long before next CRL\n",
+                "default_md       = sha256       # Use public key default MD\n",
+                "preserve         = no           # Keep passed DN ordering\n",
+                "\n",
+                "x509_extensions = ca_extensions # The extensions to add to the cert\n",
+                "\n",
+                "email_in_dn     = no            # Don't concat the email in the DN\n",
+                "copy_extensions = copy          # Required to copy SANs from CSR to cert\n",
+                "\n",
+                "base_dir      = {test_cert_store_root}\n",
+                "certificate   = $base_dir/{certificate_filename}   # The CA certifcate\n",
+                "private_key   = $base_dir/{private_key_filename}    # The CA private key\n",
+                "new_certs_dir = $base_dir              # Location for new certs after signing\n",
+                "database      = $base_dir/index.txt    # Database index file\n",
+                "serial        = $base_dir/serial.txt   # The current serial number\n",
+                "\n",
+                "unique_subject = no  # Set to 'no' to allow creation of\n",
+                "                     # several certificates with same subject.\n",
+                "\n",
+                "[ req ]\n",
+                "default_bits       = 2048\n",
+                "default_keyfile    = {test_cert_store_root}/{private_key_filename}\n",
+                "distinguished_name = ca_distinguished_name\n",
+                "x509_extensions    = ca_extensions\n",
+                "string_mask        = utf8only\n",
+                "\n",
+                "[ ca_distinguished_name ]\n",
+                "countryName         = Country Name (2 letter code)\n",
+                "countryName_default = {country_name}\n",
+                "\n",
+                "stateOrProvinceName         = State or Province Name (full name)\n",
+                "stateOrProvinceName_default = {state_or_province_name}\n",
+                "\n",
+                "localityName                = Locality Name (eg, city)\n",
+                "localityName_default        = {locality_name}\n",
+                "\n",
+                "organizationName            = Organization Name (eg, company)\n",
+                "organizationName_default    = {organization_name}\n",
+                "\n",
+                "organizationalUnitName         = Organizational Unit (eg, division)\n",
+                "organizationalUnitName_default = {organizational_unit_name}\n",
+                "\n",
+                "commonName         = Common Name (e.g. server FQDN or YOUR name)\n",
+                "commonName_default = {common_name}\n",
+                "\n",
+                "emailAddress         = Email Address\n",
+                "emailAddress_default = {email_address}\n",
+                "\n",
+                "[ ca_extensions ]\n",
+                "subjectKeyIdentifier   = hash\n",
+                "authorityKeyIdentifier = keyid:always, issuer\n",
+                "basicConstraints       = critical, CA:true\n",
+                "keyUsage               = keyCertSign, cRLSign\n",
+                "\n",
+                "[ signing_policy ]\n",
+                "countryName            = optional\n",
+                "stateOrProvinceName    = optional\n",
+                "localityName           = optional\n",
+                "organizationName       = optional\n",
+                "organizationalUnitName = optional\n",
+                "commonName             = supplied\n",
+                "emailAddress           = optional\n",
+                "\n",
+                "[ signing_req ]\n",
+                "subjectKeyIdentifier   = hash\n",
+                "authorityKeyIdentifier = keyid,issuer\n",
+                "basicConstraints       = CA:FALSE\n",
+                "keyUsage               = digitalSignature, keyEncipherment\n",
+                "\"\"\"\n",
+                "\n",
+                "save_file(ssl_configuration_file, certificate)\n"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Copy certificate configuration to `controller` `pod`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import os\n",
+                "\n",
+                "cwd = os.getcwd()\n",
+                "os.chdir(temp_dir) # Use chdir to workaround kubectl bug on Windows, which incorrectly processes 'c:\\' on kubectl cp cmd line \n",
+                "\n",
+                "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"mkdir -p {test_cert_store_root}/{app_name}\"')\n",
+                "\n",
+                "run(f'kubectl cp {ssl_configuration_file} {controller}:{test_cert_store_root}/{app_name}/{ssl_configuration_file} -c controller -n {namespace}')\n",
+                "\n",
+                "os.chdir(cwd)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Obtain CA certificate and extract the private key from pfx if Big Data Cluster CA is used"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "if use_bdc_ca:\n",
+                "\n",
+                "  cmd = f\"cp /var/run/secrets/certificates/rootca/cluster-ca-certificate.crt {test_cert_store_root}/{certificate_filename}\"\n",
+                "\n",
+                "  run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')\n",
+                "\n",
+                "  cmd = f\"NO_PASSWORD= openssl pkcs12 -in /var/run/secrets/certificates/rootca/cluster-ca-certificate.p12 -out {test_cert_store_root}/{private_key_filename} -nocerts -nodes -password env:NO_PASSWORD\"\n",
+                "\n",
+                "  run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the name of the `master` `pods`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place the name of the master pods in variable `pods`\n",
+                "\n",
+                "podNames = run(f'kubectl get pod --selector=app=master -n {namespace} -o jsonpath={{.items[*].metadata.name}}', return_output=True)\n",
+                "pods = podNames.split(\" \")\n",
+                "\n",
+                "print(f\"Master pod names: {pods}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Set next serial number and create the certificates"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "pods.sort()\n",
+                "  \n",
+                "for pod_name in pods:\n",
+                "    \n",
+                "    run(f'kubectl exec {controller} -n {namespace} -c controller -- bash -c \"test -f {test_cert_store_root}/index.txt || touch {test_cert_store_root}/index.txt\"')\n",
+                "    run(f\"\"\"kubectl exec {controller} -n {namespace} -c controller -- bash -c \"test -f {test_cert_store_root}/serial.txt || echo '00' > {test_cert_store_root}/serial.txt\" \"\"\")\n",
+                "\n",
+                "    current_serial_number = run(f\"\"\"kubectl exec {controller} -n {namespace} -c controller -- bash -c \"cat {test_cert_store_root}/serial.txt\" \"\"\", return_output=True)\n",
+                "\n",
+                "    # The serial number is hex\n",
+                "\n",
+                "    new_serial_number = int(f\"0x{current_serial_number}\", 0) + 1\n",
+                "\n",
+                "    run(f\"\"\"kubectl exec {controller} -n {namespace} -c controller -- bash -c \"echo '{new_serial_number:02X}' > {test_cert_store_root}/serial.txt\" \"\"\")\n",
+                "\n",
+                "    # Create certificate \n",
+                "\n",
+                "    cmd = f\"openssl ca -notext -batch -config {test_cert_store_root}/{app_name}/ca.openssl.cnf -policy signing_policy -extensions signing_req -out {test_cert_store_root}/{app_name}/{pod_name}-certificate.pem -infiles {test_cert_store_root}/{app_name}/{pod_name}-signingrequest.csr\"\n",
+                "\n",
+                "    run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')\n",
+                "\n",
+                "    ### Display certificate. Use openssl x509 to display the certificate, so it can be visually verified to be correct. \n",
+                "\n",
+                "    cmd = f\"openssl x509 -in {test_cert_store_root}/{app_name}/{pod_name}-certificate.pem -text -noout\"\n",
+                "\n",
+                "    run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Clean up temporary directory for staging configuration files"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Delete the temporary directory used to hold configuration files\n",
+                "\n",
+                "import shutil\n",
+                "\n",
+                "shutil.rmtree(temp_dir)\n",
+                "\n",
+                "print(f'Temporary directory deleted: {temp_dir}')"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "Related\n",
+                "-------\n",
+                "\n",
+                "-   [CER034 - Sign Controller certificate with cluster Root\n",
+                "    CA](../cert-management/cer034-sign-controller-generated-cert.ipynb)\n",
+                "\n",
+                "-   [CER023 - Create Master\n",
+                "    certificates](../cert-management/cer023-create-master-certs.ipynb)\n",
+                "\n",
+                "-   [CER043 - Install signed Master\n",
+                "    certificates](../cert-management/cer043-install-master-certs.ipynb)"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": true
+        }
+    }
+}

+ 859 - 0
Big-Data-Clusters/CU8/Public/content/cert-management/cer034-sign-controller-generated-cert.ipynb

@@ -0,0 +1,859 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "CER034 - Sign Controller certificate with cluster Root CA\n",
+                "=========================================================\n",
+                "\n",
+                "This notebook signs the certificate created using:\n",
+                "\n",
+                "-   [CER024 - Create Controller\n",
+                "    certificate](../cert-management/cer024-create-controller-cert.ipynb)\n",
+                "\n",
+                "with the Big Data Cluster CA or the generated Root CA Certificate,\n",
+                "created using either:\n",
+                "\n",
+                "-   [CER001 - Generate a Root CA\n",
+                "    certificate](../cert-management/cer001-create-root-ca.ipynb)\n",
+                "-   [CER003 - Upload existing Root CA\n",
+                "    certificate](../cert-management/cer003-upload-existing-root-ca.ipynb)\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "import getpass\n",
+                "\n",
+                "app_name = \"controller\"\n",
+                "scaledset_name = \"control\"\n",
+                "container_name = \"controller\"\n",
+                "prefix_keyfile_name = \"controller\"\n",
+                "common_name = \"controller-svc\"\n",
+                "\n",
+                "country_name = \"US\"\n",
+                "state_or_province_name = \"Illinois\"\n",
+                "locality_name = \"Chicago\"\n",
+                "organization_name = \"Contoso\"\n",
+                "organizational_unit_name = \"Finance\"\n",
+                "email_address = f\"{getpass.getuser().lower()}@contoso.com\"\n",
+                "\n",
+                "ssl_configuration_file = \"ca.openssl.cnf\"\n",
+                "\n",
+                "days = \"398\" # the number of days to certify the certificate for\n",
+                "\n",
+                "certificate_filename = \"cacert.pem\"\n",
+                "private_key_filename = \"cakey.pem\"\n",
+                "use_bdc_ca = False\n",
+                "\n",
+                "test_cert_store_root = \"/var/opt/secrets/test-certificates\""
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Common functions\n",
+                "\n",
+                "Define helper functions used in this notebook."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n",
+                "import sys\n",
+                "import os\n",
+                "import re\n",
+                "import json\n",
+                "import platform\n",
+                "import shlex\n",
+                "import shutil\n",
+                "import datetime\n",
+                "\n",
+                "from subprocess import Popen, PIPE\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
+                "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
+                "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
+                "\n",
+                "first_run = True\n",
+                "rules = None\n",
+                "debug_logging = False\n",
+                "\n",
+                "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
+                "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
+                "\n",
+                "    NOTES:\n",
+                "\n",
+                "    1.  Commands that need this kind of ' quoting on Windows e.g.:\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n",
+                "\n",
+                "        Need to actually pass in as '\"':\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n",
+                "\n",
+                "        The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n",
+                "        \n",
+                "            `iter(p.stdout.readline, b'')`\n",
+                "\n",
+                "        The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n",
+                "    \"\"\"\n",
+                "    MAX_RETRIES = 5\n",
+                "    output = \"\"\n",
+                "    retry = False\n",
+                "\n",
+                "    global first_run\n",
+                "    global rules\n",
+                "\n",
+                "    if first_run:\n",
+                "        first_run = False\n",
+                "        rules = load_rules()\n",
+                "\n",
+                "    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n",
+                "    #\n",
+                "    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
+                "    #\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n",
+                "        cmd = cmd.replace(\"\\n\", \" \")\n",
+                "\n",
+                "    # shlex.split is required on bash and for Windows paths with spaces\n",
+                "    #\n",
+                "    cmd_actual = shlex.split(cmd)\n",
+                "\n",
+                "    # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n",
+                "    #\n",
+                "    user_provided_exe_name = cmd_actual[0].lower()\n",
+                "\n",
+                "    # When running python, use the python in the ADS sandbox ({sys.executable})\n",
+                "    #\n",
+                "    if cmd.startswith(\"python \"):\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n",
+                "\n",
+                "        # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n",
+                "        # with:\n",
+                "        #\n",
+                "        #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n",
+                "        #\n",
+                "        # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n",
+                "        #\n",
+                "        if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n",
+                "            os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n",
+                "\n",
+                "    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n",
+                "    #\n",
+                "    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n",
+                "\n",
+                "    # To aid supportability, determine which binary file will actually be executed on the machine\n",
+                "    #\n",
+                "    which_binary = None\n",
+                "\n",
+                "    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n",
+                "    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n",
+                "    # of CURL exists on the machine use that one.  (Unfortunately the curl.exe in System32 is almost\n",
+                "    # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n",
+                "    # look for the 2nd installation of CURL in the path)\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n",
+                "        path = os.getenv('PATH')\n",
+                "        for p in path.split(os.path.pathsep):\n",
+                "            p = os.path.join(p, \"curl.exe\")\n",
+                "            if os.path.exists(p) and os.access(p, os.X_OK):\n",
+                "                if p.lower().find(\"system32\") == -1:\n",
+                "                    cmd_actual[0] = p\n",
+                "                    which_binary = p\n",
+                "                    break\n",
+                "\n",
+                "    # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n",
+                "    # seems to be required for .msi installs of azdata.cmd/az.cmd.  (otherwise Popen returns FileNotFound) \n",
+                "    #\n",
+                "    # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        which_binary = shutil.which(cmd_actual[0])\n",
+                "\n",
+                "    # Display an install HINT, so the user can click on a SOP to install the missing binary\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n",
+                "        print(sys.path)\n",
+                "\n",
+                "        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
+                "            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n",
+                "    else:   \n",
+                "        cmd_actual[0] = which_binary\n",
+                "\n",
+                "    start_time = datetime.datetime.now().replace(microsecond=0)\n",
+                "\n",
+                "    print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n",
+                "    print(f\"       using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n",
+                "    print(f\"       cwd: {os.getcwd()}\")\n",
+                "\n",
+                "    # Command-line tools such as CURL and AZDATA HDFS commands output\n",
+                "    # scrolling progress bars, which causes Jupyter to hang forever, to\n",
+                "    # workaround this, use no_output=True\n",
+                "    #\n",
+                "\n",
+                "\n",
+                "    # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n",
+                "    #\n",
+                "    wait = True \n",
+                "\n",
+                "    try:\n",
+                "        if no_output:\n",
+                "            p = Popen(cmd_actual)\n",
+                "        else:\n",
+                "            p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n",
+                "            with p.stdout:\n",
+                "                for line in iter(p.stdout.readline, b''):\n",
+                "                    line = line.decode()\n",
+                "                    if return_output:\n",
+                "                        output = output + line\n",
+                "                    else:\n",
+                "                        if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n",
+                "                            regex = re.compile('  \"(.*)\"\\: \"(.*)\"') \n",
+                "                            match = regex.match(line)\n",
+                "                            if match:\n",
+                "                                if match.group(1).find(\"HTML\") != -1:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n",
+                "                                else:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n",
+                "\n",
+                "                                    wait = False\n",
+                "                                    break # otherwise infinite hang, have not worked out why yet.\n",
+                "                        else:\n",
+                "                            print(line, end='')\n",
+                "                            if rules is not None:\n",
+                "                                apply_expert_rules(line)\n",
+                "\n",
+                "        if wait:\n",
+                "            p.wait()\n",
+                "    except FileNotFoundError as e:\n",
+                "        if install_hint is not None:\n",
+                "            display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n",
+                "\n",
+                "    exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n",
+                "\n",
+                "    if not no_output:\n",
+                "        for line in iter(p.stderr.readline, b''):\n",
+                "            try:\n",
+                "                line_decoded = line.decode()\n",
+                "            except UnicodeDecodeError:\n",
+                "                # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n",
+                "                #\n",
+                "                #   \\xa0\n",
+                "                #\n",
+                "                # For example see this in the response from `az group create`:\n",
+                "                #\n",
+                "                # ERROR: Get Token request returned http error: 400 and server \n",
+                "                # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n",
+                "                # The refresh token has expired due to inactivity.\\xa0The token was \n",
+                "                # issued on 2018-10-25T23:35:11.9832872Z\n",
+                "                #\n",
+                "                # which generates the exception:\n",
+                "                #\n",
+                "                # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n",
+                "                #\n",
+                "                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n",
+                "                print(line)\n",
+                "                line_decoded = \"\"\n",
+                "                pass\n",
+                "            else:\n",
+                "\n",
+                "                # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n",
+                "                # print this empty \"ERR:\" as it confuses.\n",
+                "                #\n",
+                "                if line_decoded == \"\":\n",
+                "                    continue\n",
+                "                \n",
+                "                print(f\"STDERR: {line_decoded}\", end='')\n",
+                "\n",
+                "                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n",
+                "                    exit_code_workaround = 1\n",
+                "\n",
+                "                # inject HINTs to next TSG/SOP based on output in stderr\n",
+                "                #\n",
+                "                if user_provided_exe_name in error_hints:\n",
+                "                    for error_hint in error_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(error_hint[0]) != -1:\n",
+                "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
+                "\n",
+                "                # apply expert rules (to run follow-on notebooks), based on output\n",
+                "                #\n",
+                "                if rules is not None:\n",
+                "                    apply_expert_rules(line_decoded)\n",
+                "\n",
+                "                # Verify if a transient error, if so automatically retry (recursive)\n",
+                "                #\n",
+                "                if user_provided_exe_name in retry_hints:\n",
+                "                    for retry_hint in retry_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(retry_hint) != -1:\n",
+                "                            if retry_count < MAX_RETRIES:\n",
+                "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
+                "                                retry_count = retry_count + 1\n",
+                "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
+                "\n",
+                "                                if return_output:\n",
+                "                                    if base64_decode:\n",
+                "                                        import base64\n",
+                "\n",
+                "                                        return base64.b64decode(output).decode('utf-8')\n",
+                "                                    else:\n",
+                "                                        return output\n",
+                "\n",
+                "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
+                "\n",
+                "    # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n",
+                "    # don't wait here, if success known above\n",
+                "    #\n",
+                "    if wait: \n",
+                "        if p.returncode != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n",
+                "    else:\n",
+                "        if exit_code_workaround !=0 :\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n",
+                "\n",
+                "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
+                "\n",
+                "    if return_output:\n",
+                "        if base64_decode:\n",
+                "            import base64\n",
+                "\n",
+                "            return base64.b64decode(output).decode('utf-8')\n",
+                "        else:\n",
+                "            return output\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    \"\"\"Load a json file from disk and return the contents\"\"\"\n",
+                "\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def load_rules():\n",
+                "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
+                "\n",
+                "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
+                "    #\n",
+                "    try:\n",
+                "        j = load_json(\"cer034-sign-controller-generated-cert.ipynb\")\n",
+                "    except:\n",
+                "        pass # If the user has renamed the book, we can't load ourself.  NOTE: Is there a way in Jupyter, to know your own filename?\n",
+                "    else:\n",
+                "        if \"metadata\" in j and \\\n",
+                "            \"azdata\" in j[\"metadata\"] and \\\n",
+                "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
+                "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "\n",
+                "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
+                "\n",
+                "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n",
+                "\n",
+                "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
+                "\n",
+                "            return rules\n",
+                "\n",
+                "def apply_expert_rules(line):\n",
+                "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n",
+                "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
+                "\n",
+                "    global rules\n",
+                "\n",
+                "    for rule in rules:\n",
+                "        notebook = rule[1]\n",
+                "        cell_type = rule[2]\n",
+                "        output_type = rule[3] # i.e. stream or error\n",
+                "        output_type_name = rule[4] # i.e. ename or name \n",
+                "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
+                "        details_name = rule[6]  # i.e. evalue or text \n",
+                "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
+                "\n",
+                "        if debug_logging:\n",
+                "            print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n",
+                "\n",
+                "        if re.match(expression, line, re.DOTALL):\n",
+                "\n",
+                "            if debug_logging:\n",
+                "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n",
+                "\n",
+                "            match_found = True\n",
+                "\n",
+                "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
+                "\n",
+                "\n",
+                "\n",
+                "print('Common functions defined successfully.')\n",
+                "\n",
+                "# Hints for binary (transient fault) retry, (known) error and install guide\n",
+                "#\n",
+                "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n",
+                "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n",
+                "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the Kubernetes namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster use the kubectl command line\n",
+                "interface .\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
+                "    except:\n",
+                "        from IPython.display import Markdown\n",
+                "        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'.  SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Create a temporary directory to stage files"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Create a temporary directory to hold configuration files\n",
+                "\n",
+                "import tempfile\n",
+                "\n",
+                "temp_dir = tempfile.mkdtemp()\n",
+                "\n",
+                "print(f\"Temporary directory created: {temp_dir}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Helper function to save configuration files to disk"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define helper function 'save_file' to save configuration files to the temporary directory created above\n",
+                "import os\n",
+                "import io\n",
+                "\n",
+                "def save_file(filename, contents):\n",
+                "    with io.open(os.path.join(temp_dir, filename), \"w\", encoding='utf8', newline='\\n') as text_file:\n",
+                "      text_file.write(contents)\n",
+                "\n",
+                "      print(\"File saved: \" + os.path.join(temp_dir, filename))\n",
+                "\n",
+                "print(\"Function `save_file` defined successfully.\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get name of the \u2018Running\u2019 `controller` `pod`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place the name  of the 'Running' controller pod in variable `controller`\n",
+                "\n",
+                "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n",
+                "\n",
+                "print(f\"Controller pod name: {controller}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Create Signing Request configuration file"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "certificate = f\"\"\"\n",
+                "[ ca ]\n",
+                "default_ca    = CA_default      # The default ca section\n",
+                "\n",
+                "[ CA_default ]\n",
+                "default_days     = 1000         # How long to certify for\n",
+                "default_crl_days = 30           # How long before next CRL\n",
+                "default_md       = sha256       # Use public key default MD\n",
+                "preserve         = no           # Keep passed DN ordering\n",
+                "\n",
+                "x509_extensions = ca_extensions # The extensions to add to the cert\n",
+                "\n",
+                "email_in_dn     = no            # Don't concat the email in the DN\n",
+                "copy_extensions = copy          # Required to copy SANs from CSR to cert\n",
+                "\n",
+                "base_dir      = {test_cert_store_root}\n",
+                "certificate   = $base_dir/{certificate_filename}   # The CA certifcate\n",
+                "private_key   = $base_dir/{private_key_filename}    # The CA private key\n",
+                "new_certs_dir = $base_dir              # Location for new certs after signing\n",
+                "database      = $base_dir/index.txt    # Database index file\n",
+                "serial        = $base_dir/serial.txt   # The current serial number\n",
+                "\n",
+                "unique_subject = no  # Set to 'no' to allow creation of\n",
+                "                     # several certificates with same subject.\n",
+                "\n",
+                "[ req ]\n",
+                "default_bits       = 2048\n",
+                "default_keyfile    = {test_cert_store_root}/{private_key_filename}\n",
+                "distinguished_name = ca_distinguished_name\n",
+                "x509_extensions    = ca_extensions\n",
+                "string_mask        = utf8only\n",
+                "\n",
+                "[ ca_distinguished_name ]\n",
+                "countryName         = Country Name (2 letter code)\n",
+                "countryName_default = {country_name}\n",
+                "\n",
+                "stateOrProvinceName         = State or Province Name (full name)\n",
+                "stateOrProvinceName_default = {state_or_province_name}\n",
+                "\n",
+                "localityName                = Locality Name (eg, city)\n",
+                "localityName_default        = {locality_name}\n",
+                "\n",
+                "organizationName            = Organization Name (eg, company)\n",
+                "organizationName_default    = {organization_name}\n",
+                "\n",
+                "organizationalUnitName         = Organizational Unit (eg, division)\n",
+                "organizationalUnitName_default = {organizational_unit_name}\n",
+                "\n",
+                "commonName         = Common Name (e.g. server FQDN or YOUR name)\n",
+                "commonName_default = {common_name}\n",
+                "\n",
+                "emailAddress         = Email Address\n",
+                "emailAddress_default = {email_address}\n",
+                "\n",
+                "[ ca_extensions ]\n",
+                "subjectKeyIdentifier   = hash\n",
+                "authorityKeyIdentifier = keyid:always, issuer\n",
+                "basicConstraints       = critical, CA:true\n",
+                "keyUsage               = keyCertSign, cRLSign\n",
+                "\n",
+                "[ signing_policy ]\n",
+                "countryName            = optional\n",
+                "stateOrProvinceName    = optional\n",
+                "localityName           = optional\n",
+                "organizationName       = optional\n",
+                "organizationalUnitName = optional\n",
+                "commonName             = supplied\n",
+                "emailAddress           = optional\n",
+                "\n",
+                "[ signing_req ]\n",
+                "subjectKeyIdentifier   = hash\n",
+                "authorityKeyIdentifier = keyid,issuer\n",
+                "basicConstraints       = CA:FALSE\n",
+                "keyUsage               = digitalSignature, keyEncipherment\n",
+                "\"\"\"\n",
+                "\n",
+                "save_file(ssl_configuration_file, certificate)\n"
+            ]
+        },
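+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "Two lines in this configuration are worth calling out:\n",
+                "`copy_extensions = copy` carries the subject alternative names from the\n",
+                "CSR into the signed certificate, and `unique_subject = no` allows\n",
+                "issuing several certificates with the same subject, which happens when\n",
+                "certificates are re-generated. As a minimal local sanity check (a\n",
+                "sketch, run before the file is copied to the pod), the rendered\n",
+                "configuration can be checked for the expected sections:\n",
+                "\n",
+                "    rendered = open(os.path.join(temp_dir, ssl_configuration_file)).read()\n",
+                "    assert \"[ CA_default ]\" in rendered and \"[ signing_policy ]\" in rendered\n"
+            ]
+        },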
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Copy certificate configuration to `controller` `pod`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import os\n",
+                "\n",
+                "cwd = os.getcwd()\n",
+                "os.chdir(temp_dir) # Use chdir to workaround kubectl bug on Windows, which incorrectly processes 'c:\\' on kubectl cp cmd line \n",
+                "\n",
+                "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"mkdir -p {test_cert_store_root}/{app_name}\"')\n",
+                "\n",
+                "run(f'kubectl cp {ssl_configuration_file} {controller}:{test_cert_store_root}/{app_name}/{ssl_configuration_file} -c controller -n {namespace}')\n",
+                "\n",
+                "os.chdir(cwd)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Obtain CA certificate and extract the private key from pfx if Big Data Cluster CA is used"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "if use_bdc_ca:\n",
+                "\n",
+                "  cmd = f\"cp /var/run/secrets/certificates/rootca/cluster-ca-certificate.crt {test_cert_store_root}/{certificate_filename}\"\n",
+                "\n",
+                "  run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')\n",
+                "\n",
+                "  cmd = f\"NO_PASSWORD= openssl pkcs12 -in /var/run/secrets/certificates/rootca/cluster-ca-certificate.p12 -out {test_cert_store_root}/{private_key_filename} -nocerts -nodes -password env:NO_PASSWORD\"\n",
+                "\n",
+                "  run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')"
+            ]
+        },
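+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "In the `openssl pkcs12` command above, `-nocerts` outputs only the\n",
+                "private key (not the certificates in the bundle), `-nodes` writes the\n",
+                "key unencrypted, and `-password env:NO_PASSWORD` reads the import\n",
+                "password from the `NO_PASSWORD` environment variable, which the\n",
+                "`NO_PASSWORD=` prefix sets to an empty string for this command."
+            ]
+        },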
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Set next serial number"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "run(f'kubectl exec {controller} -n {namespace} -c controller -- bash -c \"test -f {test_cert_store_root}/index.txt || touch {test_cert_store_root}/index.txt\"')\n",
+                "run(f\"\"\"kubectl exec {controller} -n {namespace} -c controller -- bash -c \"test -f {test_cert_store_root}/serial.txt || echo '00' > {test_cert_store_root}/serial.txt\" \"\"\")\n",
+                "\n",
+                "current_serial_number = run(f\"\"\"kubectl exec {controller} -n {namespace} -c controller -- bash -c \"cat {test_cert_store_root}/serial.txt\" \"\"\", return_output=True)\n",
+                "\n",
+                "# The serial number is hex\n",
+                "new_serial_number = int(f\"0x{current_serial_number}\", 0) + 1\n",
+                "\n",
+                "run(f\"\"\"kubectl exec {controller} -n {namespace} -c controller -- bash -c \"echo '{new_serial_number:02X}' > {test_cert_store_root}/serial.txt\" \"\"\")"
+            ]
+        },
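+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "As a worked example of the hex arithmetic above (the value is\n",
+                "hypothetical): if `serial.txt` contains `0A`, then\n",
+                "`int(\"0x0A\", 0) + 1` yields `11`, and `f\"{11:02X}\"` formats it as\n",
+                "`0B`, which is written back to `serial.txt`."
+            ]
+        },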
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Create private key and certificate signing request"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "cmd = f\"openssl ca -batch -notext -config {test_cert_store_root}/{app_name}/ca.openssl.cnf -policy signing_policy -extensions signing_req -out {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -infiles {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-signingrequest.csr\"\n",
+                "\n",
+                "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')"
+            ]
+        },
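+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "In the `openssl ca` command above, `-batch` suppresses interactive\n",
+                "prompts, `-notext` omits the human-readable text form of the\n",
+                "certificate from the output file, `-policy signing_policy` and\n",
+                "`-extensions signing_req` select the corresponding sections of the\n",
+                "configuration file created earlier, and `-infiles` introduces the CSR\n",
+                "to sign."
+            ]
+        },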
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Display certificate\n",
+                "\n",
+                "Use openssl x509 to display the certificate, so it can be visually\n",
+                "verified to be correct."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "cmd = f\"openssl x509 -in {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -text -noout\"\n",
+                "\n",
+                "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Generate the `controller` certificate in PKCS12 format\n",
+                "\n",
+                "For the controller certificate, generate the certificate in PKCS12\n",
+                "format.\n",
+                "\n",
+                "In order to obtain the certificate in PKCS12 format from the generated\n",
+                "private key and certificate file above, run the following:"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "cmd = f'NO_PASSWORD= openssl pkcs12 -export -out {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.p12 -inkey {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-privatekey.pem -in {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -password env:NO_PASSWORD'\n",
+                "\n",
+                "run(f'kubectl exec {controller} -n {namespace} -c {app_name} -- bash -c \"{cmd}\"')"
+            ]
+        },
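+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "As a sketch (using the same empty-password convention and the same\n",
+                "path placeholders as the cell above), the exported bundle can be\n",
+                "listed without extracting the key to verify it:\n",
+                "\n",
+                "    NO_PASSWORD= openssl pkcs12 -info -nokeys -in {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.p12 -password env:NO_PASSWORD\n"
+            ]
+        },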
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Clean up the extracted controller private key\n",
+                "\n",
+                "Remove the private key that was extract from the\n",
+                "cluster-ca-certificate.p12 above."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "cmd = f\"rm {test_cert_store_root}/{private_key_filename}\"\n",
+                "\n",
+                "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Clean up temporary directory for staging configuration files"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Delete the temporary directory used to hold configuration files\n",
+                "\n",
+                "import shutil\n",
+                "\n",
+                "shutil.rmtree(temp_dir)\n",
+                "\n",
+                "print(f'Temporary directory deleted: {temp_dir}')"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "Related\n",
+                "-------\n",
+                "\n",
+                "-   [CER040 - Install signed Management Proxy\n",
+                "    certificate](../cert-management/cer040-install-service-proxy-cert.ipynb)\n",
+                "\n",
+                "-   [CER024 - Create Controller\n",
+                "    certificate](../cert-management/cer024-create-controller-cert.ipynb)\n",
+                "\n",
+                "-   [CER044 - Install signed Controller\n",
+                "    certificate](../cert-management/cer044-install-controller-cert.ipynb)"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": true
+        }
+    }
+}

+ 926 - 0
Big-Data-Clusters/CU8/Public/content/cert-management/cer040-install-service-proxy-cert.ipynb

@@ -0,0 +1,926 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "CER040 - Install signed Management Proxy certificate\n",
+                "====================================================\n",
+                "\n",
+                "This notebook installs into the Big Data Cluster the certificate signed\n",
+                "using:\n",
+                "\n",
+                "-   [CER030 - Sign Management Proxy certificate with generated\n",
+                "    CA](../cert-management/cer030-sign-service-proxy-generated-cert.ipynb)\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "app_name = \"mgmtproxy\"\n",
+                "scaledset_name = \"mgmtproxy\"\n",
+                "container_name = \"service-proxy\"\n",
+                "prefix_keyfile_name = \"service-proxy\"\n",
+                "common_name = \"mgmtproxy-svc\"\n",
+                "user = \"nginx\"\n",
+                "group = \"nginx\"\n",
+                "mode = \"550\"\n",
+                "\n",
+                "test_cert_store_root = \"/var/opt/secrets/test-certificates\""
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Common functions\n",
+                "\n",
+                "Define helper functions used in this notebook."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n",
+                "import sys\n",
+                "import os\n",
+                "import re\n",
+                "import json\n",
+                "import platform\n",
+                "import shlex\n",
+                "import shutil\n",
+                "import datetime\n",
+                "\n",
+                "from subprocess import Popen, PIPE\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
+                "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
+                "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
+                "\n",
+                "first_run = True\n",
+                "rules = None\n",
+                "debug_logging = False\n",
+                "\n",
+                "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
+                "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
+                "\n",
+                "    NOTES:\n",
+                "\n",
+                "    1.  Commands that need this kind of ' quoting on Windows e.g.:\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n",
+                "\n",
+                "        Need to actually pass in as '\"':\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n",
+                "\n",
+                "        The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n",
+                "        \n",
+                "            `iter(p.stdout.readline, b'')`\n",
+                "\n",
+                "        The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n",
+                "    \"\"\"\n",
+                "    MAX_RETRIES = 5\n",
+                "    output = \"\"\n",
+                "    retry = False\n",
+                "\n",
+                "    global first_run\n",
+                "    global rules\n",
+                "\n",
+                "    if first_run:\n",
+                "        first_run = False\n",
+                "        rules = load_rules()\n",
+                "\n",
+                "    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n",
+                "    #\n",
+                "    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
+                "    #\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n",
+                "        cmd = cmd.replace(\"\\n\", \" \")\n",
+                "\n",
+                "    # shlex.split is required on bash and for Windows paths with spaces\n",
+                "    #\n",
+                "    cmd_actual = shlex.split(cmd)\n",
+                "\n",
+                "    # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n",
+                "    #\n",
+                "    user_provided_exe_name = cmd_actual[0].lower()\n",
+                "\n",
+                "    # When running python, use the python in the ADS sandbox ({sys.executable})\n",
+                "    #\n",
+                "    if cmd.startswith(\"python \"):\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n",
+                "\n",
+                "        # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n",
+                "        # with:\n",
+                "        #\n",
+                "        #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n",
+                "        #\n",
+                "        # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n",
+                "        #\n",
+                "        if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n",
+                "            os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n",
+                "\n",
+                "    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n",
+                "    #\n",
+                "    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n",
+                "\n",
+                "    # To aid supportability, determine which binary file will actually be executed on the machine\n",
+                "    #\n",
+                "    which_binary = None\n",
+                "\n",
+                "    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n",
+                "    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n",
+                "    # of CURL exists on the machine use that one.  (Unfortunately the curl.exe in System32 is almost\n",
+                "    # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n",
+                "    # look for the 2nd installation of CURL in the path)\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n",
+                "        path = os.getenv('PATH')\n",
+                "        for p in path.split(os.path.pathsep):\n",
+                "            p = os.path.join(p, \"curl.exe\")\n",
+                "            if os.path.exists(p) and os.access(p, os.X_OK):\n",
+                "                if p.lower().find(\"system32\") == -1:\n",
+                "                    cmd_actual[0] = p\n",
+                "                    which_binary = p\n",
+                "                    break\n",
+                "\n",
+                "    # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n",
+                "    # seems to be required for .msi installs of azdata.cmd/az.cmd.  (otherwise Popen returns FileNotFound) \n",
+                "    #\n",
+                "    # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        which_binary = shutil.which(cmd_actual[0])\n",
+                "\n",
+                "    # Display an install HINT, so the user can click on a SOP to install the missing binary\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n",
+                "        print(sys.path)\n",
+                "\n",
+                "        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
+                "            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n",
+                "    else:   \n",
+                "        cmd_actual[0] = which_binary\n",
+                "\n",
+                "    start_time = datetime.datetime.now().replace(microsecond=0)\n",
+                "\n",
+                "    print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n",
+                "    print(f\"       using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n",
+                "    print(f\"       cwd: {os.getcwd()}\")\n",
+                "\n",
+                "    # Command-line tools such as CURL and AZDATA HDFS commands output\n",
+                "    # scrolling progress bars, which causes Jupyter to hang forever, to\n",
+                "    # workaround this, use no_output=True\n",
+                "    #\n",
+                "\n",
+                "\n",
+                "    # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n",
+                "    #\n",
+                "    wait = True \n",
+                "\n",
+                "    try:\n",
+                "        if no_output:\n",
+                "            p = Popen(cmd_actual)\n",
+                "        else:\n",
+                "            p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n",
+                "            with p.stdout:\n",
+                "                for line in iter(p.stdout.readline, b''):\n",
+                "                    line = line.decode()\n",
+                "                    if return_output:\n",
+                "                        output = output + line\n",
+                "                    else:\n",
+                "                        if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n",
+                "                            regex = re.compile('  \"(.*)\"\\: \"(.*)\"') \n",
+                "                            match = regex.match(line)\n",
+                "                            if match:\n",
+                "                                if match.group(1).find(\"HTML\") != -1:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n",
+                "                                else:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n",
+                "\n",
+                "                                    wait = False\n",
+                "                                    break # otherwise infinite hang, have not worked out why yet.\n",
+                "                        else:\n",
+                "                            print(line, end='')\n",
+                "                            if rules is not None:\n",
+                "                                apply_expert_rules(line)\n",
+                "\n",
+                "        if wait:\n",
+                "            p.wait()\n",
+                "    except FileNotFoundError as e:\n",
+                "        if install_hint is not None:\n",
+                "            display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n",
+                "\n",
+                "    exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n",
+                "\n",
+                "    if not no_output:\n",
+                "        for line in iter(p.stderr.readline, b''):\n",
+                "            try:\n",
+                "                line_decoded = line.decode()\n",
+                "            except UnicodeDecodeError:\n",
+                "                # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n",
+                "                #\n",
+                "                #   \\xa0\n",
+                "                #\n",
+                "                # For example see this in the response from `az group create`:\n",
+                "                #\n",
+                "                # ERROR: Get Token request returned http error: 400 and server \n",
+                "                # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n",
+                "                # The refresh token has expired due to inactivity.\\xa0The token was \n",
+                "                # issued on 2018-10-25T23:35:11.9832872Z\n",
+                "                #\n",
+                "                # which generates the exception:\n",
+                "                #\n",
+                "                # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n",
+                "                #\n",
+                "                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n",
+                "                print(line)\n",
+                "                line_decoded = \"\"\n",
+                "                pass\n",
+                "            else:\n",
+                "\n",
+                "                # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n",
+                "                # print this empty \"ERR:\" as it confuses.\n",
+                "                #\n",
+                "                if line_decoded == \"\":\n",
+                "                    continue\n",
+                "                \n",
+                "                print(f\"STDERR: {line_decoded}\", end='')\n",
+                "\n",
+                "                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n",
+                "                    exit_code_workaround = 1\n",
+                "\n",
+                "                # inject HINTs to next TSG/SOP based on output in stderr\n",
+                "                #\n",
+                "                if user_provided_exe_name in error_hints:\n",
+                "                    for error_hint in error_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(error_hint[0]) != -1:\n",
+                "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
+                "\n",
+                "                # apply expert rules (to run follow-on notebooks), based on output\n",
+                "                #\n",
+                "                if rules is not None:\n",
+                "                    apply_expert_rules(line_decoded)\n",
+                "\n",
+                "                # Verify if a transient error, if so automatically retry (recursive)\n",
+                "                #\n",
+                "                if user_provided_exe_name in retry_hints:\n",
+                "                    for retry_hint in retry_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(retry_hint) != -1:\n",
+                "                            if retry_count < MAX_RETRIES:\n",
+                "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
+                "                                retry_count = retry_count + 1\n",
+                "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
+                "\n",
+                "                                if return_output:\n",
+                "                                    if base64_decode:\n",
+                "                                        import base64\n",
+                "\n",
+                "                                        return base64.b64decode(output).decode('utf-8')\n",
+                "                                    else:\n",
+                "                                        return output\n",
+                "\n",
+                "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
+                "\n",
+                "    # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n",
+                "    # don't wait here, if success known above\n",
+                "    #\n",
+                "    if wait: \n",
+                "        if p.returncode != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n",
+                "    else:\n",
+                "        if exit_code_workaround !=0 :\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n",
+                "\n",
+                "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
+                "\n",
+                "    if return_output:\n",
+                "        if base64_decode:\n",
+                "            import base64\n",
+                "\n",
+                "            return base64.b64decode(output).decode('utf-8')\n",
+                "        else:\n",
+                "            return output\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    \"\"\"Load a json file from disk and return the contents\"\"\"\n",
+                "\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def load_rules():\n",
+                "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
+                "\n",
+                "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
+                "    #\n",
+                "    try:\n",
+                "        j = load_json(\"cer040-install-service-proxy-cert.ipynb\")\n",
+                "    except:\n",
+                "        pass # If the user has renamed the book, we can't load ourself.  NOTE: Is there a way in Jupyter, to know your own filename?\n",
+                "    else:\n",
+                "        if \"metadata\" in j and \\\n",
+                "            \"azdata\" in j[\"metadata\"] and \\\n",
+                "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
+                "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "\n",
+                "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
+                "\n",
+                "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n",
+                "\n",
+                "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
+                "\n",
+                "            return rules\n",
+                "\n",
+                "def apply_expert_rules(line):\n",
+                "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n",
+                "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
+                "\n",
+                "    global rules\n",
+                "\n",
+                "    for rule in rules:\n",
+                "        notebook = rule[1]\n",
+                "        cell_type = rule[2]\n",
+                "        output_type = rule[3] # i.e. stream or error\n",
+                "        output_type_name = rule[4] # i.e. ename or name \n",
+                "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
+                "        details_name = rule[6]  # i.e. evalue or text \n",
+                "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
+                "\n",
+                "        if debug_logging:\n",
+                "            print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n",
+                "\n",
+                "        if re.match(expression, line, re.DOTALL):\n",
+                "\n",
+                "            if debug_logging:\n",
+                "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n",
+                "\n",
+                "            match_found = True\n",
+                "\n",
+                "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
+                "\n",
+                "\n",
+                "\n",
+                "print('Common functions defined successfully.')\n",
+                "\n",
+                "# Hints for binary (transient fault) retry, (known) error and install guide\n",
+                "#\n",
+                "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n",
+                "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n",
+                "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the Kubernetes namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster use the kubectl command line\n",
+                "interface .\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
+                "    except:\n",
+                "        from IPython.display import Markdown\n",
+                "        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'.  SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Create a temporary directory to stage files"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Create a temporary directory to hold configuration files\n",
+                "\n",
+                "import tempfile\n",
+                "\n",
+                "temp_dir = tempfile.mkdtemp()\n",
+                "\n",
+                "print(f\"Temporary directory created: {temp_dir}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Helper function to save configuration files to disk"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define helper function 'save_file' to save configuration files to the temporary directory created above\n",
+                "import os\n",
+                "import io\n",
+                "\n",
+                "def save_file(filename, contents):\n",
+                "    with io.open(os.path.join(temp_dir, filename), \"w\", encoding='utf8', newline='\\n') as text_file:\n",
+                "      text_file.write(contents)\n",
+                "\n",
+                "      print(\"File saved: \" + os.path.join(temp_dir, filename))\n",
+                "\n",
+                "print(\"Function `save_file` defined successfully.\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get name of the \u2018Running\u2019 `controller` `pod`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place the name  of the 'Running' controller pod in variable `controller`\n",
+                "\n",
+                "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n",
+                "\n",
+                "print(f\"Controller pod name: {controller}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the name of the `management proxy` `pod`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place the name  of the mgmtproxy pod in variable `pod`\n",
+                "\n",
+                "pod = run(f'kubectl get pod --selector=app=mgmtproxy -n {namespace} -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
+                "\n",
+                "print(f\"Management proxy pod name: {pod}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Validate certificate common name and alt names"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import json\n",
+                "from urllib.parse import urlparse\n",
+                "\n",
+                "kubernetes_default_record_name = 'kubernetes.default'\n",
+                "kubernetes_default_svc_prefix = 'kubernetes.default.svc'\n",
+                "default_dns_suffix = 'svc.cluster.local'\n",
+                "dns_suffix = ''\n",
+                "\n",
+                "nslookup_output=run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"nslookup {kubernetes_default_record_name} > /tmp/nslookup.out; cat /tmp/nslookup.out; rm /tmp/nslookup.out\"  ', return_output=True)\n",
+                "\n",
+                "name = re.findall('Name:\\s+(.[^,|^\\s|^\\n]+)', nslookup_output)\n",
+                "\n",
+                "if not name or kubernetes_default_svc_prefix not in name[0]:\n",
+                "    dns_suffix = default_dns_suffix\n",
+                "else:\n",
+                "    dns_suffix = 'svc' + name[0].replace(kubernetes_default_svc_prefix, '')\n",
+                "\n",
+                "alt_names = \"\"\n",
+                "bdc_fqdn = \"\"\n",
+                "\n",
+                "alt_names += f\"DNS.1 = {common_name}\\n\"\n",
+                "alt_names += f\"DNS.2 = {common_name}.{namespace}.{dns_suffix} \\n\"\n",
+                "\n",
+                "hdfs_vault_svc = \"hdfsvault-svc\"\n",
+                "bdc_config = run(\"azdata bdc config show\", return_output=True)\n",
+                "bdc_config = json.loads(bdc_config)\n",
+                "\n",
+                "dns_counter = 3 # DNS.1 and DNS.2 are already in the certificate template\n",
+                "\n",
+                "# Stateful set related DNS names\n",
+                "#\n",
+                "if app_name == \"gateway\" or app_name == \"master\":\n",
+                "  alt_names += f'DNS.{str(dns_counter)} = {pod}.{common_name}\\n'\n",
+                "  dns_counter = dns_counter + 1\n",
+                "  alt_names += f'DNS.{str(dns_counter)} = {pod}.{common_name}.{namespace}.{dns_suffix}\\n'\n",
+                "  dns_counter = dns_counter + 1\n",
+                "\n",
+                "# AD related DNS names\n",
+                "#\n",
+                "if \"security\" in bdc_config[\"spec\"] and \"activeDirectory\" in bdc_config[\"spec\"][\"security\"]:\n",
+                "    domain_dns_name = bdc_config[\"spec\"][\"security\"][\"activeDirectory\"][\"domainDnsName\"]\n",
+                "    subdomain_name = bdc_config[\"spec\"][\"security\"][\"activeDirectory\"][\"subdomain\"]\n",
+                "\n",
+                "    if subdomain_name:\n",
+                "        bdc_fqdn = f\"{subdomain_name}.{domain_dns_name}\"\n",
+                "    else:\n",
+                "        bdc_fqdn = f\"{namespace}.{domain_dns_name}\"\n",
+                "\n",
+                "    alt_names += f\"DNS.{str(dns_counter)} = {common_name}.{bdc_fqdn}\\n\"\n",
+                "    dns_counter = dns_counter + 1\n",
+                "\n",
+                "    if app_name == \"gateway\" or app_name == \"master\":\n",
+                "      alt_names += f'DNS.{str(dns_counter)} = {pod}.{bdc_fqdn}\\n'\n",
+                "      dns_counter = dns_counter + 1\n",
+                "\n",
+                "    # Endpoint DNS names for bdc certificates\n",
+                "    #\n",
+                "    if app_name in bdc_config[\"spec\"][\"resources\"]:\n",
+                "        app_name_endpoints = bdc_config[\"spec\"][\"resources\"][app_name][\"spec\"][\"endpoints\"]\n",
+                "        for endpoint in app_name_endpoints:\n",
+                "            if \"dnsName\" in endpoint:\n",
+                "                alt_names += f'DNS.{str(dns_counter)} = {endpoint[\"dnsName\"]}\\n'\n",
+                "                dns_counter = dns_counter + 1\n",
+                "        \n",
+                "    # Endpoint DNS names for control plane certificates\n",
+                "    #\n",
+                "    if app_name == \"controller\" or app_name == \"mgmtproxy\":\n",
+                "        bdc_endpoint_list = run(\"azdata bdc endpoint list\", return_output=True)\n",
+                "        bdc_endpoint_list = json.loads(bdc_endpoint_list)\n",
+                "\n",
+                "        # Parse the DNS host name from:\n",
+                "        #\n",
+                "        #    \"endpoint\": \"https://monitor.aris.local:30777\"\n",
+                "        # \n",
+                "        for endpoint in bdc_endpoint_list:\n",
+                "            if endpoint[\"name\"] == app_name:\n",
+                "                url = urlparse(endpoint[\"endpoint\"])\n",
+                "                alt_names += f\"DNS.{str(dns_counter)} = {url.hostname}\\n\"\n",
+                "                dns_counter = dns_counter + 1\n",
+                "\n",
+                "# Special case for the controller certificate\n",
+                "#\n",
+                "if app_name == \"controller\":\n",
+                "    alt_names += f\"DNS.{str(dns_counter)} = localhost\\n\"\n",
+                "    dns_counter = dns_counter + 1\n",
+                "\n",
+                "    # Add hdfsvault-svc host for key management calls.\n",
+                "    #\n",
+                "    alt_names += f\"DNS.{str(dns_counter)} = {hdfs_vault_svc}\\n\"\n",
+                "    dns_counter = dns_counter + 1\n",
+                "\n",
+                "    # Add hdfsvault-svc FQDN for key management calls.\n",
+                "    #\n",
+                "    if bdc_fqdn:\n",
+                "        alt_names += f\"DNS.{str(dns_counter)} = {hdfs_vault_svc}.{bdc_fqdn}\\n\"\n",
+                "        dns_counter = dns_counter + 1\n",
+                "\n",
+                "required_dns_names = re.findall('DNS\\.[0-9] = ([^,|^\\s|^\\n]+)', alt_names)\n",
+                "\n",
+                "# Get certificate common name and DNS names\n",
+                "# \n",
+                "cert = run(f'kubectl exec {controller} -c controller -n {namespace} -- openssl x509 -in {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -text -noout', return_output=True)\n",
+                "subject = re.findall('Subject:(.+)', cert)[0]\n",
+                "certficate_common_name = re.findall('CN=(.[^,|^\\s|^\\n]+)', subject)[0]\n",
+                "certficate_dns_names = re.findall('DNS:(.[^,|^\\s|^\\n]+)', cert)\n",
+                "\n",
+                "# Validate the common name\n",
+                "#\n",
+                "if (common_name != certficate_common_name):\n",
+                "    run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"rm -rf {test_cert_store_root}/{app_name}\"')\n",
+                "    raise SystemExit(f'Certficate common name does not match the expected one: {common_name}')\n",
+                "\n",
+                "# Validate the DNS names\n",
+                "#\n",
+                "if not all(dns_name in certficate_dns_names for dns_name in required_dns_names):\n",
+                "    run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"rm -rf {test_cert_store_root}/{app_name}\"')\n",
+                "    raise SystemExit(f'Certficate does not have all required DNS names: {required_dns_names}')"
+            ]
+        },
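+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "As a minimal sketch of the extraction performed above (the subject\n",
+                "line is hypothetical):\n",
+                "\n",
+                "    import re\n",
+                "    sample = \"Subject: CN=mgmtproxy-svc, O=Contoso\"\n",
+                "    re.findall('CN=(.[^,|^\\\\s|^\\\\n]+)', sample)   # ['mgmtproxy-svc']\n"
+            ]
+        },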
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Copy certifcate files from `controller` to local machine"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import os\n",
+                "\n",
+                "cwd = os.getcwd()\n",
+                "os.chdir(temp_dir) # Use chdir to workaround kubectl bug on Windows, which incorrectly processes 'c:\\' on kubectl cp cmd line \n",
+                "\n",
+                "run(f'kubectl cp {controller}:{test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem {prefix_keyfile_name}-certificate.pem -c controller -n {namespace}')\n",
+                "run(f'kubectl cp {controller}:{test_cert_store_root}/{app_name}/{prefix_keyfile_name}-privatekey.pem {prefix_keyfile_name}-privatekey.pem -c controller -n {namespace}')\n",
+                "\n",
+                "os.chdir(cwd)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Copy certifcate files from local machine to `controldb`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import os\n",
+                "\n",
+                "cwd = os.getcwd()\n",
+                "os.chdir(temp_dir) # Workaround kubectl bug on Windows, can't put c:\\ on kubectl cp cmd line \n",
+                "\n",
+                "run(f'kubectl cp {prefix_keyfile_name}-certificate.pem controldb-0:/var/opt/mssql/{prefix_keyfile_name}-certificate.pem -c mssql-server -n {namespace}')\n",
+                "run(f'kubectl cp {prefix_keyfile_name}-privatekey.pem controldb-0:/var/opt/mssql/{prefix_keyfile_name}-privatekey.pem -c mssql-server -n {namespace}')\n",
+                "\n",
+                "os.chdir(cwd)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the `controller-db-rw-secret` secret\n",
+                "\n",
+                "Get the controller SQL symmetric key password for decryption."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import base64\n",
+                "\n",
+                "controller_db_rw_secret = run(f'kubectl get secret/controller-db-rw-secret -n {namespace} -o jsonpath={{.data.encryptionPassword}}', return_output=True)\n",
+                "controller_db_rw_secret = base64.b64decode(controller_db_rw_secret).decode('utf-8')\n",
+                "\n",
+                "print(\"controller_db_rw_secret retrieved\")"
+            ]
+        },
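+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "For reference, the equivalent shell one-liner is (the namespace is a\n",
+                "placeholder):\n",
+                "\n",
+                "    kubectl get secret/controller-db-rw-secret -n <namespace> -o jsonpath='{.data.encryptionPassword}' | base64 -d\n"
+            ]
+        },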
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Update the files table with the certificates through opened SQL connection"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import os\n",
+                "\n",
+                "sql = f\"\"\"\n",
+                "OPEN SYMMETRIC KEY ControllerDbSymmetricKey DECRYPTION BY PASSWORD = '{controller_db_rw_secret}'\n",
+                "\n",
+                "DECLARE @FileData VARBINARY(MAX), @Key uniqueidentifier;\n",
+                "SELECT @Key = KEY_GUID('ControllerDbSymmetricKey');\n",
+                " \n",
+                "SELECT TOP 1 @FileData = doc.BulkColumn FROM OPENROWSET(BULK N'/var/opt/mssql/{prefix_keyfile_name}-certificate.pem', SINGLE_BLOB) AS doc;\n",
+                "EXEC [dbo].[sp_set_file_data_encrypted] @FilePath = '/config/scaledsets/{scaledset_name}/containers/{container_name}/files/{prefix_keyfile_name}-certificate.pem',\n",
+                " @Data = @FileData,\n",
+                " @KeyGuid = @Key,\n",
+                " @Version = '0',\n",
+                " @User = '{user}',\n",
+                " @Group = '{group}',\n",
+                " @Mode = '{mode}';\n",
+                "\n",
+                "SELECT TOP 1 @FileData = doc.BulkColumn FROM OPENROWSET(BULK N'/var/opt/mssql/{prefix_keyfile_name}-privatekey.pem', SINGLE_BLOB) AS doc;\n",
+                "EXEC [dbo].[sp_set_file_data_encrypted] @FilePath = '/config/scaledsets/{scaledset_name}/containers/{container_name}/files/{prefix_keyfile_name}-privatekey.pem',\n",
+                " @Data = @FileData,\n",
+                " @KeyGuid = @Key,\n",
+                " @Version = '0',\n",
+                " @User = '{user}',\n",
+                " @Group = '{group}',\n",
+                " @Mode = '{mode}';\n",
+                "\"\"\"\n",
+                "\n",
+                "save_file(\"insert_certificates.sql\", sql)\n",
+                "\n",
+                "cwd = os.getcwd()\n",
+                "os.chdir(temp_dir) # Workaround kubectl bug on Windows, can't put c:\\ on kubectl cp cmd line \n",
+                "\n",
+                "run(f'kubectl cp insert_certificates.sql controldb-0:/var/opt/mssql/insert_certificates.sql -c mssql-server -n {namespace}')\n",
+                "\n",
+                "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"SQLCMDPASSWORD=`cat /var/run/secrets/credentials/mssql-sa-password/password` /opt/mssql-tools/bin/sqlcmd -b -U sa -d controller -i /var/opt/mssql/insert_certificates.sql\" \"\"\")\n",
+                "\n",
+                "# Clean up\n",
+                "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"rm /var/opt/mssql/insert_certificates.sql\" \"\"\")\n",
+                "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"rm /var/opt/mssql/{prefix_keyfile_name}-certificate.pem\" \"\"\")\n",
+                "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"rm /var/opt/mssql/{prefix_keyfile_name}-privatekey.pem\" \"\"\")\n",
+                "\n",
+                "os.chdir(cwd)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Clear out the controller\\_db\\_rw\\_secret variable"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "controller_db_rw_secret= \"\""
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Clean up certificate staging area\n",
+                "\n",
+                "Remove the certificate files generated on disk (they have now been\n",
+                "placed in the controller database)."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "cmd = f\"rm -r {test_cert_store_root}/{app_name}\"\n",
+                "\n",
+                "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Restart Pod"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "run(f'kubectl delete pod {pod} -n {namespace}')"
+            ]
+        },
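+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "Kubernetes re-creates the deleted pod automatically. As an optional\n",
+                "check, the sketch below blocks until the new pod reports Ready,\n",
+                "assuming `kubectl wait` is available (kubectl 1.11+); if the pod has\n",
+                "not been re-created yet, re-run it after a short pause."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "# Optional sketch: block until the re-created pod reports Ready (assumes kubectl wait is available)\n",
+                "run(f'kubectl wait --for=condition=Ready pod/{pod} -n {namespace} --timeout=600s')"
+            ]
+        },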
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Clean up temporary directory for staging configuration files"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Delete the temporary directory used to hold configuration files\n",
+                "\n",
+                "import shutil\n",
+                "\n",
+                "shutil.rmtree(temp_dir)\n",
+                "\n",
+                "print(f'Temporary directory deleted: {temp_dir}')"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "Related\n",
+                "-------\n",
+                "\n",
+                "-   [CER041 - Install signed Knox\n",
+                "    certificate](../cert-management/cer041-install-knox-cert.ipynb)\n",
+                "\n",
+                "-   [CER030 - Sign Management Proxy certificate with generated\n",
+                "    CA](../cert-management/cer030-sign-service-proxy-generated-cert.ipynb)\n",
+                "\n",
+                "-   [CER020 - Create Management Proxy\n",
+                "    certificate](../cert-management/cer020-create-management-service-proxy-cert.ipynb)"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": true
+        }
+    }
+}

+ 918 - 0
Big-Data-Clusters/CU8/Public/content/cert-management/cer041-install-knox-cert.ipynb

@@ -0,0 +1,918 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "CER041 - Install signed Knox certificate\n",
+                "========================================\n",
+                "\n",
+                "This notebook installs into the Big Data Cluster the certificate signed\n",
+                "using:\n",
+                "\n",
+                "-   [CER031 - Sign Knox certificate with generated\n",
+                "    CA](../cert-management/cer031-sign-knox-generated-cert.ipynb)\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "app_name = \"gateway\"\n",
+                "scaledset_name = \"gateway/pods/gateway-0\"\n",
+                "container_name = \"knox\"\n",
+                "prefix_keyfile_name = \"knox\"\n",
+                "common_name = \"gateway-svc\"\n",
+                "user = \"knox\"\n",
+                "group = \"knox\"\n",
+                "mode = \"550\"\n",
+                "\n",
+                "test_cert_store_root = \"/var/opt/secrets/test-certificates\""
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Common functions\n",
+                "\n",
+                "Define helper functions used in this notebook."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n",
+                "import sys\n",
+                "import os\n",
+                "import re\n",
+                "import json\n",
+                "import platform\n",
+                "import shlex\n",
+                "import shutil\n",
+                "import datetime\n",
+                "\n",
+                "from subprocess import Popen, PIPE\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
+                "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
+                "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
+                "\n",
+                "first_run = True\n",
+                "rules = None\n",
+                "debug_logging = False\n",
+                "\n",
+                "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
+                "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
+                "\n",
+                "    NOTES:\n",
+                "\n",
+                "    1.  Commands that need this kind of ' quoting on Windows e.g.:\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n",
+                "\n",
+                "        Need to actually pass in as '\"':\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n",
+                "\n",
+                "        The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n",
+                "        \n",
+                "            `iter(p.stdout.readline, b'')`\n",
+                "\n",
+                "        The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n",
+                "    \"\"\"\n",
+                "    MAX_RETRIES = 5\n",
+                "    output = \"\"\n",
+                "    retry = False\n",
+                "\n",
+                "    global first_run\n",
+                "    global rules\n",
+                "\n",
+                "    if first_run:\n",
+                "        first_run = False\n",
+                "        rules = load_rules()\n",
+                "\n",
+                "    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n",
+                "    #\n",
+                "    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
+                "    #\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n",
+                "        cmd = cmd.replace(\"\\n\", \" \")\n",
+                "\n",
+                "    # shlex.split is required on bash and for Windows paths with spaces\n",
+                "    #\n",
+                "    cmd_actual = shlex.split(cmd)\n",
+                "\n",
+                "    # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n",
+                "    #\n",
+                "    user_provided_exe_name = cmd_actual[0].lower()\n",
+                "\n",
+                "    # When running python, use the python in the ADS sandbox ({sys.executable})\n",
+                "    #\n",
+                "    if cmd.startswith(\"python \"):\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n",
+                "\n",
+                "        # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n",
+                "        # with:\n",
+                "        #\n",
+                "        #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n",
+                "        #\n",
+                "        # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n",
+                "        #\n",
+                "        if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n",
+                "            os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n",
+                "\n",
+                "    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n",
+                "    #\n",
+                "    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n",
+                "\n",
+                "    # To aid supportability, determine which binary file will actually be executed on the machine\n",
+                "    #\n",
+                "    which_binary = None\n",
+                "\n",
+                "    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n",
+                "    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n",
+                "    # of CURL exists on the machine use that one.  (Unfortunately the curl.exe in System32 is almost\n",
+                "    # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n",
+                "    # look for the 2nd installation of CURL in the path)\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n",
+                "        path = os.getenv('PATH')\n",
+                "        for p in path.split(os.path.pathsep):\n",
+                "            p = os.path.join(p, \"curl.exe\")\n",
+                "            if os.path.exists(p) and os.access(p, os.X_OK):\n",
+                "                if p.lower().find(\"system32\") == -1:\n",
+                "                    cmd_actual[0] = p\n",
+                "                    which_binary = p\n",
+                "                    break\n",
+                "\n",
+                "    # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n",
+                "    # seems to be required for .msi installs of azdata.cmd/az.cmd.  (otherwise Popen returns FileNotFound) \n",
+                "    #\n",
+                "    # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        which_binary = shutil.which(cmd_actual[0])\n",
+                "\n",
+                "    # Display an install HINT, so the user can click on a SOP to install the missing binary\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n",
+                "        print(sys.path)\n",
+                "\n",
+                "        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
+                "            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n",
+                "    else:   \n",
+                "        cmd_actual[0] = which_binary\n",
+                "\n",
+                "    start_time = datetime.datetime.now().replace(microsecond=0)\n",
+                "\n",
+                "    print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n",
+                "    print(f\"       using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n",
+                "    print(f\"       cwd: {os.getcwd()}\")\n",
+                "\n",
+                "    # Command-line tools such as CURL and AZDATA HDFS commands output\n",
+                "    # scrolling progress bars, which causes Jupyter to hang forever, to\n",
+                "    # workaround this, use no_output=True\n",
+                "    #\n",
+                "\n",
+                "\n",
+                "    # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n",
+                "    #\n",
+                "    wait = True \n",
+                "\n",
+                "    try:\n",
+                "        if no_output:\n",
+                "            p = Popen(cmd_actual)\n",
+                "        else:\n",
+                "            p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n",
+                "            with p.stdout:\n",
+                "                for line in iter(p.stdout.readline, b''):\n",
+                "                    line = line.decode()\n",
+                "                    if return_output:\n",
+                "                        output = output + line\n",
+                "                    else:\n",
+                "                        if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n",
+                "                            regex = re.compile('  \"(.*)\"\\: \"(.*)\"') \n",
+                "                            match = regex.match(line)\n",
+                "                            if match:\n",
+                "                                if match.group(1).find(\"HTML\") != -1:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n",
+                "                                else:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n",
+                "\n",
+                "                                    wait = False\n",
+                "                                    break # otherwise infinite hang, have not worked out why yet.\n",
+                "                        else:\n",
+                "                            print(line, end='')\n",
+                "                            if rules is not None:\n",
+                "                                apply_expert_rules(line)\n",
+                "\n",
+                "        if wait:\n",
+                "            p.wait()\n",
+                "    except FileNotFoundError as e:\n",
+                "        if install_hint is not None:\n",
+                "            display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n",
+                "\n",
+                "    exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n",
+                "\n",
+                "    if not no_output:\n",
+                "        for line in iter(p.stderr.readline, b''):\n",
+                "            try:\n",
+                "                line_decoded = line.decode()\n",
+                "            except UnicodeDecodeError:\n",
+                "                # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n",
+                "                #\n",
+                "                #   \\xa0\n",
+                "                #\n",
+                "                # For example see this in the response from `az group create`:\n",
+                "                #\n",
+                "                # ERROR: Get Token request returned http error: 400 and server \n",
+                "                # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n",
+                "                # The refresh token has expired due to inactivity.\\xa0The token was \n",
+                "                # issued on 2018-10-25T23:35:11.9832872Z\n",
+                "                #\n",
+                "                # which generates the exception:\n",
+                "                #\n",
+                "                # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n",
+                "                #\n",
+                "                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n",
+                "                print(line)\n",
+                "                line_decoded = \"\"\n",
+                "                pass\n",
+                "            else:\n",
+                "\n",
+                "                # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n",
+                "                # print this empty \"ERR:\" as it confuses.\n",
+                "                #\n",
+                "                if line_decoded == \"\":\n",
+                "                    continue\n",
+                "                \n",
+                "                print(f\"STDERR: {line_decoded}\", end='')\n",
+                "\n",
+                "                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n",
+                "                    exit_code_workaround = 1\n",
+                "\n",
+                "                # inject HINTs to next TSG/SOP based on output in stderr\n",
+                "                #\n",
+                "                if user_provided_exe_name in error_hints:\n",
+                "                    for error_hint in error_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(error_hint[0]) != -1:\n",
+                "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
+                "\n",
+                "                # apply expert rules (to run follow-on notebooks), based on output\n",
+                "                #\n",
+                "                if rules is not None:\n",
+                "                    apply_expert_rules(line_decoded)\n",
+                "\n",
+                "                # Verify if a transient error, if so automatically retry (recursive)\n",
+                "                #\n",
+                "                if user_provided_exe_name in retry_hints:\n",
+                "                    for retry_hint in retry_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(retry_hint) != -1:\n",
+                "                            if retry_count < MAX_RETRIES:\n",
+                "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
+                "                                retry_count = retry_count + 1\n",
+                "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
+                "\n",
+                "                                if return_output:\n",
+                "                                    if base64_decode:\n",
+                "                                        import base64\n",
+                "\n",
+                "                                        return base64.b64decode(output).decode('utf-8')\n",
+                "                                    else:\n",
+                "                                        return output\n",
+                "\n",
+                "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
+                "\n",
+                "    # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n",
+                "    # don't wait here, if success known above\n",
+                "    #\n",
+                "    if wait: \n",
+                "        if p.returncode != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n",
+                "    else:\n",
+                "        if exit_code_workaround !=0 :\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n",
+                "\n",
+                "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
+                "\n",
+                "    if return_output:\n",
+                "        if base64_decode:\n",
+                "            import base64\n",
+                "\n",
+                "            return base64.b64decode(output).decode('utf-8')\n",
+                "        else:\n",
+                "            return output\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    \"\"\"Load a json file from disk and return the contents\"\"\"\n",
+                "\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def load_rules():\n",
+                "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
+                "\n",
+                "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
+                "    #\n",
+                "    try:\n",
+                "        j = load_json(\"cer041-install-knox-cert.ipynb\")\n",
+                "    except:\n",
+                "        pass # If the user has renamed the book, we can't load ourself.  NOTE: Is there a way in Jupyter, to know your own filename?\n",
+                "    else:\n",
+                "        if \"metadata\" in j and \\\n",
+                "            \"azdata\" in j[\"metadata\"] and \\\n",
+                "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
+                "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "\n",
+                "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
+                "\n",
+                "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n",
+                "\n",
+                "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
+                "\n",
+                "            return rules\n",
+                "\n",
+                "def apply_expert_rules(line):\n",
+                "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n",
+                "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
+                "\n",
+                "    global rules\n",
+                "\n",
+                "    for rule in rules:\n",
+                "        notebook = rule[1]\n",
+                "        cell_type = rule[2]\n",
+                "        output_type = rule[3] # i.e. stream or error\n",
+                "        output_type_name = rule[4] # i.e. ename or name \n",
+                "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
+                "        details_name = rule[6]  # i.e. evalue or text \n",
+                "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
+                "\n",
+                "        if debug_logging:\n",
+                "            print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n",
+                "\n",
+                "        if re.match(expression, line, re.DOTALL):\n",
+                "\n",
+                "            if debug_logging:\n",
+                "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n",
+                "\n",
+                "            match_found = True\n",
+                "\n",
+                "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
+                "\n",
+                "\n",
+                "\n",
+                "print('Common functions defined successfully.')\n",
+                "\n",
+                "# Hints for binary (transient fault) retry, (known) error and install guide\n",
+                "#\n",
+                "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n",
+                "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n",
+                "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the Kubernetes namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster use the kubectl command line\n",
+                "interface .\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
+                "    except:\n",
+                "        from IPython.display import Markdown\n",
+                "        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'.  SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Create a temporary directory to stage files"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Create a temporary directory to hold configuration files\n",
+                "\n",
+                "import tempfile\n",
+                "\n",
+                "temp_dir = tempfile.mkdtemp()\n",
+                "\n",
+                "print(f\"Temporary directory created: {temp_dir}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Helper function to save configuration files to disk"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define helper function 'save_file' to save configuration files to the temporary directory created above\n",
+                "import os\n",
+                "import io\n",
+                "\n",
+                "def save_file(filename, contents):\n",
+                "    with io.open(os.path.join(temp_dir, filename), \"w\", encoding='utf8', newline='\\n') as text_file:\n",
+                "      text_file.write(contents)\n",
+                "\n",
+                "      print(\"File saved: \" + os.path.join(temp_dir, filename))\n",
+                "\n",
+                "print(\"Function `save_file` defined successfully.\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get name of the \u2018Running\u2019 `controller` `pod`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place the name  of the 'Running' controller pod in variable `controller`\n",
+                "\n",
+                "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n",
+                "\n",
+                "print(f\"Controller pod name: {controller}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Pod name for gateway"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "pod = 'gateway-0'"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Validate certificate common name and alt names"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import json\n",
+                "from urllib.parse import urlparse\n",
+                "\n",
+                "kubernetes_default_record_name = 'kubernetes.default'\n",
+                "kubernetes_default_svc_prefix = 'kubernetes.default.svc'\n",
+                "default_dns_suffix = 'svc.cluster.local'\n",
+                "dns_suffix = ''\n",
+                "\n",
+                "nslookup_output=run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"nslookup {kubernetes_default_record_name} > /tmp/nslookup.out; cat /tmp/nslookup.out; rm /tmp/nslookup.out\"  ', return_output=True)\n",
+                "\n",
+                "name = re.findall('Name:\\s+(.[^,|^\\s|^\\n]+)', nslookup_output)\n",
+                "\n",
+                "if not name or kubernetes_default_svc_prefix not in name[0]:\n",
+                "    dns_suffix = default_dns_suffix\n",
+                "else:\n",
+                "    dns_suffix = 'svc' + name[0].replace(kubernetes_default_svc_prefix, '')\n",
+                "\n",
+                "alt_names = \"\"\n",
+                "bdc_fqdn = \"\"\n",
+                "\n",
+                "alt_names += f\"DNS.1 = {common_name}\\n\"\n",
+                "alt_names += f\"DNS.2 = {common_name}.{namespace}.{dns_suffix} \\n\"\n",
+                "\n",
+                "hdfs_vault_svc = \"hdfsvault-svc\"\n",
+                "bdc_config = run(\"azdata bdc config show\", return_output=True)\n",
+                "bdc_config = json.loads(bdc_config)\n",
+                "\n",
+                "dns_counter = 3 # DNS.1 and DNS.2 are already in the certificate template\n",
+                "\n",
+                "# Stateful set related DNS names\n",
+                "#\n",
+                "if app_name == \"gateway\" or app_name == \"master\":\n",
+                "  alt_names += f'DNS.{str(dns_counter)} = {pod}.{common_name}\\n'\n",
+                "  dns_counter = dns_counter + 1\n",
+                "  alt_names += f'DNS.{str(dns_counter)} = {pod}.{common_name}.{namespace}.{dns_suffix}\\n'\n",
+                "  dns_counter = dns_counter + 1\n",
+                "\n",
+                "# AD related DNS names\n",
+                "#\n",
+                "if \"security\" in bdc_config[\"spec\"] and \"activeDirectory\" in bdc_config[\"spec\"][\"security\"]:\n",
+                "    domain_dns_name = bdc_config[\"spec\"][\"security\"][\"activeDirectory\"][\"domainDnsName\"]\n",
+                "    subdomain_name = bdc_config[\"spec\"][\"security\"][\"activeDirectory\"][\"subdomain\"]\n",
+                "\n",
+                "    if subdomain_name:\n",
+                "        bdc_fqdn = f\"{subdomain_name}.{domain_dns_name}\"\n",
+                "    else:\n",
+                "        bdc_fqdn = f\"{namespace}.{domain_dns_name}\"\n",
+                "\n",
+                "    alt_names += f\"DNS.{str(dns_counter)} = {common_name}.{bdc_fqdn}\\n\"\n",
+                "    dns_counter = dns_counter + 1\n",
+                "\n",
+                "    if app_name == \"gateway\" or app_name == \"master\":\n",
+                "      alt_names += f'DNS.{str(dns_counter)} = {pod}.{bdc_fqdn}\\n'\n",
+                "      dns_counter = dns_counter + 1\n",
+                "\n",
+                "    # Endpoint DNS names for bdc certificates\n",
+                "    #\n",
+                "    if app_name in bdc_config[\"spec\"][\"resources\"]:\n",
+                "        app_name_endpoints = bdc_config[\"spec\"][\"resources\"][app_name][\"spec\"][\"endpoints\"]\n",
+                "        for endpoint in app_name_endpoints:\n",
+                "            if \"dnsName\" in endpoint:\n",
+                "                alt_names += f'DNS.{str(dns_counter)} = {endpoint[\"dnsName\"]}\\n'\n",
+                "                dns_counter = dns_counter + 1\n",
+                "        \n",
+                "    # Endpoint DNS names for control plane certificates\n",
+                "    #\n",
+                "    if app_name == \"controller\" or app_name == \"mgmtproxy\":\n",
+                "        bdc_endpoint_list = run(\"azdata bdc endpoint list\", return_output=True)\n",
+                "        bdc_endpoint_list = json.loads(bdc_endpoint_list)\n",
+                "\n",
+                "        # Parse the DNS host name from:\n",
+                "        #\n",
+                "        #    \"endpoint\": \"https://monitor.aris.local:30777\"\n",
+                "        # \n",
+                "        for endpoint in bdc_endpoint_list:\n",
+                "            if endpoint[\"name\"] == app_name:\n",
+                "                url = urlparse(endpoint[\"endpoint\"])\n",
+                "                alt_names += f\"DNS.{str(dns_counter)} = {url.hostname}\\n\"\n",
+                "                dns_counter = dns_counter + 1\n",
+                "\n",
+                "# Special case for the controller certificate\n",
+                "#\n",
+                "if app_name == \"controller\":\n",
+                "    alt_names += f\"DNS.{str(dns_counter)} = localhost\\n\"\n",
+                "    dns_counter = dns_counter + 1\n",
+                "\n",
+                "    # Add hdfsvault-svc host for key management calls.\n",
+                "    #\n",
+                "    alt_names += f\"DNS.{str(dns_counter)} = {hdfs_vault_svc}\\n\"\n",
+                "    dns_counter = dns_counter + 1\n",
+                "\n",
+                "    # Add hdfsvault-svc FQDN for key management calls.\n",
+                "    #\n",
+                "    if bdc_fqdn:\n",
+                "        alt_names += f\"DNS.{str(dns_counter)} = {hdfs_vault_svc}.{bdc_fqdn}\\n\"\n",
+                "        dns_counter = dns_counter + 1\n",
+                "\n",
+                "required_dns_names = re.findall('DNS\\.[0-9] = ([^,|^\\s|^\\n]+)', alt_names)\n",
+                "\n",
+                "# Get certificate common name and DNS names\n",
+                "# \n",
+                "cert = run(f'kubectl exec {controller} -c controller -n {namespace} -- openssl x509 -in {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -text -noout', return_output=True)\n",
+                "subject = re.findall('Subject:(.+)', cert)[0]\n",
+                "certficate_common_name = re.findall('CN=(.[^,|^\\s|^\\n]+)', subject)[0]\n",
+                "certficate_dns_names = re.findall('DNS:(.[^,|^\\s|^\\n]+)', cert)\n",
+                "\n",
+                "# Validate the common name\n",
+                "#\n",
+                "if (common_name != certficate_common_name):\n",
+                "    run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"rm -rf {test_cert_store_root}/{app_name}\"')\n",
+                "    raise SystemExit(f'Certficate common name does not match the expected one: {common_name}')\n",
+                "\n",
+                "# Validate the DNS names\n",
+                "#\n",
+                "if not all(dns_name in certficate_dns_names for dns_name in required_dns_names):\n",
+                "    run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"rm -rf {test_cert_store_root}/{app_name}\"')\n",
+                "    raise SystemExit(f'Certficate does not have all required DNS names: {required_dns_names}')"
+            ]
+        },
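+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "As an optional diagnostic, the sketch below prints the expected and\n",
+                "actual values compared by the validation above, which can help when\n",
+                "adapting this notebook to other endpoints."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "# Diagnostic sketch: show the values the validation above compared\n",
+                "print(f'Expected common name:     {common_name}')\n",
+                "print(f'Certificate common name:  {certificate_common_name}')\n",
+                "print(f'Required DNS names:       {required_dns_names}')\n",
+                "print(f'Certificate DNS names:    {certificate_dns_names}')"
+            ]
+        },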
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Copy certifcate files from `controller` to local machine"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import os\n",
+                "\n",
+                "cwd = os.getcwd()\n",
+                "os.chdir(temp_dir) # Use chdir to workaround kubectl bug on Windows, which incorrectly processes 'c:\\' on kubectl cp cmd line \n",
+                "\n",
+                "run(f'kubectl cp {controller}:{test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem {prefix_keyfile_name}-certificate.pem -c controller -n {namespace}')\n",
+                "run(f'kubectl cp {controller}:{test_cert_store_root}/{app_name}/{prefix_keyfile_name}-privatekey.pem {prefix_keyfile_name}-privatekey.pem -c controller -n {namespace}')\n",
+                "\n",
+                "os.chdir(cwd)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Copy certifcate files from local machine to `controldb`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import os\n",
+                "\n",
+                "cwd = os.getcwd()\n",
+                "os.chdir(temp_dir) # Workaround kubectl bug on Windows, can't put c:\\ on kubectl cp cmd line \n",
+                "\n",
+                "run(f'kubectl cp {prefix_keyfile_name}-certificate.pem controldb-0:/var/opt/mssql/{prefix_keyfile_name}-certificate.pem -c mssql-server -n {namespace}')\n",
+                "run(f'kubectl cp {prefix_keyfile_name}-privatekey.pem controldb-0:/var/opt/mssql/{prefix_keyfile_name}-privatekey.pem -c mssql-server -n {namespace}')\n",
+                "\n",
+                "os.chdir(cwd)"
+            ]
+        },
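+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "As an optional verification, the sketch below lists the staged files\n",
+                "inside the `controldb-0` pod to confirm the copy succeeded."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "# Verification sketch: confirm both .pem files were staged in controldb-0\n",
+                "run(f'kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"ls -l /var/opt/mssql/{prefix_keyfile_name}-certificate.pem /var/opt/mssql/{prefix_keyfile_name}-privatekey.pem\"')"
+            ]
+        },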
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the `controller-db-rw-secret` secret\n",
+                "\n",
+                "Get the controller SQL symmetric key password for decryption."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import base64\n",
+                "\n",
+                "controller_db_rw_secret = run(f'kubectl get secret/controller-db-rw-secret -n {namespace} -o jsonpath={{.data.encryptionPassword}}', return_output=True)\n",
+                "controller_db_rw_secret = base64.b64decode(controller_db_rw_secret).decode('utf-8')\n",
+                "\n",
+                "print(\"controller_db_rw_secret retrieved\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Update the files table with the certificates through opened SQL connection"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import os\n",
+                "\n",
+                "sql = f\"\"\"\n",
+                "OPEN SYMMETRIC KEY ControllerDbSymmetricKey DECRYPTION BY PASSWORD = '{controller_db_rw_secret}'\n",
+                "\n",
+                "DECLARE @FileData VARBINARY(MAX), @Key uniqueidentifier;\n",
+                "SELECT @Key = KEY_GUID('ControllerDbSymmetricKey');\n",
+                " \n",
+                "SELECT TOP 1 @FileData = doc.BulkColumn FROM OPENROWSET(BULK N'/var/opt/mssql/{prefix_keyfile_name}-certificate.pem', SINGLE_BLOB) AS doc;\n",
+                "EXEC [dbo].[sp_set_file_data_encrypted] @FilePath = '/config/scaledsets/{scaledset_name}/containers/{container_name}/files/{prefix_keyfile_name}-certificate.pem',\n",
+                " @Data = @FileData,\n",
+                " @KeyGuid = @Key,\n",
+                " @Version = '0',\n",
+                " @User = '{user}',\n",
+                " @Group = '{group}',\n",
+                " @Mode = '{mode}';\n",
+                "\n",
+                "SELECT TOP 1 @FileData = doc.BulkColumn FROM OPENROWSET(BULK N'/var/opt/mssql/{prefix_keyfile_name}-privatekey.pem', SINGLE_BLOB) AS doc;\n",
+                "EXEC [dbo].[sp_set_file_data_encrypted] @FilePath = '/config/scaledsets/{scaledset_name}/containers/{container_name}/files/{prefix_keyfile_name}-privatekey.pem',\n",
+                " @Data = @FileData,\n",
+                " @KeyGuid = @Key,\n",
+                " @Version = '0',\n",
+                " @User = '{user}',\n",
+                " @Group = '{group}',\n",
+                " @Mode = '{mode}';\n",
+                "\"\"\"\n",
+                "\n",
+                "save_file(\"insert_certificates.sql\", sql)\n",
+                "\n",
+                "cwd = os.getcwd()\n",
+                "os.chdir(temp_dir) # Workaround kubectl bug on Windows, can't put c:\\ on kubectl cp cmd line \n",
+                "\n",
+                "run(f'kubectl cp insert_certificates.sql controldb-0:/var/opt/mssql/insert_certificates.sql -c mssql-server -n {namespace}')\n",
+                "\n",
+                "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"SQLCMDPASSWORD=`cat /var/run/secrets/credentials/mssql-sa-password/password` /opt/mssql-tools/bin/sqlcmd -b -U sa -d controller -i /var/opt/mssql/insert_certificates.sql\" \"\"\")\n",
+                "\n",
+                "# Clean up\n",
+                "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"rm /var/opt/mssql/insert_certificates.sql\" \"\"\")\n",
+                "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"rm /var/opt/mssql/{prefix_keyfile_name}-certificate.pem\" \"\"\")\n",
+                "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"rm /var/opt/mssql/{prefix_keyfile_name}-privatekey.pem\" \"\"\")\n",
+                "\n",
+                "os.chdir(cwd)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Clear out the controller\\_db\\_rw\\_secret variable"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "controller_db_rw_secret= \"\""
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Clean up certificate staging area\n",
+                "\n",
+                "Remove the certificate files generated on disk (they have now been\n",
+                "placed in the controller database)."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "cmd = f\"rm -r {test_cert_store_root}/{app_name}\"\n",
+                "\n",
+                "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Restart knox gateway service"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "run(f'kubectl delete pod {pod} -n {namespace}')"
+            ]
+        },
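+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "Optionally, wait for the `gateway-0` pod to be re-created and become\n",
+                "Ready before using the Knox endpoint again. This sketch assumes\n",
+                "`kubectl wait` is available (kubectl 1.11+); if the pod has not been\n",
+                "re-created yet, re-run it after a short pause."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "# Optional sketch: block until the re-created gateway pod reports Ready\n",
+                "run(f'kubectl wait --for=condition=Ready pod/{pod} -n {namespace} --timeout=600s')"
+            ]
+        },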
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Clean up temporary directory for staging configuration files"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Delete the temporary directory used to hold configuration files\n",
+                "\n",
+                "import shutil\n",
+                "\n",
+                "shutil.rmtree(temp_dir)\n",
+                "\n",
+                "print(f'Temporary directory deleted: {temp_dir}')"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "Related\n",
+                "-------\n",
+                "\n",
+                "-   [CER042 - Install signed App-Proxy\n",
+                "    certificate](../cert-management/cer042-install-app-proxy-cert.ipynb)\n",
+                "\n",
+                "-   [CER031 - Sign Knox certificate with generated\n",
+                "    CA](../cert-management/cer031-sign-knox-generated-cert.ipynb)\n",
+                "\n",
+                "-   [CER021 - Create Knox\n",
+                "    certificate](../cert-management/cer021-create-knox-cert.ipynb)"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": true
+        }
+    }
+}

+ 923 - 0
Big-Data-Clusters/CU8/Public/content/cert-management/cer042-install-app-proxy-cert.ipynb

@@ -0,0 +1,923 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "CER042 - Install signed App-Proxy certificate\n",
+                "=============================================\n",
+                "\n",
+                "This notebook installs into the Big Data Cluster the certificate signed\n",
+                "using:\n",
+                "\n",
+                "-   [CER032 - Sign App-Proxy certificate with generated\n",
+                "    CA](../cert-management/cer032-sign-app-proxy-generated-cert.ipynb)\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "app_name = \"app-proxy\"\n",
+                "scaledset_name = \"appproxy\"\n",
+                "container_name = \"app-service-proxy\"\n",
+                "prefix_keyfile_name = \"service-proxy\"\n",
+                "common_name = \"appproxy-svc\"\n",
+                "user = \"nginx\"\n",
+                "group = \"nginx\"\n",
+                "mode = \"550\"\n",
+                "\n",
+                "test_cert_store_root = \"/var/opt/secrets/test-certificates\""
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Common functions\n",
+                "\n",
+                "Define helper functions used in this notebook."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n",
+                "import sys\n",
+                "import os\n",
+                "import re\n",
+                "import json\n",
+                "import platform\n",
+                "import shlex\n",
+                "import shutil\n",
+                "import datetime\n",
+                "\n",
+                "from subprocess import Popen, PIPE\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
+                "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
+                "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
+                "\n",
+                "first_run = True\n",
+                "rules = None\n",
+                "debug_logging = False\n",
+                "\n",
+                "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
+                "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
+                "\n",
+                "    NOTES:\n",
+                "\n",
+                "    1.  Commands that need this kind of ' quoting on Windows e.g.:\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n",
+                "\n",
+                "        Need to actually pass in as '\"':\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n",
+                "\n",
+                "        The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n",
+                "        \n",
+                "            `iter(p.stdout.readline, b'')`\n",
+                "\n",
+                "        The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n",
+                "    \"\"\"\n",
+                "    MAX_RETRIES = 5\n",
+                "    output = \"\"\n",
+                "    retry = False\n",
+                "\n",
+                "    global first_run\n",
+                "    global rules\n",
+                "\n",
+                "    if first_run:\n",
+                "        first_run = False\n",
+                "        rules = load_rules()\n",
+                "\n",
+                "    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n",
+                "    #\n",
+                "    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
+                "    #\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n",
+                "        cmd = cmd.replace(\"\\n\", \" \")\n",
+                "\n",
+                "    # shlex.split is required on bash and for Windows paths with spaces\n",
+                "    #\n",
+                "    cmd_actual = shlex.split(cmd)\n",
+                "\n",
+                "    # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n",
+                "    #\n",
+                "    user_provided_exe_name = cmd_actual[0].lower()\n",
+                "\n",
+                "    # When running python, use the python in the ADS sandbox ({sys.executable})\n",
+                "    #\n",
+                "    if cmd.startswith(\"python \"):\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n",
+                "\n",
+                "        # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n",
+                "        # with:\n",
+                "        #\n",
+                "        #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n",
+                "        #\n",
+                "        # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n",
+                "        #\n",
+                "        if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n",
+                "            os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n",
+                "\n",
+                "    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n",
+                "    #\n",
+                "    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n",
+                "\n",
+                "    # To aid supportability, determine which binary file will actually be executed on the machine\n",
+                "    #\n",
+                "    which_binary = None\n",
+                "\n",
+                "    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n",
+                "    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n",
+                "    # of CURL exists on the machine use that one.  (Unfortunately the curl.exe in System32 is almost\n",
+                "    # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n",
+                "    # look for the 2nd installation of CURL in the path)\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n",
+                "        path = os.getenv('PATH')\n",
+                "        for p in path.split(os.path.pathsep):\n",
+                "            p = os.path.join(p, \"curl.exe\")\n",
+                "            if os.path.exists(p) and os.access(p, os.X_OK):\n",
+                "                if p.lower().find(\"system32\") == -1:\n",
+                "                    cmd_actual[0] = p\n",
+                "                    which_binary = p\n",
+                "                    break\n",
+                "\n",
+                "    # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n",
+                "    # seems to be required for .msi installs of azdata.cmd/az.cmd.  (otherwise Popen returns FileNotFound) \n",
+                "    #\n",
+                "    # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        which_binary = shutil.which(cmd_actual[0])\n",
+                "\n",
+                "    # Display an install HINT, so the user can click on a SOP to install the missing binary\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n",
+                "        print(sys.path)\n",
+                "\n",
+                "        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
+                "            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n",
+                "    else:   \n",
+                "        cmd_actual[0] = which_binary\n",
+                "\n",
+                "    start_time = datetime.datetime.now().replace(microsecond=0)\n",
+                "\n",
+                "    print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n",
+                "    print(f\"       using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n",
+                "    print(f\"       cwd: {os.getcwd()}\")\n",
+                "\n",
+                "    # Command-line tools such as CURL and AZDATA HDFS commands output\n",
+                "    # scrolling progress bars, which causes Jupyter to hang forever, to\n",
+                "    # workaround this, use no_output=True\n",
+                "    #\n",
+                "\n",
+                "\n",
+                "    # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n",
+                "    #\n",
+                "    wait = True \n",
+                "\n",
+                "    try:\n",
+                "        if no_output:\n",
+                "            p = Popen(cmd_actual)\n",
+                "        else:\n",
+                "            p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n",
+                "            with p.stdout:\n",
+                "                for line in iter(p.stdout.readline, b''):\n",
+                "                    line = line.decode()\n",
+                "                    if return_output:\n",
+                "                        output = output + line\n",
+                "                    else:\n",
+                "                        if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n",
+                "                            regex = re.compile('  \"(.*)\"\\: \"(.*)\"') \n",
+                "                            match = regex.match(line)\n",
+                "                            if match:\n",
+                "                                if match.group(1).find(\"HTML\") != -1:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n",
+                "                                else:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n",
+                "\n",
+                "                                    wait = False\n",
+                "                                    break # otherwise infinite hang, have not worked out why yet.\n",
+                "                        else:\n",
+                "                            print(line, end='')\n",
+                "                            if rules is not None:\n",
+                "                                apply_expert_rules(line)\n",
+                "\n",
+                "        if wait:\n",
+                "            p.wait()\n",
+                "    except FileNotFoundError as e:\n",
+                "        if install_hint is not None:\n",
+                "            display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n",
+                "\n",
+                "    exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n",
+                "\n",
+                "    if not no_output:\n",
+                "        for line in iter(p.stderr.readline, b''):\n",
+                "            try:\n",
+                "                line_decoded = line.decode()\n",
+                "            except UnicodeDecodeError:\n",
+                "                # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n",
+                "                #\n",
+                "                #   \\xa0\n",
+                "                #\n",
+                "                # For example see this in the response from `az group create`:\n",
+                "                #\n",
+                "                # ERROR: Get Token request returned http error: 400 and server \n",
+                "                # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n",
+                "                # The refresh token has expired due to inactivity.\\xa0The token was \n",
+                "                # issued on 2018-10-25T23:35:11.9832872Z\n",
+                "                #\n",
+                "                # which generates the exception:\n",
+                "                #\n",
+                "                # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n",
+                "                #\n",
+                "                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n",
+                "                print(line)\n",
+                "                line_decoded = \"\"\n",
+                "                pass\n",
+                "            else:\n",
+                "\n",
+                "                # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n",
+                "                # print this empty \"ERR:\" as it confuses.\n",
+                "                #\n",
+                "                if line_decoded == \"\":\n",
+                "                    continue\n",
+                "                \n",
+                "                print(f\"STDERR: {line_decoded}\", end='')\n",
+                "\n",
+                "                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n",
+                "                    exit_code_workaround = 1\n",
+                "\n",
+                "                # inject HINTs to next TSG/SOP based on output in stderr\n",
+                "                #\n",
+                "                if user_provided_exe_name in error_hints:\n",
+                "                    for error_hint in error_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(error_hint[0]) != -1:\n",
+                "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
+                "\n",
+                "                # apply expert rules (to run follow-on notebooks), based on output\n",
+                "                #\n",
+                "                if rules is not None:\n",
+                "                    apply_expert_rules(line_decoded)\n",
+                "\n",
+                "                # Verify if a transient error, if so automatically retry (recursive)\n",
+                "                #\n",
+                "                if user_provided_exe_name in retry_hints:\n",
+                "                    for retry_hint in retry_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(retry_hint) != -1:\n",
+                "                            if retry_count < MAX_RETRIES:\n",
+                "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
+                "                                retry_count = retry_count + 1\n",
+                "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
+                "\n",
+                "                                if return_output:\n",
+                "                                    if base64_decode:\n",
+                "                                        import base64\n",
+                "\n",
+                "                                        return base64.b64decode(output).decode('utf-8')\n",
+                "                                    else:\n",
+                "                                        return output\n",
+                "\n",
+                "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
+                "\n",
+                "    # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n",
+                "    # don't wait here, if success known above\n",
+                "    #\n",
+                "    if wait: \n",
+                "        if p.returncode != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n",
+                "    else:\n",
+                "        if exit_code_workaround !=0 :\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n",
+                "\n",
+                "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
+                "\n",
+                "    if return_output:\n",
+                "        if base64_decode:\n",
+                "            import base64\n",
+                "\n",
+                "            return base64.b64decode(output).decode('utf-8')\n",
+                "        else:\n",
+                "            return output\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    \"\"\"Load a json file from disk and return the contents\"\"\"\n",
+                "\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def load_rules():\n",
+                "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
+                "\n",
+                "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
+                "    #\n",
+                "    try:\n",
+                "        j = load_json(\"cer042-install-app-proxy-cert.ipynb\")\n",
+                "    except:\n",
+                "        pass # If the user has renamed the book, we can't load ourself.  NOTE: Is there a way in Jupyter, to know your own filename?\n",
+                "    else:\n",
+                "        if \"metadata\" in j and \\\n",
+                "            \"azdata\" in j[\"metadata\"] and \\\n",
+                "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
+                "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "\n",
+                "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
+                "\n",
+                "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n",
+                "\n",
+                "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
+                "\n",
+                "            return rules\n",
+                "\n",
+                "def apply_expert_rules(line):\n",
+                "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n",
+                "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
+                "\n",
+                "    global rules\n",
+                "\n",
+                "    for rule in rules:\n",
+                "        notebook = rule[1]\n",
+                "        cell_type = rule[2]\n",
+                "        output_type = rule[3] # i.e. stream or error\n",
+                "        output_type_name = rule[4] # i.e. ename or name \n",
+                "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
+                "        details_name = rule[6]  # i.e. evalue or text \n",
+                "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
+                "\n",
+                "        if debug_logging:\n",
+                "            print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n",
+                "\n",
+                "        if re.match(expression, line, re.DOTALL):\n",
+                "\n",
+                "            if debug_logging:\n",
+                "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n",
+                "\n",
+                "            match_found = True\n",
+                "\n",
+                "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
+                "\n",
+                "\n",
+                "\n",
+                "print('Common functions defined successfully.')\n",
+                "\n",
+                "# Hints for binary (transient fault) retry, (known) error and install guide\n",
+                "#\n",
+                "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n",
+                "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n",
+                "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}"
+            ]
+        },
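+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "As a quick orientation (a sketch, not part of the original procedure), the\n",
+                "`run` helper above has two main modes: streaming command output into the\n",
+                "notebook, or capturing it into a Python string with `return_output=True`."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "# Example (sketch): exercise both modes of `run` with a harmless, offline\n",
+                "# command.  Assumes `kubectl` is on the PATH (it is required throughout\n",
+                "# this notebook anyway).\n",
+                "\n",
+                "run('kubectl version --client')\n",
+                "\n",
+                "client_version = run('kubectl version --client', return_output=True)\n",
+                "print(f'Captured {len(client_version)} characters of kubectl output.')"
+            ]
+        },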
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the Kubernetes namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster use the kubectl command line\n",
+                "interface .\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
+                "    except:\n",
+                "        from IPython.display import Markdown\n",
+                "        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'.  SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')"
+            ]
+        },
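+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "For reference (a sketch), the same override can also be applied from within\n",
+                "the notebook by setting the environment variable before the lookup cell above\n",
+                "is run; `mssql-cluster` below is a hypothetical namespace name."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "# Sketch: uncomment to pin the namespace lookup to a specific cluster.\n",
+                "#\n",
+                "# os.environ[\"AZDATA_NAMESPACE\"] = \"mssql-cluster\"  # hypothetical name\n",
+                "\n",
+                "print(f'Using namespace: {namespace}')"
+            ]
+        },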
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Create a temporary directory to stage files"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Create a temporary directory to hold configuration files\n",
+                "\n",
+                "import tempfile\n",
+                "\n",
+                "temp_dir = tempfile.mkdtemp()\n",
+                "\n",
+                "print(f\"Temporary directory created: {temp_dir}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Helper function to save configuration files to disk"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define helper function 'save_file' to save configuration files to the temporary directory created above\n",
+                "import os\n",
+                "import io\n",
+                "\n",
+                "def save_file(filename, contents):\n",
+                "    with io.open(os.path.join(temp_dir, filename), \"w\", encoding='utf8', newline='\\n') as text_file:\n",
+                "      text_file.write(contents)\n",
+                "\n",
+                "      print(\"File saved: \" + os.path.join(temp_dir, filename))\n",
+                "\n",
+                "print(\"Function `save_file` defined successfully.\")"
+            ]
+        },
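+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "As a quick check (a sketch), `save_file` can be exercised with a throwaway\n",
+                "file; it lands in `temp_dir` and is removed by the cleanup cell at the end\n",
+                "of this notebook."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "# Sketch: smoke-test the save_file helper defined above.\n",
+                "save_file(\"save-file-smoke-test.txt\", \"test\\n\")"
+            ]
+        },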
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get name of the \u2018Running\u2019 `controller` `pod`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place the name  of the 'Running' controller pod in variable `controller`\n",
+                "\n",
+                "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n",
+                "\n",
+                "print(f\"Controller pod name: {controller}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the name of the `app proxy` `pod`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place the name  of the appproxy pod in variable `pod`\n",
+                "\n",
+                "pod = run(f'kubectl get pod --selector=app=appproxy -n {namespace} -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
+                "\n",
+                "print(f\"App proxy pod name: {pod}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Validate certificate common name and alt names"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import json\n",
+                "from urllib.parse import urlparse\n",
+                "\n",
+                "kubernetes_default_record_name = 'kubernetes.default'\n",
+                "kubernetes_default_svc_prefix = 'kubernetes.default.svc'\n",
+                "default_dns_suffix = 'svc.cluster.local'\n",
+                "dns_suffix = ''\n",
+                "\n",
+                "nslookup_output=run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"nslookup {kubernetes_default_record_name} > /tmp/nslookup.out; cat /tmp/nslookup.out; rm /tmp/nslookup.out\"  ', return_output=True)\n",
+                "\n",
+                "name = re.findall('Name:\\s+(.[^,|^\\s|^\\n]+)', nslookup_output)\n",
+                "\n",
+                "if not name or kubernetes_default_svc_prefix not in name[0]:\n",
+                "    dns_suffix = default_dns_suffix\n",
+                "else:\n",
+                "    dns_suffix = 'svc' + name[0].replace(kubernetes_default_svc_prefix, '')\n",
+                "\n",
+                "alt_names = \"\"\n",
+                "bdc_fqdn = \"\"\n",
+                "\n",
+                "alt_names += f\"DNS.1 = {common_name}\\n\"\n",
+                "alt_names += f\"DNS.2 = {common_name}.{namespace}.{dns_suffix} \\n\"\n",
+                "\n",
+                "hdfs_vault_svc = \"hdfsvault-svc\"\n",
+                "bdc_config = run(\"azdata bdc config show\", return_output=True)\n",
+                "bdc_config = json.loads(bdc_config)\n",
+                "\n",
+                "dns_counter = 3 # DNS.1 and DNS.2 are already in the certificate template\n",
+                "\n",
+                "# Stateful set related DNS names\n",
+                "#\n",
+                "if app_name == \"gateway\" or app_name == \"master\":\n",
+                "  alt_names += f'DNS.{str(dns_counter)} = {pod}.{common_name}\\n'\n",
+                "  dns_counter = dns_counter + 1\n",
+                "  alt_names += f'DNS.{str(dns_counter)} = {pod}.{common_name}.{namespace}.{dns_suffix}\\n'\n",
+                "  dns_counter = dns_counter + 1\n",
+                "\n",
+                "# AD related DNS names\n",
+                "#\n",
+                "if \"security\" in bdc_config[\"spec\"] and \"activeDirectory\" in bdc_config[\"spec\"][\"security\"]:\n",
+                "    domain_dns_name = bdc_config[\"spec\"][\"security\"][\"activeDirectory\"][\"domainDnsName\"]\n",
+                "    subdomain_name = bdc_config[\"spec\"][\"security\"][\"activeDirectory\"][\"subdomain\"]\n",
+                "\n",
+                "    if subdomain_name:\n",
+                "        bdc_fqdn = f\"{subdomain_name}.{domain_dns_name}\"\n",
+                "    else:\n",
+                "        bdc_fqdn = f\"{namespace}.{domain_dns_name}\"\n",
+                "\n",
+                "    alt_names += f\"DNS.{str(dns_counter)} = {common_name}.{bdc_fqdn}\\n\"\n",
+                "    dns_counter = dns_counter + 1\n",
+                "\n",
+                "    if app_name == \"gateway\" or app_name == \"master\":\n",
+                "      alt_names += f'DNS.{str(dns_counter)} = {pod}.{bdc_fqdn}\\n'\n",
+                "      dns_counter = dns_counter + 1\n",
+                "\n",
+                "    # Endpoint DNS names for bdc certificates\n",
+                "    #\n",
+                "    if app_name in bdc_config[\"spec\"][\"resources\"]:\n",
+                "        app_name_endpoints = bdc_config[\"spec\"][\"resources\"][app_name][\"spec\"][\"endpoints\"]\n",
+                "        for endpoint in app_name_endpoints:\n",
+                "            if \"dnsName\" in endpoint:\n",
+                "                alt_names += f'DNS.{str(dns_counter)} = {endpoint[\"dnsName\"]}\\n'\n",
+                "                dns_counter = dns_counter + 1\n",
+                "        \n",
+                "    # Endpoint DNS names for control plane certificates\n",
+                "    #\n",
+                "    if app_name == \"controller\" or app_name == \"mgmtproxy\":\n",
+                "        bdc_endpoint_list = run(\"azdata bdc endpoint list\", return_output=True)\n",
+                "        bdc_endpoint_list = json.loads(bdc_endpoint_list)\n",
+                "\n",
+                "        # Parse the DNS host name from:\n",
+                "        #\n",
+                "        #    \"endpoint\": \"https://monitor.aris.local:30777\"\n",
+                "        # \n",
+                "        for endpoint in bdc_endpoint_list:\n",
+                "            if endpoint[\"name\"] == app_name:\n",
+                "                url = urlparse(endpoint[\"endpoint\"])\n",
+                "                alt_names += f\"DNS.{str(dns_counter)} = {url.hostname}\\n\"\n",
+                "                dns_counter = dns_counter + 1\n",
+                "\n",
+                "# Special case for the controller certificate\n",
+                "#\n",
+                "if app_name == \"controller\":\n",
+                "    alt_names += f\"DNS.{str(dns_counter)} = localhost\\n\"\n",
+                "    dns_counter = dns_counter + 1\n",
+                "\n",
+                "    # Add hdfsvault-svc host for key management calls.\n",
+                "    #\n",
+                "    alt_names += f\"DNS.{str(dns_counter)} = {hdfs_vault_svc}\\n\"\n",
+                "    dns_counter = dns_counter + 1\n",
+                "\n",
+                "    # Add hdfsvault-svc FQDN for key management calls.\n",
+                "    #\n",
+                "    if bdc_fqdn:\n",
+                "        alt_names += f\"DNS.{str(dns_counter)} = {hdfs_vault_svc}.{bdc_fqdn}\\n\"\n",
+                "        dns_counter = dns_counter + 1\n",
+                "\n",
+                "required_dns_names = re.findall('DNS\\.[0-9] = ([^,|^\\s|^\\n]+)', alt_names)\n",
+                "\n",
+                "# Get certificate common name and DNS names\n",
+                "# \n",
+                "cert = run(f'kubectl exec {controller} -c controller -n {namespace} -- openssl x509 -in {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -text -noout', return_output=True)\n",
+                "subject = re.findall('Subject:(.+)', cert)[0]\n",
+                "certficate_common_name = re.findall('CN=(.[^,|^\\s|^\\n]+)', subject)[0]\n",
+                "certficate_dns_names = re.findall('DNS:(.[^,|^\\s|^\\n]+)', cert)\n",
+                "\n",
+                "# Validate the common name\n",
+                "#\n",
+                "if (common_name != certficate_common_name):\n",
+                "    run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"rm -rf {test_cert_store_root}/{app_name}\"')\n",
+                "    raise SystemExit(f'Certficate common name does not match the expected one: {common_name}')\n",
+                "\n",
+                "# Validate the DNS names\n",
+                "#\n",
+                "if not all(dns_name in certficate_dns_names for dns_name in required_dns_names):\n",
+                "    run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"rm -rf {test_cert_store_root}/{app_name}\"')\n",
+                "    raise SystemExit(f'Certficate does not have all required DNS names: {required_dns_names}')"
+            ]
+        },
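+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "The validation above extracts only the Subject CN and the SAN entries from\n",
+                "the `openssl x509` output. To eyeball the full certificate (a sketch reusing\n",
+                "the same variables as the cell above), print it unparsed:"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "# Optional (sketch): dump the full certificate text for manual inspection,\n",
+                "# using the same exec/openssl pattern as the validation cell above.\n",
+                "run(f'kubectl exec {controller} -c controller -n {namespace} -- openssl x509 -in {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -text -noout')"
+            ]
+        },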
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Copy certifcate files from `controller` to local machine"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import os\n",
+                "\n",
+                "cwd = os.getcwd()\n",
+                "os.chdir(temp_dir) # Use chdir to workaround kubectl bug on Windows, which incorrectly processes 'c:\\' on kubectl cp cmd line \n",
+                "\n",
+                "run(f'kubectl cp {controller}:{test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem {prefix_keyfile_name}-certificate.pem -c controller -n {namespace}')\n",
+                "run(f'kubectl cp {controller}:{test_cert_store_root}/{app_name}/{prefix_keyfile_name}-privatekey.pem {prefix_keyfile_name}-privatekey.pem -c controller -n {namespace}')\n",
+                "\n",
+                "os.chdir(cwd)"
+            ]
+        },
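+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "Optionally (a sketch), confirm both PEM files were staged locally before\n",
+                "uploading them to `controldb`:"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "# Sketch: verify the certificate and private key landed in temp_dir.\n",
+                "import os\n",
+                "\n",
+                "for file_name in [f'{prefix_keyfile_name}-certificate.pem', f'{prefix_keyfile_name}-privatekey.pem']:\n",
+                "    full_path = os.path.join(temp_dir, file_name)\n",
+                "    print(f'{full_path}: {os.path.getsize(full_path)} bytes')"
+            ]
+        },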
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Copy certifcate files from local machine to `controldb`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import os\n",
+                "\n",
+                "cwd = os.getcwd()\n",
+                "os.chdir(temp_dir) # Workaround kubectl bug on Windows, can't put c:\\ on kubectl cp cmd line \n",
+                "\n",
+                "run(f'kubectl cp {prefix_keyfile_name}-certificate.pem controldb-0:/var/opt/mssql/{prefix_keyfile_name}-certificate.pem -c mssql-server -n {namespace}')\n",
+                "run(f'kubectl cp {prefix_keyfile_name}-privatekey.pem controldb-0:/var/opt/mssql/{prefix_keyfile_name}-privatekey.pem -c mssql-server -n {namespace}')\n",
+                "\n",
+                "os.chdir(cwd)"
+            ]
+        },
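+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "Optionally (a sketch), list the staged files on the `controldb` pod to\n",
+                "confirm the copy succeeded:"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "# Sketch: list the staged PEM files on controldb-0, using the same\n",
+                "# kubectl exec pattern as the cells below.\n",
+                "run(f'kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"ls -la /var/opt/mssql/{prefix_keyfile_name}-*.pem\"')"
+            ]
+        },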
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the `controller-db-rw-secret` secret\n",
+                "\n",
+                "Get the controller SQL symmetric key password for decryption."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import base64\n",
+                "\n",
+                "controller_db_rw_secret = run(f'kubectl get secret/controller-db-rw-secret -n {namespace} -o jsonpath={{.data.encryptionPassword}}', return_output=True)\n",
+                "controller_db_rw_secret = base64.b64decode(controller_db_rw_secret).decode('utf-8')\n",
+                "\n",
+                "print(\"controller_db_rw_secret retrieved\")"
+            ]
+        },
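+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "For reference (a sketch), the two steps above can be collapsed into a single\n",
+                "call, because `run` can base64-decode captured output itself:"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "# Sketch: equivalent single-step retrieval using base64_decode=True.\n",
+                "controller_db_rw_secret = run(f'kubectl get secret/controller-db-rw-secret -n {namespace} -o jsonpath={{.data.encryptionPassword}}', return_output=True, base64_decode=True)\n",
+                "\n",
+                "print(\"controller_db_rw_secret retrieved\")"
+            ]
+        },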
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Update the files table with the certificates through opened SQL connection"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import os\n",
+                "\n",
+                "sql = f\"\"\"\n",
+                "OPEN SYMMETRIC KEY ControllerDbSymmetricKey DECRYPTION BY PASSWORD = '{controller_db_rw_secret}'\n",
+                "\n",
+                "DECLARE @FileData VARBINARY(MAX), @Key uniqueidentifier;\n",
+                "SELECT @Key = KEY_GUID('ControllerDbSymmetricKey');\n",
+                " \n",
+                "SELECT TOP 1 @FileData = doc.BulkColumn FROM OPENROWSET(BULK N'/var/opt/mssql/{prefix_keyfile_name}-certificate.pem', SINGLE_BLOB) AS doc;\n",
+                "EXEC [dbo].[sp_set_file_data_encrypted] @FilePath = '/config/scaledsets/{scaledset_name}/containers/{container_name}/files/{prefix_keyfile_name}-certificate.pem',\n",
+                " @Data = @FileData,\n",
+                " @KeyGuid = @Key,\n",
+                " @Version = '0',\n",
+                " @User = '{user}',\n",
+                " @Group = '{group}',\n",
+                " @Mode = '{mode}';\n",
+                "\n",
+                "SELECT TOP 1 @FileData = doc.BulkColumn FROM OPENROWSET(BULK N'/var/opt/mssql/{prefix_keyfile_name}-privatekey.pem', SINGLE_BLOB) AS doc;\n",
+                "EXEC [dbo].[sp_set_file_data_encrypted] @FilePath = '/config/scaledsets/{scaledset_name}/containers/{container_name}/files/{prefix_keyfile_name}-privatekey.pem',\n",
+                " @Data = @FileData,\n",
+                " @KeyGuid = @Key,\n",
+                " @Version = '0',\n",
+                " @User = '{user}',\n",
+                " @Group = '{group}',\n",
+                " @Mode = '{mode}';\n",
+                "\"\"\"\n",
+                "\n",
+                "save_file(\"insert_certificates.sql\", sql)\n",
+                "\n",
+                "cwd = os.getcwd()\n",
+                "os.chdir(temp_dir) # Workaround kubectl bug on Windows, can't put c:\\ on kubectl cp cmd line \n",
+                "\n",
+                "run(f'kubectl cp insert_certificates.sql controldb-0:/var/opt/mssql/insert_certificates.sql -c mssql-server -n {namespace}')\n",
+                "\n",
+                "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"SQLCMDPASSWORD=`cat /var/run/secrets/credentials/mssql-sa-password/password` /opt/mssql-tools/bin/sqlcmd -b -U sa -d controller -i /var/opt/mssql/insert_certificates.sql\" \"\"\")\n",
+                "\n",
+                "# Clean up\n",
+                "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"rm /var/opt/mssql/insert_certificates.sql\" \"\"\")\n",
+                "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"rm /var/opt/mssql/{prefix_keyfile_name}-certificate.pem\" \"\"\")\n",
+                "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"rm /var/opt/mssql/{prefix_keyfile_name}-privatekey.pem\" \"\"\")\n",
+                "\n",
+                "os.chdir(cwd)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Clear out the controller\\_db\\_rw\\_secret variable"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "controller_db_rw_secret= \"\""
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Clean up certificate staging area\n",
+                "\n",
+                "Remove the certificate files generated on disk (they have now been\n",
+                "placed in the controller database)."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "cmd = f\"rm -r {test_cert_store_root}/{app_name}\"\n",
+                "\n",
+                "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Restart Pod"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "run(f'kubectl delete pod {pod} -n {namespace}')"
+            ]
+        },
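+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "Optionally (a sketch), wait for the recreated pod to report `Ready`; if the\n",
+                "old pod is still terminating, this may need to be re-run:"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "# Sketch: block until Kubernetes reports the appproxy pod Ready again.\n",
+                "run(f'kubectl wait --for=condition=Ready pod --selector=app=appproxy -n {namespace} --timeout=600s')"
+            ]
+        },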
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Clean up temporary directory for staging configuration files"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Delete the temporary directory used to hold configuration files\n",
+                "\n",
+                "import shutil\n",
+                "\n",
+                "shutil.rmtree(temp_dir)\n",
+                "\n",
+                "print(f'Temporary directory deleted: {temp_dir}')"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "Related\n",
+                "-------\n",
+                "\n",
+                "-   [CER022 - Create App Proxy\n",
+                "    certificate](../cert-management/cer022-create-app-proxy-cert.ipynb)\n",
+                "\n",
+                "-   [CER032 - Sign App-Proxy certificate with generated\n",
+                "    CA](../cert-management/cer032-sign-app-proxy-generated-cert.ipynb)"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": true
+        }
+    }
+}

+ 1027 - 0
Big-Data-Clusters/CU8/Public/content/cert-management/cer043-install-master-certs.ipynb

@@ -0,0 +1,1027 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "CER043 - Install signed Master certificates\n",
+                "===========================================\n",
+                "\n",
+                "This notebook installs into the Big Data Cluster the certificates signed\n",
+                "using:\n",
+                "\n",
+                "-   [CER033 - Sign Master certificates with generated\n",
+                "    CA](../cert-management/cer033-sign-master-generated-certs.ipynb)\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "app_name = \"master\"\n",
+                "scaledset_name = \"master\"\n",
+                "container_name = \"mssql-server\"\n",
+                "common_name = \"master-svc\"\n",
+                "user = \"mssql\"\n",
+                "group = \"mssql\"\n",
+                "mode = \"550\"\n",
+                "\n",
+                "prefix_keyfile_name = \"sql\"\n",
+                "certificate_names = {\"master-0\" : \"master-0-certificate.pem\", \"master-1\" : \"master-1-certificate.pem\", \"master-2\" : \"master-2-certificate.pem\"}\n",
+                "key_names = {\"master-0\" : \"master-0-privatekey.pem\", \"master-1\" : \"master-1-privatekey.pem\", \"master-2\" : \"master-2-privatekey.pem\"}\n",
+                "\n",
+                "test_cert_store_root = \"/var/opt/secrets/test-certificates\""
+            ]
+        },
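+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "For orientation (a sketch), the dictionaries above pair each `master` replica\n",
+                "with its signed certificate and private key file:"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "# Sketch: show the per-replica certificate/key file mapping.\n",
+                "for pod_name in certificate_names:\n",
+                "    print(f'{pod_name}: {certificate_names[pod_name]} / {key_names[pod_name]}')"
+            ]
+        },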
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Common functions\n",
+                "\n",
+                "Define helper functions used in this notebook."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n",
+                "import sys\n",
+                "import os\n",
+                "import re\n",
+                "import json\n",
+                "import platform\n",
+                "import shlex\n",
+                "import shutil\n",
+                "import datetime\n",
+                "\n",
+                "from subprocess import Popen, PIPE\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
+                "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
+                "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
+                "\n",
+                "first_run = True\n",
+                "rules = None\n",
+                "debug_logging = False\n",
+                "\n",
+                "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
+                "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
+                "\n",
+                "    NOTES:\n",
+                "\n",
+                "    1.  Commands that need this kind of ' quoting on Windows e.g.:\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n",
+                "\n",
+                "        Need to actually pass in as '\"':\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n",
+                "\n",
+                "        The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n",
+                "        \n",
+                "            `iter(p.stdout.readline, b'')`\n",
+                "\n",
+                "        The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n",
+                "    \"\"\"\n",
+                "    MAX_RETRIES = 5\n",
+                "    output = \"\"\n",
+                "    retry = False\n",
+                "\n",
+                "    global first_run\n",
+                "    global rules\n",
+                "\n",
+                "    if first_run:\n",
+                "        first_run = False\n",
+                "        rules = load_rules()\n",
+                "\n",
+                "    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n",
+                "    #\n",
+                "    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
+                "    #\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n",
+                "        cmd = cmd.replace(\"\\n\", \" \")\n",
+                "\n",
+                "    # shlex.split is required on bash and for Windows paths with spaces\n",
+                "    #\n",
+                "    cmd_actual = shlex.split(cmd)\n",
+                "\n",
+                "    # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n",
+                "    #\n",
+                "    user_provided_exe_name = cmd_actual[0].lower()\n",
+                "\n",
+                "    # When running python, use the python in the ADS sandbox ({sys.executable})\n",
+                "    #\n",
+                "    if cmd.startswith(\"python \"):\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n",
+                "\n",
+                "        # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n",
+                "        # with:\n",
+                "        #\n",
+                "        #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n",
+                "        #\n",
+                "        # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n",
+                "        #\n",
+                "        if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n",
+                "            os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n",
+                "\n",
+                "    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n",
+                "    #\n",
+                "    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n",
+                "\n",
+                "    # To aid supportability, determine which binary file will actually be executed on the machine\n",
+                "    #\n",
+                "    which_binary = None\n",
+                "\n",
+                "    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n",
+                "    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n",
+                "    # of CURL exists on the machine use that one.  (Unfortunately the curl.exe in System32 is almost\n",
+                "    # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n",
+                "    # look for the 2nd installation of CURL in the path)\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n",
+                "        path = os.getenv('PATH')\n",
+                "        for p in path.split(os.path.pathsep):\n",
+                "            p = os.path.join(p, \"curl.exe\")\n",
+                "            if os.path.exists(p) and os.access(p, os.X_OK):\n",
+                "                if p.lower().find(\"system32\") == -1:\n",
+                "                    cmd_actual[0] = p\n",
+                "                    which_binary = p\n",
+                "                    break\n",
+                "\n",
+                "    # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n",
+                "    # seems to be required for .msi installs of azdata.cmd/az.cmd.  (otherwise Popen returns FileNotFound) \n",
+                "    #\n",
+                "    # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        which_binary = shutil.which(cmd_actual[0])\n",
+                "\n",
+                "    # Display an install HINT, so the user can click on a SOP to install the missing binary\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n",
+                "        print(sys.path)\n",
+                "\n",
+                "        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
+                "            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n",
+                "    else:   \n",
+                "        cmd_actual[0] = which_binary\n",
+                "\n",
+                "    start_time = datetime.datetime.now().replace(microsecond=0)\n",
+                "\n",
+                "    print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n",
+                "    print(f\"       using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n",
+                "    print(f\"       cwd: {os.getcwd()}\")\n",
+                "\n",
+                "    # Command-line tools such as CURL and AZDATA HDFS commands output\n",
+                "    # scrolling progress bars, which causes Jupyter to hang forever, to\n",
+                "    # workaround this, use no_output=True\n",
+                "    #\n",
+                "\n",
+                "\n",
+                "    # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n",
+                "    #\n",
+                "    wait = True \n",
+                "\n",
+                "    try:\n",
+                "        if no_output:\n",
+                "            p = Popen(cmd_actual)\n",
+                "        else:\n",
+                "            p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n",
+                "            with p.stdout:\n",
+                "                for line in iter(p.stdout.readline, b''):\n",
+                "                    line = line.decode()\n",
+                "                    if return_output:\n",
+                "                        output = output + line\n",
+                "                    else:\n",
+                "                        if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n",
+                "                            regex = re.compile('  \"(.*)\"\\: \"(.*)\"') \n",
+                "                            match = regex.match(line)\n",
+                "                            if match:\n",
+                "                                if match.group(1).find(\"HTML\") != -1:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n",
+                "                                else:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n",
+                "\n",
+                "                                    wait = False\n",
+                "                                    break # otherwise infinite hang, have not worked out why yet.\n",
+                "                        else:\n",
+                "                            print(line, end='')\n",
+                "                            if rules is not None:\n",
+                "                                apply_expert_rules(line)\n",
+                "\n",
+                "        if wait:\n",
+                "            p.wait()\n",
+                "    except FileNotFoundError as e:\n",
+                "        if install_hint is not None:\n",
+                "            display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n",
+                "\n",
+                "    exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n",
+                "\n",
+                "    if not no_output:\n",
+                "        for line in iter(p.stderr.readline, b''):\n",
+                "            try:\n",
+                "                line_decoded = line.decode()\n",
+                "            except UnicodeDecodeError:\n",
+                "                # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n",
+                "                #\n",
+                "                #   \\xa0\n",
+                "                #\n",
+                "                # For example see this in the response from `az group create`:\n",
+                "                #\n",
+                "                # ERROR: Get Token request returned http error: 400 and server \n",
+                "                # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n",
+                "                # The refresh token has expired due to inactivity.\\xa0The token was \n",
+                "                # issued on 2018-10-25T23:35:11.9832872Z\n",
+                "                #\n",
+                "                # which generates the exception:\n",
+                "                #\n",
+                "                # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n",
+                "                #\n",
+                "                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n",
+                "                print(line)\n",
+                "                line_decoded = \"\"\n",
+                "                pass\n",
+                "            else:\n",
+                "\n",
+                "                # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n",
+                "                # print this empty \"ERR:\" as it confuses.\n",
+                "                #\n",
+                "                if line_decoded == \"\":\n",
+                "                    continue\n",
+                "                \n",
+                "                print(f\"STDERR: {line_decoded}\", end='')\n",
+                "\n",
+                "                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n",
+                "                    exit_code_workaround = 1\n",
+                "\n",
+                "                # inject HINTs to next TSG/SOP based on output in stderr\n",
+                "                #\n",
+                "                if user_provided_exe_name in error_hints:\n",
+                "                    for error_hint in error_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(error_hint[0]) != -1:\n",
+                "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
+                "\n",
+                "                # apply expert rules (to run follow-on notebooks), based on output\n",
+                "                #\n",
+                "                if rules is not None:\n",
+                "                    apply_expert_rules(line_decoded)\n",
+                "\n",
+                "                # Verify if a transient error, if so automatically retry (recursive)\n",
+                "                #\n",
+                "                if user_provided_exe_name in retry_hints:\n",
+                "                    for retry_hint in retry_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(retry_hint) != -1:\n",
+                "                            if retry_count < MAX_RETRIES:\n",
+                "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
+                "                                retry_count = retry_count + 1\n",
+                "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
+                "\n",
+                "                                if return_output:\n",
+                "                                    if base64_decode:\n",
+                "                                        import base64\n",
+                "\n",
+                "                                        return base64.b64decode(output).decode('utf-8')\n",
+                "                                    else:\n",
+                "                                        return output\n",
+                "\n",
+                "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
+                "\n",
+                "    # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n",
+                "    # don't wait here, if success known above\n",
+                "    #\n",
+                "    if wait: \n",
+                "        if p.returncode != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n",
+                "    else:\n",
+                "        if exit_code_workaround !=0 :\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n",
+                "\n",
+                "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
+                "\n",
+                "    if return_output:\n",
+                "        if base64_decode:\n",
+                "            import base64\n",
+                "\n",
+                "            return base64.b64decode(output).decode('utf-8')\n",
+                "        else:\n",
+                "            return output\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    \"\"\"Load a json file from disk and return the contents\"\"\"\n",
+                "\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def load_rules():\n",
+                "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
+                "\n",
+                "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
+                "    #\n",
+                "    try:\n",
+                "        j = load_json(\"cer043-install-master-certs.ipynb\")\n",
+                "    except:\n",
+                "        pass # If the user has renamed the book, we can't load ourself.  NOTE: Is there a way in Jupyter, to know your own filename?\n",
+                "    else:\n",
+                "        if \"metadata\" in j and \\\n",
+                "            \"azdata\" in j[\"metadata\"] and \\\n",
+                "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
+                "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "\n",
+                "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
+                "\n",
+                "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n",
+                "\n",
+                "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
+                "\n",
+                "            return rules\n",
+                "\n",
+                "def apply_expert_rules(line):\n",
+                "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n",
+                "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
+                "\n",
+                "    global rules\n",
+                "\n",
+                "    for rule in rules:\n",
+                "        notebook = rule[1]\n",
+                "        cell_type = rule[2]\n",
+                "        output_type = rule[3] # i.e. stream or error\n",
+                "        output_type_name = rule[4] # i.e. ename or name \n",
+                "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
+                "        details_name = rule[6]  # i.e. evalue or text \n",
+                "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
+                "\n",
+                "        if debug_logging:\n",
+                "            print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n",
+                "\n",
+                "        if re.match(expression, line, re.DOTALL):\n",
+                "\n",
+                "            if debug_logging:\n",
+                "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n",
+                "\n",
+                "            match_found = True\n",
+                "\n",
+                "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
+                "\n",
+                "\n",
+                "\n",
+                "print('Common functions defined successfully.')\n",
+                "\n",
+                "# Hints for binary (transient fault) retry, (known) error and install guide\n",
+                "#\n",
+                "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n",
+                "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n",
+                "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the Kubernetes namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster use the kubectl command line\n",
+                "interface .\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
+                "    except:\n",
+                "        from IPython.display import Markdown\n",
+                "        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'.  SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Create a temporary directory to stage files"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Create a temporary directory to hold configuration files\n",
+                "\n",
+                "import tempfile\n",
+                "\n",
+                "temp_dir = tempfile.mkdtemp()\n",
+                "\n",
+                "print(f\"Temporary directory created: {temp_dir}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Helper function to save configuration files to disk"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define helper function 'save_file' to save configuration files to the temporary directory created above\n",
+                "import os\n",
+                "import io\n",
+                "\n",
+                "def save_file(filename, contents):\n",
+                "    with io.open(os.path.join(temp_dir, filename), \"w\", encoding='utf8', newline='\\n') as text_file:\n",
+                "      text_file.write(contents)\n",
+                "\n",
+                "      print(\"File saved: \" + os.path.join(temp_dir, filename))\n",
+                "\n",
+                "print(\"Function `save_file` defined successfully.\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get name of the \u2018Running\u2019 `controller` `pod`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place the name  of the 'Running' controller pod in variable `controller`\n",
+                "\n",
+                "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n",
+                "\n",
+                "print(f\"Controller pod name: {controller}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the name of the `master` `pods`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place the name of the master pods in variable `pods`\n",
+                "\n",
+                "podNames = run(f'kubectl get pod --selector=app=master -n {namespace} -o jsonpath={{.items[*].metadata.name}}', return_output=True)\n",
+                "pods = podNames.split(\" \")\n",
+                "\n",
+                "print(f\"Master pod names: {pods}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Validate certificate common name and alt names"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import json\n",
+                "from urllib.parse import urlparse\n",
+                "\n",
+                "kubernetes_default_record_name = 'kubernetes.default'\n",
+                "kubernetes_default_svc_prefix = 'kubernetes.default.svc'\n",
+                "default_dns_suffix = 'svc.cluster.local'\n",
+                "dns_suffix = ''\n",
+                "\n",
+                "nslookup_output=run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"nslookup {kubernetes_default_record_name} > /tmp/nslookup.out; cat /tmp/nslookup.out; rm /tmp/nslookup.out\"  ', return_output=True)\n",
+                "\n",
+                "name = re.findall('Name:\\s+(.[^,|^\\s|^\\n]+)', nslookup_output)\n",
+                "\n",
+                "if not name or kubernetes_default_svc_prefix not in name[0]:\n",
+                "    dns_suffix = default_dns_suffix\n",
+                "else:\n",
+                "    dns_suffix = 'svc' + name[0].replace(kubernetes_default_svc_prefix, '')\n",
+                "\n",
+                "pods.sort()\n",
+                "  \n",
+                "for pod_name in pods:\n",
+                "\n",
+                "    alt_names = \"\"\n",
+                "    bdc_fqdn = \"\"\n",
+                "\n",
+                "    alt_names += f\"DNS.1 = {common_name}\\n\"\n",
+                "    alt_names += f\"DNS.2 = {common_name}.{namespace}.{dns_suffix} \\n\"\n",
+                "\n",
+                "    hdfs_vault_svc = \"hdfsvault-svc\"\n",
+                "    bdc_config = run(\"azdata bdc config show\", return_output=True)\n",
+                "    bdc_config = json.loads(bdc_config)\n",
+                "\n",
+                "    dns_counter = 3 # DNS.1 and DNS.2 are already in the certificate template\n",
+                "\n",
+                "    # Stateful set related DNS names\n",
+                "    #\n",
+                "    if app_name == \"gateway\" or app_name == \"master\":\n",
+                "      alt_names += f'DNS.{str(dns_counter)} = {pod_name}.{common_name}\\n'\n",
+                "      dns_counter = dns_counter + 1\n",
+                "      alt_names += f'DNS.{str(dns_counter)} = {pod_name}.{common_name}.{namespace}.{dns_suffix}\\n'\n",
+                "      dns_counter = dns_counter + 1\n",
+                "\n",
+                "    # AD related DNS names\n",
+                "    #\n",
+                "    if \"security\" in bdc_config[\"spec\"] and \"activeDirectory\" in bdc_config[\"spec\"][\"security\"]:\n",
+                "        domain_dns_name = bdc_config[\"spec\"][\"security\"][\"activeDirectory\"][\"domainDnsName\"]\n",
+                "        subdomain_name = bdc_config[\"spec\"][\"security\"][\"activeDirectory\"][\"subdomain\"]\n",
+                "\n",
+                "        if subdomain_name:\n",
+                "            bdc_fqdn = f\"{subdomain_name}.{domain_dns_name}\"\n",
+                "        else:\n",
+                "            bdc_fqdn = f\"{namespace}.{domain_dns_name}\"\n",
+                "            \n",
+                "        alt_names += f\"DNS.{str(dns_counter)} = {common_name}.{bdc_fqdn}\\n\"\n",
+                "        dns_counter = dns_counter + 1\n",
+                "\n",
+                "        if app_name == \"gateway\" or app_name == \"master\":\n",
+                "          alt_names += f'DNS.{str(dns_counter)} = {pod_name}.{bdc_fqdn}\\n'\n",
+                "          dns_counter = dns_counter + 1\n",
+                "\n",
+                "        # Endpoint DNS names for bdc certificates\n",
+                "        #\n",
+                "        if app_name in bdc_config[\"spec\"][\"resources\"]:\n",
+                "            app_name_endpoints = bdc_config[\"spec\"][\"resources\"][app_name][\"spec\"][\"endpoints\"]\n",
+                "            for endpoint in app_name_endpoints:\n",
+                "                if \"dnsName\" in endpoint:\n",
+                "                    alt_names += f'DNS.{str(dns_counter)} = {endpoint[\"dnsName\"]}\\n'\n",
+                "                    dns_counter = dns_counter + 1\n",
+                "            \n",
+                "        # Endpoint DNS names for control plane certificates\n",
+                "        #\n",
+                "        if app_name == \"controller\" or app_name == \"mgmtproxy\":\n",
+                "            bdc_endpoint_list = run(\"azdata bdc endpoint list\", return_output=True)\n",
+                "            bdc_endpoint_list = json.loads(bdc_endpoint_list)\n",
+                "\n",
+                "            # Parse the DNS host name from:\n",
+                "            #\n",
+                "            #    \"endpoint\": \"https://monitor.aris.local:30777\"\n",
+                "            # \n",
+                "            for endpoint in bdc_endpoint_list:\n",
+                "                if endpoint[\"name\"] == app_name:\n",
+                "                    url = urlparse(endpoint[\"endpoint\"])\n",
+                "                    alt_names += f\"DNS.{str(dns_counter)} = {url.hostname}\\n\"\n",
+                "                    dns_counter = dns_counter + 1\n",
+                "\n",
+                "    # Special case for the controller certificate\n",
+                "    #\n",
+                "    if app_name == \"controller\":\n",
+                "        alt_names += f\"DNS.{str(dns_counter)} = localhost\\n\"\n",
+                "        dns_counter = dns_counter + 1\n",
+                "\n",
+                "        # Add hdfsvault-svc host for key management calls.\n",
+                "        #\n",
+                "        alt_names += f\"DNS.{str(dns_counter)} = {hdfs_vault_svc}\\n\"\n",
+                "        dns_counter = dns_counter + 1\n",
+                "\n",
+                "        # Add hdfsvault-svc FQDN for key management calls.\n",
+                "        #\n",
+                "        if bdc_fqdn:\n",
+                "            alt_names += f\"DNS.{str(dns_counter)} = {hdfs_vault_svc}.{bdc_fqdn}\\n\"\n",
+                "            dns_counter = dns_counter + 1\n",
+                "\n",
+                "    required_dns_names = re.findall('DNS\\.[0-9] = ([^,|^\\s|^\\n]+)', alt_names)\n",
+                "\n",
+                "    # Get certificate common name and DNS names\n",
+                "    # \n",
+                "    cert = run(f'kubectl exec {controller} -c controller -n {namespace} -- openssl x509 -in {test_cert_store_root}/{app_name}/{certificate_names[pod_name]} -text -noout', return_output=True)\n",
+                "    subject = re.findall('Subject:(.+)', cert)[0]\n",
+                "    certficate_common_name = re.findall('CN=(.[^,|^\\s|^\\n]+)', subject)[0]\n",
+                "    certficate_dns_names = re.findall('DNS:(.[^,|^\\s|^\\n]+)', cert)\n",
+                "\n",
+                "    # Validate the common name\n",
+                "    #\n",
+                "    if (common_name != certficate_common_name):\n",
+                "        run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"rm -rf {test_cert_store_root}/{app_name}\"')\n",
+                "        raise SystemExit(f'Certficate common name does not match the expected one: {common_name}')\n",
+                "\n",
+                "    # Validate the DNS names\n",
+                "    #\n",
+                "    if not all(dns_name in certficate_dns_names for dns_name in required_dns_names):\n",
+                "        run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"rm -rf {test_cert_store_root}/{app_name}\"')\n",
+                "        raise SystemExit(f'Certficate does not have all required DNS names: {required_dns_names}')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Copy certifcate files from `controller` to local machine"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import os\n",
+                "\n",
+                "cwd = os.getcwd()\n",
+                "os.chdir(temp_dir) # Use chdir to workaround kubectl bug on Windows, which incorrectly processes 'c:\\' on kubectl cp cmd line \n",
+                "\n",
+                "for pod_name in pods:\n",
+                "\n",
+                "    run(f'kubectl cp {controller}:{test_cert_store_root}/{app_name}/{certificate_names[pod_name]} {certificate_names[pod_name]} -c controller -n {namespace}')\n",
+                "    run(f'kubectl cp {controller}:{test_cert_store_root}/{app_name}/{key_names[pod_name]} {key_names[pod_name]} -c controller -n {namespace}')\n",
+                "\n",
+                "os.chdir(cwd)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Copy certifcate files from local machine to `controldb`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import os\n",
+                "\n",
+                "cwd = os.getcwd()\n",
+                "os.chdir(temp_dir) # Workaround kubectl bug on Windows, can't put c:\\ on kubectl cp cmd line \n",
+                "  \n",
+                "for pod_name in pods:\n",
+                "    run(f'kubectl cp {certificate_names[pod_name]} controldb-0:/var/opt/mssql/{certificate_names[pod_name]} -c mssql-server -n {namespace}')\n",
+                "    run(f'kubectl cp {key_names[pod_name]} controldb-0:/var/opt/mssql/{key_names[pod_name]} -c mssql-server -n {namespace}')\n",
+                "\n",
+                "os.chdir(cwd)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the `controller-db-rw-secret` secret\n",
+                "\n",
+                "Get the controller SQL symmetric key password for decryption."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import base64\n",
+                "\n",
+                "controller_db_rw_secret = run(f'kubectl get secret/controller-db-rw-secret -n {namespace} -o jsonpath={{.data.encryptionPassword}}', return_output=True)\n",
+                "controller_db_rw_secret = base64.b64decode(controller_db_rw_secret).decode('utf-8')\n",
+                "\n",
+                "print(\"controller_db_rw_secret retrieved\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Update the files table with the certificates through opened SQL connection"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import os\n",
+                "\n",
+                "sql = f\"\"\"\n",
+                "OPEN SYMMETRIC KEY ControllerDbSymmetricKey DECRYPTION BY PASSWORD = '{controller_db_rw_secret}'\n",
+                "\n",
+                "DECLARE @FileData VARBINARY(MAX), @Key uniqueidentifier;\n",
+                "SELECT @Key = KEY_GUID('ControllerDbSymmetricKey');\n",
+                "\n",
+                "\"\"\"\n",
+                "  \n",
+                "for pod_name in pods:\n",
+                "\n",
+                "    insert = f\"\"\"\n",
+                "    SELECT TOP 1 @FileData = doc.BulkColumn FROM OPENROWSET(BULK N'/var/opt/mssql/{certificate_names[pod_name]}', SINGLE_BLOB) AS doc;\n",
+                "    EXEC [dbo].[sp_set_file_data_encrypted] @FilePath = '/config/scaledsets/{scaledset_name}/pods/{pod_name}/containers/{container_name}/files/{prefix_keyfile_name}-certificate.pem',\n",
+                "        @Data = @FileData,\n",
+                "        @KeyGuid = @Key,\n",
+                "        @Version = '0',\n",
+                "        @User = '{user}',\n",
+                "        @Group = '{group}',\n",
+                "        @Mode = '{mode}';\n",
+                "\n",
+                "    SELECT TOP 1 @FileData = doc.BulkColumn FROM OPENROWSET(BULK N'/var/opt/mssql/{key_names[pod_name]}', SINGLE_BLOB) AS doc;\n",
+                "    EXEC [dbo].[sp_set_file_data_encrypted] @FilePath = '/config/scaledsets/{scaledset_name}/pods/{pod_name}/containers/{container_name}/files/{prefix_keyfile_name}-privatekey.pem',\n",
+                "        @Data = @FileData,\n",
+                "        @KeyGuid = @Key,\n",
+                "        @Version = '0',\n",
+                "        @User = '{user}',\n",
+                "        @Group = '{group}',\n",
+                "        @Mode = '{mode}';\n",
+                "\n",
+                "    \"\"\"\n",
+                "\n",
+                "    sql += insert\n",
+                "\n",
+                "save_file(\"insert_certificates.sql\", sql)\n",
+                "\n",
+                "cwd = os.getcwd()\n",
+                "os.chdir(temp_dir) # Workaround kubectl bug on Windows, can't put c:\\ on kubectl cp cmd line \n",
+                "\n",
+                "run(f'kubectl cp insert_certificates.sql controldb-0:/var/opt/mssql/insert_certificates.sql -c mssql-server -n {namespace}')\n",
+                "\n",
+                "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"SQLCMDPASSWORD=`cat /var/run/secrets/credentials/mssql-sa-password/password` /opt/mssql-tools/bin/sqlcmd -b -U sa -d controller -i /var/opt/mssql/insert_certificates.sql\" \"\"\")\n",
+                "\n",
+                "# Clean up\n",
+                "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"rm /var/opt/mssql/insert_certificates.sql\" \"\"\")\n",
+                "\n",
+                "for pod_name in pods:\n",
+                "\n",
+                "  run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"rm /var/opt/mssql/{certificate_names[pod_name]}\" \"\"\")\n",
+                "  run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"rm /var/opt/mssql/{key_names[pod_name]}\" \"\"\")\n",
+                "\n",
+                "os.chdir(cwd)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Clear out the controller\\_db\\_rw\\_secret variable"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "controller_db_rw_secret= \"\""
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Restart Pods"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import threading\n",
+                "import time\n",
+                "\n",
+                "if len(pods) == 1:\n",
+                "  # One master pod indicates non-HA environment, just delete it\n",
+                "  run(f'kubectl delete pod {pods[0]} -n {namespace}')\n",
+                "else:\n",
+                "  # HA setup, delete secondaries before primary\n",
+                "  timeout_s = 300\n",
+                "  check_interval_s = 10\n",
+                "\n",
+                "  master_primary_svc_ip = run(f'kubectl get service master-p-svc -n {namespace} -o jsonpath={{.spec.clusterIP}}', return_output=True) \n",
+                "  master_password = run(f'kubectl exec -it master-0 -c mssql-server -n {namespace} -- cat /var/run/secrets/credentials/pool/mssql-system-password', return_output=True) \n",
+                "\n",
+                "  def run_query(cmd):\n",
+                "    run(f\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- /opt/mssql-tools/bin/sqlcmd -S {master_primary_svc_ip} -U system -P {master_password} -h -1 -q \\\"SET NOCOUNT ON; {cmd}\\\" -o /tmp/res.csv\")\n",
+                "    res=run(f\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- head -n -2 /tmp/res.csv \", return_output=True)\n",
+                "    return res\n",
+                "\n",
+                "  def get_number_of_unsynchronized_replicas():\n",
+                "    cmd = 'select count(*) from sys.dm_hadr_database_replica_states where synchronization_state <> 2'\n",
+                "    res = run_query(cmd)\n",
+                "    return int(res.split()[0])\n",
+                "\n",
+                "  def get_primary_replica():\n",
+                "    cmd = 'select distinct replica_server_name from sys.dm_hadr_database_replica_states s join sys.availability_replicas r on s.replica_id = r.replica_id where is_primary_replica = 1'\n",
+                "    res = run_query(cmd)\n",
+                "    return res.split()[0]\n",
+                "\n",
+                "  def get_secondary_replicas():\n",
+                "    cmd = 'select distinct replica_server_name from sys.dm_hadr_database_replica_states s join sys.availability_replicas r on s.replica_id = r.replica_id where is_primary_replica = 0'\n",
+                "    res = run_query(cmd)\n",
+                "    rows = res.strip().split(\"\\n\")\n",
+                "    res = []\n",
+                "    for row in rows:\n",
+                "      res.append(row.strip())\n",
+                "    return res\n",
+                "\n",
+                "  def all_replicas_syncrhonized():\n",
+                "    while True:\n",
+                "      time.sleep(check_interval_s)\n",
+                "      unsynchronized_replicas_cnt=get_number_of_unsynchronized_replicas()\n",
+                "      if unsynchronized_replicas_cnt == 0:\n",
+                "        return True\n",
+                "\n",
+                "  def wait_for_replicas_to_synchronize():\n",
+                "    mt = threading.Thread(target=all_replicas_syncrhonized)\n",
+                "    mt.start()\n",
+                "    mt.join(timeout=timeout_s)\n",
+                "\n",
+                "    if mt.isAlive():\n",
+                "      raise SystemExit(\"Timeout waiting for all replicas to be synchronized.\")\n",
+                "\n",
+                "  secondary_replicas = get_secondary_replicas()\n",
+                "  for replica in secondary_replicas:\n",
+                "    wait_for_replicas_to_synchronize()\n",
+                "    run(f'kubectl delete pod {replica} -n {namespace}')\n",
+                "\n",
+                "  primary_replica = get_primary_replica() \n",
+                "  wait_for_replicas_to_synchronize()\n",
+                "\n",
+                "  key = \"/var/run/secrets/certificates/sqlha/mssql-ha-operator-controller-client/mssql-ha-operator-controller-client-privatekey.pem\"\n",
+                "  cert = \"/var/run/secrets/certificates/sqlha/mssql-ha-operator-controller-client/mssql-ha-operator-controller-client-certificate.pem\"\n",
+                "  content_type_header = \"Content-Type: application/json\"\n",
+                "  authorization_header = \"Authorization: Certificate\"\n",
+                "  data = f'{{\"TargetReplicaName\":\"{secondary_replicas[0]}\",\"ForceFailover\":\"false\"}}'\n",
+                "  request_url = f'https://controller-svc:443/internal/api/v1/bdc/services/sql/resources/master/availabilitygroups/containedag/failover'\n",
+                "\n",
+                "  manual_failover_api_command = f\"curl -sS --key {key} --cert  {cert} -X POST --header '{content_type_header}'  --header '{authorization_header}' --data '{data}' {request_url}\"\n",
+                "\n",
+                "  operator_pod = run(f'kubectl get pod --selector=app=mssql-operator -n {namespace} -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
+                "\n",
+                "  run(f'kubectl exec {operator_pod} -c mssql-ha-operator -n {namespace} -- {manual_failover_api_command}')\n",
+                " \n",
+                "  wait_for_replicas_to_synchronize()\n",
+                "  \n",
+                "  run(f'kubectl delete pod {primary_replica} -n {namespace}')\n",
+                "  wait_for_replicas_to_synchronize()\n",
+                "\n",
+                "  run(f\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- rm -rf /tmp/res.csv \")\n"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Clean up certificate staging area\n",
+                "\n",
+                "Remove the certificate files generated on disk (they have now been\n",
+                "placed in the controller database)."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "cmd = f\"rm -r {test_cert_store_root}/{app_name}\"\n",
+                "\n",
+                "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Clean up temporary directory for staging configuration files"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Delete the temporary directory used to hold configuration files\n",
+                "\n",
+                "import shutil\n",
+                "\n",
+                "shutil.rmtree(temp_dir)\n",
+                "\n",
+                "print(f'Temporary directory deleted: {temp_dir}')"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "Related\n",
+                "-------\n",
+                "\n",
+                "-   [CER023 - Create Master\n",
+                "    certificates](../cert-management/cer023-create-master-certs.ipynb)\n",
+                "\n",
+                "-   [CER033 - Sign Master certificates with generated\n",
+                "    CA](../cert-management/cer033-sign-master-generated-certs.ipynb)\n",
+                "\n",
+                "-   [CER044 - Install signed Controller\n",
+                "    certificate](../cert-management/cer044-install-controller-cert.ipynb)"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": true
+        }
+    }
+}

+ 915 - 0
Big-Data-Clusters/CU8/Public/content/cert-management/cer044-install-controller-cert.ipynb

@@ -0,0 +1,915 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "CER044 - Install signed Controller certificate\n",
+                "==============================================\n",
+                "\n",
+                "This notebook installs into the Big Data Cluster the certificate signed\n",
+                "using:\n",
+                "\n",
+                "-   [CER034 - Sign Controller certificate with cluster Root\n",
+                "    CA](../cert-management/cer034-sign-controller-generated-cert.ipynb)\n",
+                "\n",
+                "NOTE: At the end of this notebook the Controller pod and all pods that\n",
+                "use PolyBase (Master Pool and Compute Pool pods) will be restarted to\n",
+                "load the new certificates.\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "app_name = \"controller\"\n",
+                "scaledset_name = \"control\"\n",
+                "container_name = \"controller\"\n",
+                "prefix_keyfile_name = \"controller\"\n",
+                "common_name = \"controller-svc\"\n",
+                "\n",
+                "test_cert_store_root = \"/var/opt/secrets/test-certificates\""
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Common functions\n",
+                "\n",
+                "Define helper functions used in this notebook."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n",
+                "import sys\n",
+                "import os\n",
+                "import re\n",
+                "import json\n",
+                "import platform\n",
+                "import shlex\n",
+                "import shutil\n",
+                "import datetime\n",
+                "\n",
+                "from subprocess import Popen, PIPE\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
+                "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
+                "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
+                "\n",
+                "first_run = True\n",
+                "rules = None\n",
+                "debug_logging = False\n",
+                "\n",
+                "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
+                "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
+                "\n",
+                "    NOTES:\n",
+                "\n",
+                "    1.  Commands that need this kind of ' quoting on Windows e.g.:\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n",
+                "\n",
+                "        Need to actually pass in as '\"':\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n",
+                "\n",
+                "        The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n",
+                "        \n",
+                "            `iter(p.stdout.readline, b'')`\n",
+                "\n",
+                "        The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n",
+                "    \"\"\"\n",
+                "    MAX_RETRIES = 5\n",
+                "    output = \"\"\n",
+                "    retry = False\n",
+                "\n",
+                "    global first_run\n",
+                "    global rules\n",
+                "\n",
+                "    if first_run:\n",
+                "        first_run = False\n",
+                "        rules = load_rules()\n",
+                "\n",
+                "    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n",
+                "    #\n",
+                "    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
+                "    #\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n",
+                "        cmd = cmd.replace(\"\\n\", \" \")\n",
+                "\n",
+                "    # shlex.split is required on bash and for Windows paths with spaces\n",
+                "    #\n",
+                "    cmd_actual = shlex.split(cmd)\n",
+                "\n",
+                "    # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n",
+                "    #\n",
+                "    user_provided_exe_name = cmd_actual[0].lower()\n",
+                "\n",
+                "    # When running python, use the python in the ADS sandbox ({sys.executable})\n",
+                "    #\n",
+                "    if cmd.startswith(\"python \"):\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n",
+                "\n",
+                "        # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n",
+                "        # with:\n",
+                "        #\n",
+                "        #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n",
+                "        #\n",
+                "        # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n",
+                "        #\n",
+                "        if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n",
+                "            os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n",
+                "\n",
+                "    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n",
+                "    #\n",
+                "    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n",
+                "\n",
+                "    # To aid supportability, determine which binary file will actually be executed on the machine\n",
+                "    #\n",
+                "    which_binary = None\n",
+                "\n",
+                "    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n",
+                "    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n",
+                "    # of CURL exists on the machine use that one.  (Unfortunately the curl.exe in System32 is almost\n",
+                "    # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n",
+                "    # look for the 2nd installation of CURL in the path)\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n",
+                "        path = os.getenv('PATH')\n",
+                "        for p in path.split(os.path.pathsep):\n",
+                "            p = os.path.join(p, \"curl.exe\")\n",
+                "            if os.path.exists(p) and os.access(p, os.X_OK):\n",
+                "                if p.lower().find(\"system32\") == -1:\n",
+                "                    cmd_actual[0] = p\n",
+                "                    which_binary = p\n",
+                "                    break\n",
+                "\n",
+                "    # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n",
+                "    # seems to be required for .msi installs of azdata.cmd/az.cmd.  (otherwise Popen returns FileNotFound) \n",
+                "    #\n",
+                "    # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        which_binary = shutil.which(cmd_actual[0])\n",
+                "\n",
+                "    # Display an install HINT, so the user can click on a SOP to install the missing binary\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n",
+                "        print(sys.path)\n",
+                "\n",
+                "        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
+                "            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n",
+                "    else:   \n",
+                "        cmd_actual[0] = which_binary\n",
+                "\n",
+                "    start_time = datetime.datetime.now().replace(microsecond=0)\n",
+                "\n",
+                "    print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n",
+                "    print(f\"       using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n",
+                "    print(f\"       cwd: {os.getcwd()}\")\n",
+                "\n",
+                "    # Command-line tools such as CURL and AZDATA HDFS commands output\n",
+                "    # scrolling progress bars, which causes Jupyter to hang forever, to\n",
+                "    # workaround this, use no_output=True\n",
+                "    #\n",
+                "\n",
+                "\n",
+                "    # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n",
+                "    #\n",
+                "    wait = True \n",
+                "\n",
+                "    try:\n",
+                "        if no_output:\n",
+                "            p = Popen(cmd_actual)\n",
+                "        else:\n",
+                "            p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n",
+                "            with p.stdout:\n",
+                "                for line in iter(p.stdout.readline, b''):\n",
+                "                    line = line.decode()\n",
+                "                    if return_output:\n",
+                "                        output = output + line\n",
+                "                    else:\n",
+                "                        if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n",
+                "                            regex = re.compile('  \"(.*)\"\\: \"(.*)\"') \n",
+                "                            match = regex.match(line)\n",
+                "                            if match:\n",
+                "                                if match.group(1).find(\"HTML\") != -1:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n",
+                "                                else:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n",
+                "\n",
+                "                                    wait = False\n",
+                "                                    break # otherwise infinite hang, have not worked out why yet.\n",
+                "                        else:\n",
+                "                            print(line, end='')\n",
+                "                            if rules is not None:\n",
+                "                                apply_expert_rules(line)\n",
+                "\n",
+                "        if wait:\n",
+                "            p.wait()\n",
+                "    except FileNotFoundError as e:\n",
+                "        if install_hint is not None:\n",
+                "            display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n",
+                "\n",
+                "    exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n",
+                "\n",
+                "    if not no_output:\n",
+                "        for line in iter(p.stderr.readline, b''):\n",
+                "            try:\n",
+                "                line_decoded = line.decode()\n",
+                "            except UnicodeDecodeError:\n",
+                "                # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n",
+                "                #\n",
+                "                #   \\xa0\n",
+                "                #\n",
+                "                # For example see this in the response from `az group create`:\n",
+                "                #\n",
+                "                # ERROR: Get Token request returned http error: 400 and server \n",
+                "                # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n",
+                "                # The refresh token has expired due to inactivity.\\xa0The token was \n",
+                "                # issued on 2018-10-25T23:35:11.9832872Z\n",
+                "                #\n",
+                "                # which generates the exception:\n",
+                "                #\n",
+                "                # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n",
+                "                #\n",
+                "                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n",
+                "                print(line)\n",
+                "                line_decoded = \"\"\n",
+                "                pass\n",
+                "            else:\n",
+                "\n",
+                "                # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n",
+                "                # print this empty \"ERR:\" as it confuses.\n",
+                "                #\n",
+                "                if line_decoded == \"\":\n",
+                "                    continue\n",
+                "                \n",
+                "                print(f\"STDERR: {line_decoded}\", end='')\n",
+                "\n",
+                "                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n",
+                "                    exit_code_workaround = 1\n",
+                "\n",
+                "                # inject HINTs to next TSG/SOP based on output in stderr\n",
+                "                #\n",
+                "                if user_provided_exe_name in error_hints:\n",
+                "                    for error_hint in error_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(error_hint[0]) != -1:\n",
+                "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
+                "\n",
+                "                # apply expert rules (to run follow-on notebooks), based on output\n",
+                "                #\n",
+                "                if rules is not None:\n",
+                "                    apply_expert_rules(line_decoded)\n",
+                "\n",
+                "                # Verify if a transient error, if so automatically retry (recursive)\n",
+                "                #\n",
+                "                if user_provided_exe_name in retry_hints:\n",
+                "                    for retry_hint in retry_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(retry_hint) != -1:\n",
+                "                            if retry_count < MAX_RETRIES:\n",
+                "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
+                "                                retry_count = retry_count + 1\n",
+                "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
+                "\n",
+                "                                if return_output:\n",
+                "                                    if base64_decode:\n",
+                "                                        import base64\n",
+                "\n",
+                "                                        return base64.b64decode(output).decode('utf-8')\n",
+                "                                    else:\n",
+                "                                        return output\n",
+                "\n",
+                "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
+                "\n",
+                "    # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n",
+                "    # don't wait here, if success known above\n",
+                "    #\n",
+                "    if wait: \n",
+                "        if p.returncode != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n",
+                "    else:\n",
+                "        if exit_code_workaround !=0 :\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n",
+                "\n",
+                "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
+                "\n",
+                "    if return_output:\n",
+                "        if base64_decode:\n",
+                "            import base64\n",
+                "\n",
+                "            return base64.b64decode(output).decode('utf-8')\n",
+                "        else:\n",
+                "            return output\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    \"\"\"Load a json file from disk and return the contents\"\"\"\n",
+                "\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def load_rules():\n",
+                "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
+                "\n",
+                "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
+                "    #\n",
+                "    try:\n",
+                "        j = load_json(\"cer044-install-controller-cert.ipynb\")\n",
+                "    except:\n",
+                "        pass # If the user has renamed the book, we can't load ourself.  NOTE: Is there a way in Jupyter, to know your own filename?\n",
+                "    else:\n",
+                "        if \"metadata\" in j and \\\n",
+                "            \"azdata\" in j[\"metadata\"] and \\\n",
+                "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
+                "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "\n",
+                "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
+                "\n",
+                "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n",
+                "\n",
+                "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
+                "\n",
+                "            return rules\n",
+                "\n",
+                "def apply_expert_rules(line):\n",
+                "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n",
+                "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
+                "\n",
+                "    global rules\n",
+                "\n",
+                "    for rule in rules:\n",
+                "        notebook = rule[1]\n",
+                "        cell_type = rule[2]\n",
+                "        output_type = rule[3] # i.e. stream or error\n",
+                "        output_type_name = rule[4] # i.e. ename or name \n",
+                "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
+                "        details_name = rule[6]  # i.e. evalue or text \n",
+                "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
+                "\n",
+                "        if debug_logging:\n",
+                "            print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n",
+                "\n",
+                "        if re.match(expression, line, re.DOTALL):\n",
+                "\n",
+                "            if debug_logging:\n",
+                "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n",
+                "\n",
+                "            match_found = True\n",
+                "\n",
+                "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
+                "\n",
+                "\n",
+                "\n",
+                "print('Common functions defined successfully.')\n",
+                "\n",
+                "# Hints for binary (transient fault) retry, (known) error and install guide\n",
+                "#\n",
+                "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n",
+                "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n",
+                "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the Kubernetes namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster use the kubectl command line\n",
+                "interface .\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
+                "    except:\n",
+                "        from IPython.display import Markdown\n",
+                "        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'.  SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Create a temporary directory to stage files"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Create a temporary directory to hold configuration files\n",
+                "\n",
+                "import tempfile\n",
+                "\n",
+                "temp_dir = tempfile.mkdtemp()\n",
+                "\n",
+                "print(f\"Temporary directory created: {temp_dir}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Helper function to save configuration files to disk"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define helper function 'save_file' to save configuration files to the temporary directory created above\n",
+                "import os\n",
+                "import io\n",
+                "\n",
+                "def save_file(filename, contents):\n",
+                "    with io.open(os.path.join(temp_dir, filename), \"w\", encoding='utf8', newline='\\n') as text_file:\n",
+                "      text_file.write(contents)\n",
+                "\n",
+                "      print(\"File saved: \" + os.path.join(temp_dir, filename))\n",
+                "\n",
+                "print(\"Function `save_file` defined successfully.\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get name of the \u2018Running\u2019 `controller` `pod`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place the name  of the 'Running' controller pod in variable `controller`\n",
+                "\n",
+                "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n",
+                "\n",
+                "print(f\"Controller pod name: {controller}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Validate certificate common name and alt names"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import json\n",
+                "from urllib.parse import urlparse\n",
+                "\n",
+                "kubernetes_default_record_name = 'kubernetes.default'\n",
+                "kubernetes_default_svc_prefix = 'kubernetes.default.svc'\n",
+                "default_dns_suffix = 'svc.cluster.local'\n",
+                "dns_suffix = ''\n",
+                "\n",
+                "nslookup_output=run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"nslookup {kubernetes_default_record_name} > /tmp/nslookup.out; cat /tmp/nslookup.out; rm /tmp/nslookup.out\"  ', return_output=True)\n",
+                "\n",
+                "name = re.findall('Name:\\s+(.[^,|^\\s|^\\n]+)', nslookup_output)\n",
+                "\n",
+                "if not name or kubernetes_default_svc_prefix not in name[0]:\n",
+                "    dns_suffix = default_dns_suffix\n",
+                "else:\n",
+                "    dns_suffix = 'svc' + name[0].replace(kubernetes_default_svc_prefix, '')\n",
+                "\n",
+                "alt_names = \"\"\n",
+                "bdc_fqdn = \"\"\n",
+                "\n",
+                "alt_names += f\"DNS.1 = {common_name}\\n\"\n",
+                "alt_names += f\"DNS.2 = {common_name}.{namespace}.{dns_suffix} \\n\"\n",
+                "\n",
+                "hdfs_vault_svc = \"hdfsvault-svc\"\n",
+                "bdc_config = run(\"azdata bdc config show\", return_output=True)\n",
+                "bdc_config = json.loads(bdc_config)\n",
+                "\n",
+                "dns_counter = 3 # DNS.1 and DNS.2 are already in the certificate template\n",
+                "\n",
+                "# Stateful set related DNS names\n",
+                "#\n",
+                "if app_name == \"gateway\" or app_name == \"master\":\n",
+                "  alt_names += f'DNS.{str(dns_counter)} = {pod}.{common_name}\\n'\n",
+                "  dns_counter = dns_counter + 1\n",
+                "  alt_names += f'DNS.{str(dns_counter)} = {pod}.{common_name}.{namespace}.{dns_suffix}\\n'\n",
+                "  dns_counter = dns_counter + 1\n",
+                "\n",
+                "# AD related DNS names\n",
+                "#\n",
+                "if \"security\" in bdc_config[\"spec\"] and \"activeDirectory\" in bdc_config[\"spec\"][\"security\"]:\n",
+                "    domain_dns_name = bdc_config[\"spec\"][\"security\"][\"activeDirectory\"][\"domainDnsName\"]\n",
+                "    subdomain_name = bdc_config[\"spec\"][\"security\"][\"activeDirectory\"][\"subdomain\"]\n",
+                "\n",
+                "    if subdomain_name:\n",
+                "        bdc_fqdn = f\"{subdomain_name}.{domain_dns_name}\"\n",
+                "    else:\n",
+                "        bdc_fqdn = f\"{namespace}.{domain_dns_name}\"\n",
+                "\n",
+                "    alt_names += f\"DNS.{str(dns_counter)} = {common_name}.{bdc_fqdn}\\n\"\n",
+                "    dns_counter = dns_counter + 1\n",
+                "\n",
+                "    if app_name == \"gateway\" or app_name == \"master\":\n",
+                "      alt_names += f'DNS.{str(dns_counter)} = {pod}.{bdc_fqdn}\\n'\n",
+                "      dns_counter = dns_counter + 1\n",
+                "\n",
+                "    # Endpoint DNS names for bdc certificates\n",
+                "    #\n",
+                "    if app_name in bdc_config[\"spec\"][\"resources\"]:\n",
+                "        app_name_endpoints = bdc_config[\"spec\"][\"resources\"][app_name][\"spec\"][\"endpoints\"]\n",
+                "        for endpoint in app_name_endpoints:\n",
+                "            if \"dnsName\" in endpoint:\n",
+                "                alt_names += f'DNS.{str(dns_counter)} = {endpoint[\"dnsName\"]}\\n'\n",
+                "                dns_counter = dns_counter + 1\n",
+                "        \n",
+                "    # Endpoint DNS names for control plane certificates\n",
+                "    #\n",
+                "    if app_name == \"controller\" or app_name == \"mgmtproxy\":\n",
+                "        bdc_endpoint_list = run(\"azdata bdc endpoint list\", return_output=True)\n",
+                "        bdc_endpoint_list = json.loads(bdc_endpoint_list)\n",
+                "\n",
+                "        # Parse the DNS host name from:\n",
+                "        #\n",
+                "        #    \"endpoint\": \"https://monitor.aris.local:30777\"\n",
+                "        # \n",
+                "        for endpoint in bdc_endpoint_list:\n",
+                "            if endpoint[\"name\"] == app_name:\n",
+                "                url = urlparse(endpoint[\"endpoint\"])\n",
+                "                alt_names += f\"DNS.{str(dns_counter)} = {url.hostname}\\n\"\n",
+                "                dns_counter = dns_counter + 1\n",
+                "\n",
+                "# Special case for the controller certificate\n",
+                "#\n",
+                "if app_name == \"controller\":\n",
+                "    alt_names += f\"DNS.{str(dns_counter)} = localhost\\n\"\n",
+                "    dns_counter = dns_counter + 1\n",
+                "\n",
+                "    # Add hdfsvault-svc host for key management calls.\n",
+                "    #\n",
+                "    alt_names += f\"DNS.{str(dns_counter)} = {hdfs_vault_svc}\\n\"\n",
+                "    dns_counter = dns_counter + 1\n",
+                "\n",
+                "    # Add hdfsvault-svc FQDN for key management calls.\n",
+                "    #\n",
+                "    if bdc_fqdn:\n",
+                "        alt_names += f\"DNS.{str(dns_counter)} = {hdfs_vault_svc}.{bdc_fqdn}\\n\"\n",
+                "        dns_counter = dns_counter + 1\n",
+                "\n",
+                "required_dns_names = re.findall('DNS\\.[0-9] = ([^,|^\\s|^\\n]+)', alt_names)\n",
+                "\n",
+                "# Get certificate common name and DNS names\n",
+                "# \n",
+                "cert = run(f'kubectl exec {controller} -c controller -n {namespace} -- openssl x509 -in {test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem -text -noout', return_output=True)\n",
+                "subject = re.findall('Subject:(.+)', cert)[0]\n",
+                "certficate_common_name = re.findall('CN=(.[^,|^\\s|^\\n]+)', subject)[0]\n",
+                "certficate_dns_names = re.findall('DNS:(.[^,|^\\s|^\\n]+)', cert)\n",
+                "\n",
+                "# Validate the common name\n",
+                "#\n",
+                "if (common_name != certficate_common_name):\n",
+                "    run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"rm -rf {test_cert_store_root}/{app_name}\"')\n",
+                "    raise SystemExit(f'Certficate common name does not match the expected one: {common_name}')\n",
+                "\n",
+                "# Validate the DNS names\n",
+                "#\n",
+                "if not all(dns_name in certficate_dns_names for dns_name in required_dns_names):\n",
+                "    run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"rm -rf {test_cert_store_root}/{app_name}\"')\n",
+                "    raise SystemExit(f'Certficate does not have all required DNS names: {required_dns_names}')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Copy certifcate files from `controller` to local machine"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import os\n",
+                "\n",
+                "cwd = os.getcwd()\n",
+                "os.chdir(temp_dir) # Workaround kubectl bug on Windows, can't put c:\\ on kubectl cp cmd line \n",
+                "\n",
+                "run(f'kubectl cp {controller}:{test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.p12 {prefix_keyfile_name}-certificate.p12 -c controller -n {namespace}')\n",
+                "run(f'kubectl cp {controller}:{test_cert_store_root}/{app_name}/{prefix_keyfile_name}-certificate.pem {prefix_keyfile_name}-certificate.pem -c controller -n {namespace}')\n",
+                "run(f'kubectl cp {controller}:{test_cert_store_root}/{app_name}/{prefix_keyfile_name}-privatekey.pem {prefix_keyfile_name}-privatekey.pem -c controller -n {namespace}')\n",
+                "\n",
+                "os.chdir(cwd)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Copy certifcate files from local machine to `controldb`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import os\n",
+                "\n",
+                "cwd = os.getcwd()\n",
+                "os.chdir(temp_dir) # Workaround kubectl bug on Windows, can't put c:\\ on kubectl cp cmd line \n",
+                "\n",
+                "run(f'kubectl cp {prefix_keyfile_name}-certificate.p12 controldb-0:/var/opt/mssql/{prefix_keyfile_name}-certificate.p12 -c mssql-server -n {namespace}')\n",
+                "run(f'kubectl cp {prefix_keyfile_name}-certificate.pem controldb-0:/var/opt/mssql/{prefix_keyfile_name}-certificate.pem -c mssql-server -n {namespace}')\n",
+                "run(f'kubectl cp {prefix_keyfile_name}-privatekey.pem controldb-0:/var/opt/mssql/{prefix_keyfile_name}-privatekey.pem -c mssql-server -n {namespace}')\n",
+                "\n",
+                "os.chdir(cwd)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the `controller-db-rw-secret` secret\n",
+                "\n",
+                "Get the controller SQL symmetric key password for decryption."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import base64\n",
+                "\n",
+                "controller_db_rw_secret = run(f'kubectl get secret/controller-db-rw-secret -n {namespace} -o jsonpath={{.data.encryptionPassword}}', return_output=True)\n",
+                "controller_db_rw_secret = base64.b64decode(controller_db_rw_secret).decode('utf-8')\n",
+                "\n",
+                "print(\"controller_db_rw_secret retrieved\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Update the files table with the certificates through opened SQL connection"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import os\n",
+                "\n",
+                "sql = f\"\"\"\n",
+                "OPEN SYMMETRIC KEY ControllerDbSymmetricKey DECRYPTION BY PASSWORD = '{controller_db_rw_secret}'\n",
+                "\n",
+                "DECLARE @FileData VARBINARY(MAX), @Key uniqueidentifier;\n",
+                "SELECT @Key = KEY_GUID('ControllerDbSymmetricKey');\n",
+                "\n",
+                "SELECT TOP 1 @FileData = doc.BulkColumn FROM OPENROWSET(BULK N'/var/opt/mssql/{prefix_keyfile_name}-certificate.p12', SINGLE_BLOB) AS doc;\n",
+                "EXEC [dbo].[sp_set_file_data_encrypted] @FilePath = '/config/scaledsets/control/containers/{container_name}/files/{prefix_keyfile_name}-certificate.p12',\n",
+                " @Data = @FileData,\n",
+                " @KeyGuid = @Key,\n",
+                " @Version = '0',\n",
+                " @User = '',\n",
+                " @Group = '',\n",
+                " @Mode = '';\n",
+                "\n",
+                "SELECT TOP 1 @FileData = doc.BulkColumn FROM OPENROWSET(BULK N'/var/opt/mssql/{prefix_keyfile_name}-certificate.pem', SINGLE_BLOB) AS doc;\n",
+                "EXEC [dbo].[sp_set_file_data_encrypted] @FilePath = '/config/scaledsets/control/containers/{container_name}/files/{prefix_keyfile_name}-certificate.pem',\n",
+                " @Data = @FileData,\n",
+                " @KeyGuid = @Key,\n",
+                " @Version = '0',\n",
+                " @User = '',\n",
+                " @Group = '',\n",
+                " @Mode = '';\n",
+                "\n",
+                "SELECT TOP 1 @FileData = doc.BulkColumn FROM OPENROWSET(BULK N'/var/opt/mssql/{prefix_keyfile_name}-privatekey.pem', SINGLE_BLOB) AS doc;\n",
+                "EXEC [dbo].[sp_set_file_data_encrypted] @FilePath = '/config/scaledsets/control/containers/{container_name}/files/{prefix_keyfile_name}-privatekey.pem',\n",
+                " @Data = @FileData,\n",
+                " @KeyGuid = @Key,\n",
+                " @Version = '0',\n",
+                " @User = '',\n",
+                " @Group = '',\n",
+                " @Mode = '';\n",
+                "\"\"\"\n",
+                "\n",
+                "save_file(\"insert_certificates.sql\", sql)\n",
+                "\n",
+                "cwd = os.getcwd()\n",
+                "os.chdir(temp_dir) # Workaround kubectl bug on Windows, can't put c:\\ on kubectl cp cmd line \n",
+                "\n",
+                "run(f'kubectl cp insert_certificates.sql controldb-0:/var/opt/mssql/insert_certificates.sql  -c mssql-server -n {namespace}')\n",
+                "\n",
+                "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"SQLCMDPASSWORD=`cat /var/run/secrets/credentials/mssql-sa-password/password` /opt/mssql-tools/bin/sqlcmd -b -U sa -d controller -i /var/opt/mssql/insert_certificates.sql\" \"\"\")\n",
+                "\n",
+                "# Cleanup\n",
+                "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"rm /var/opt/mssql/insert_certificates.sql\" \"\"\")\n",
+                "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"rm /var/opt/mssql/{prefix_keyfile_name}-certificate.p12\" \"\"\")\n",
+                "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"rm /var/opt/mssql/{prefix_keyfile_name}-certificate.pem\" \"\"\")\n",
+                "run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"rm /var/opt/mssql/{prefix_keyfile_name}-privatekey.pem\" \"\"\")\n",
+                "\n",
+                "os.chdir(cwd)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Clean up certificate staging area\n",
+                "\n",
+                "Remove the certificate files generated on disk (they have now been\n",
+                "placed in the controller database)."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "cmd = f\"rm -r {test_cert_store_root}/{app_name}\"\n",
+                "\n",
+                "run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Clear out the controller\\_db\\_rw\\_secret variable"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "controller_db_rw_secret= \"\""
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Restart `controller` to pick up new certificates.\n",
+                "\n",
+                "Delete the controller pod so that it can restart the controller and pick\n",
+                "up new certificates."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "run(f'kubectl delete pod {controller} -n {namespace}')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Clean up temporary directory for staging configuration files"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Delete the temporary directory used to hold configuration files\n",
+                "\n",
+                "import shutil\n",
+                "\n",
+                "shutil.rmtree(temp_dir)\n",
+                "\n",
+                "print(f'Temporary directory deleted: {temp_dir}')"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "Related\n",
+                "-------\n",
+                "\n",
+                "-   [CER024 - Create Controller\n",
+                "    certificate](../cert-management/cer024-create-controller-cert.ipynb)\n",
+                "\n",
+                "-   [CER034 - Sign Controller certificate with cluster Root\n",
+                "    CA](../cert-management/cer034-sign-controller-generated-cert.ipynb)"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": true
+        }
+    }
+}

+ 264 - 0
Big-Data-Clusters/CU8/Public/content/cert-management/cer050-wait-cluster-healthly.ipynb

@@ -0,0 +1,264 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "CER050 - Wait for BDC to be Healthy\n",
+                "===================================\n",
+                "\n",
+                "This notebook will wait until the Big Data Cluster has returned to a\n",
+                "healthy state, after the `Controller` pod and pods that use `PolyBase`\n",
+                "have been restarted to load the new certificates.\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "timeout = 600  # amount of time to wait before cluster is healthy:  default to 10 minutes\n",
+                "check_interval = 30  # amount of time between health checks - default 30 seconds\n",
+                "min_pod_count = 10  # minimum number of healthy pods required to assert health"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Instantiate Kubernetes client"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Instantiate the Python Kubernetes client into 'api' variable\n",
+                "\n",
+                "import os\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "try:\n",
+                "    from kubernetes import client, config\n",
+                "    from kubernetes.stream import stream\n",
+                "\n",
+                "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n",
+                "        config.load_incluster_config()\n",
+                "    else:\n",
+                "        try:\n",
+                "            config.load_kube_config()\n",
+                "        except:\n",
+                "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n",
+                "            raise\n",
+                "    api = client.CoreV1Api()\n",
+                "\n",
+                "    print('Kubernetes client instantiated')\n",
+                "except ImportError:\n",
+                "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n",
+                "    raise"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n",
+                "    except IndexError:\n",
+                "        from IPython.display import Markdown\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print('The kubernetes namespace for your big data cluster is: ' + namespace)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Helper functions for waiting for the cluster to become healthy"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "import threading\n",
+                "import time\n",
+                "import sys\n",
+                "import os\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "isRunning = True\n",
+                "\n",
+                "def all_containers_ready(pod):\n",
+                "    \"\"\"helper method returns true if all the containers within the given pod are ready\n",
+                "\n",
+                "    Arguments:\n",
+                "        pod {v1Pod} -- Metadata retrieved from the api call to.\n",
+                "    \"\"\"\n",
+                "         \n",
+                "    return all(map(lambda c: c.ready is True, pod.status.container_statuses))\n",
+                "\n",
+                "\n",
+                "def pod_is_ready(pod):\n",
+                "    \"\"\"tests that the pod, and all containers are ready\n",
+                "\n",
+                "    Arguments:\n",
+                "        pod {v1Pod} -- Metadata retrieved from api call.\n",
+                "    \"\"\"\n",
+                "\n",
+                "    return \"job-name\" in pod.metadata.labels or (pod.status.phase == \"Running\" and all_containers_ready(pod))\n",
+                "\n",
+                "\n",
+                "def waitReady():\n",
+                "    \"\"\"Waits for all pods, and containers to become ready.\n",
+                "    \"\"\"\n",
+                "    while isRunning:\n",
+                "        try:\n",
+                "            time.sleep(check_interval)\n",
+                "            pods = get_pods()\n",
+                "            allReady = len(pods.items) >= min_pod_count and all(map(pod_is_ready, pods.items))\n",
+                "\n",
+                "            if allReady:\n",
+                "                return True\n",
+                "            else:\n",
+                "                display(Markdown(get_pod_failures(pods)))\n",
+                "                display(Markdown(f\"cluster not healthy, rechecking in {check_interval} seconds.\"))\n",
+                "        except Exception as ex:\n",
+                "            last_error_message = str(ex)\n",
+                "            display(Markdown(last_error_message))\n",
+                "            time.sleep(check_interval)\n",
+                "\n",
+                "def get_pod_failures(pods=None):\n",
+                "    \"\"\"Returns a status message for any pods that are not ready.\n",
+                "    \"\"\"\n",
+                "    results = \"\"\n",
+                "    if not pods:\n",
+                "        pods = get_pods()\n",
+                "\n",
+                "    for pod in pods.items:\n",
+                "        if \"job-name\" not in pod.metadata.labels:\n",
+                "            if pod.status and pod.status.container_statuses:\n",
+                "                for container in filter(lambda c: c.ready is False, pod.status.container_statuses):\n",
+                "                    results = results + \"Container {0} in Pod {1} is not ready. Reported status: {2} <br/>\".format(container.name, pod.metadata.name, container.state)       \n",
+                "            else:\n",
+                "                results = results + \"Pod {0} is not ready.  <br/>\".format(pod.metadata.name)\n",
+                "    return results\n",
+                "\n",
+                "\n",
+                "def get_pods():\n",
+                "    \"\"\"Returns a list of pods by namespace, or all namespaces if no namespace is specified\n",
+                "    \"\"\"\n",
+                "    pods = None\n",
+                "    if namespace is not None:\n",
+                "        display(Markdown(f'Checking namespace {namespace}'))\n",
+                "        pods = api.list_namespaced_pod(namespace, _request_timeout=30) \n",
+                "    else:\n",
+                "        display(Markdown('Checking all namespaces'))\n",
+                "        pods = api.list_pod_for_all_namespaces(_request_timeout=30)\n",
+                "    return pods\n",
+                "\n",
+                "def wait_for_cluster_healthy():\n",
+                "    isRunning = True\n",
+                "    mt = threading.Thread(target=waitReady)\n",
+                "    mt.start()\n",
+                "    mt.join(timeout=timeout)\n",
+                "\n",
+                "    if mt.isAlive():\n",
+                "      raise SystemExit(\"Timeout waiting for all cluster to be healthy.\")\n",
+                "      \n",
+                "    isRunning = False"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Wait for cluster to to get healthy"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "wait_for_cluster_healthy()"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": true
+        }
+    }
+}

File diff too large to display
+ 467 - 0
Big-Data-Clusters/CU8/Public/content/cert-management/cer100-create-root-ca-install-certs.ipynb


File diff too large to display
+ 456 - 0
Big-Data-Clusters/CU8/Public/content/cert-management/cer101-use-root-ca-install-certs.ipynb


File diff too large to display
+ 455 - 0
Big-Data-Clusters/CU8/Public/content/cert-management/cer102-use-bdc-ca-install-certs.ipynb


+ 81 - 0
Big-Data-Clusters/CU8/Public/content/cert-management/readme.md

@@ -0,0 +1,81 @@
+# A set of notebooks used for Certificate Management
+
+The notebooks in this chapter can be used to create a self-signed root certificate authority (or allow for one to be uploaded), and then use that root CA to create and sign certificates for each external endpoint in a Big Data Cluster.
+
+After running the notebooks in this chapter and installing the Root CA certificate locally, all connections to the Big Data Cluster can be made securely (i.e. the internet browser will indicate "This Connection is Secure"). The following notebook can be used to install the Root CA certificate locally on this machine:
+
+- CER010 - Install generated Root CA locally
+
+## Run the notebooks in a sequence
+
+These two notebooks run the required notebooks in this chapter in sequence with a single 'run all cells' button press:
+
+- CER100 - Configure Cluster with Self Signed Certificates
+- CER101 - Configure Cluster with Self Signed Certificates using existing Root CA
+
+The first notebook (CER100) generates a new Root CA certificate. The second notebook (CER101) uses an already existing Root CA, downloaded and uploaded using:
+
+- CER002 - Download existing Root CA certificate
+- CER003 - Upload existing Root CA certificate
+
+## Details
+
+- By default, the Big Data Cluster generates its own Root CA certificate, and all the certificates used inside the cluster are signed with this Root CA certificate. External clients connecting to cluster endpoints will not have this internal Root CA installed, which leads to certificate verification warnings on clients (internet browsers etc.) and the need to use the --insecure option with tools like curl.
+
+- It is better if the certificates for the external endpoints in the Big Data Cluster are provided and installed in the containers hosting the endpoint services, preferably signed by your own trusted CA, with the CA chain then installed inside the cluster. The notebooks in this chapter aid in this process by creating a self-signed Root CA certificate and then creating certificates for each external endpoint signed by that self-signed Root CA certificate.
+
+- The openssl certificate tracking database is created on the `controller` in the `/var/opt/secrets/test-certificates` folder. A record of each issued certificate is maintained there for tracking purposes; one way to inspect it is sketched below.
+
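+For example, here is a minimal sketch of one way to list that tracking database folder from a client machine. It assumes `kubectl` is installed and pointed at the cluster; the `mssql-cluster` fallback namespace name is an assumption, so set `AZDATA_NAMESPACE` if yours differs:
+
+```python
+# Sketch: list the openssl certificate tracking database on the controller.
+import os
+import subprocess
+
+namespace = os.environ.get("AZDATA_NAMESPACE", "mssql-cluster")  # assumed default name
+controller = subprocess.check_output(
+    ["kubectl", "get", "pod", "--selector=app=controller", "-n", namespace,
+     "-o", "jsonpath={.items[0].metadata.name}"], text=True).strip()
+print(subprocess.check_output(
+    ["kubectl", "exec", controller, "-c", "controller", "-n", namespace, "--",
+     "ls", "-R", "/var/opt/secrets/test-certificates"], text=True))
+```
+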
+[Home](../readme.md)
+
+## Notebooks in this Chapter
+- [CER001 - Generate a Root CA certificate](cer001-create-root-ca.ipynb)
+
+- [CER002 - Download existing Root CA certificate](cer002-download-existing-root-ca.ipynb)
+
+- [CER003 - Upload existing Root CA certificate](cer003-upload-existing-root-ca.ipynb)
+
+- [CER004 - Download and Upload existing Root CA certificate](cer004-download-upload-existing-root-ca.ipynb)
+
+- [CER005 - Install new Root CA certificate](cer005-install-existing-root-ca.ipynb)
+
+- [CER010 - Install generated Root CA locally](cer010-install-generated-root-ca-locally.ipynb)
+
+- [CER020 - Create Management Proxy certificate](cer020-create-management-service-proxy-cert.ipynb)
+
+- [CER021 - Create Knox certificate](cer021-create-knox-cert.ipynb)
+
+- [CER022 - Create App Proxy certificate](cer022-create-app-proxy-cert.ipynb)
+
+- [CER023 - Create Master certificates](cer023-create-master-certs.ipynb)
+
+- [CER024 - Create Controller certificate](cer024-create-controller-cert.ipynb)
+
+- [CER030 - Sign Management Proxy certificate with generated CA](cer030-sign-service-proxy-generated-cert.ipynb)
+
+- [CER031 - Sign Knox certificate with generated CA](cer031-sign-knox-generated-cert.ipynb)
+
+- [CER032 - Sign App-Proxy certificate with generated CA](cer032-sign-app-proxy-generated-cert.ipynb)
+
+- [CER033 - Sign Master certificates with generated CA](cer033-sign-master-generated-certs.ipynb)
+
+- [CER034 - Sign Controller certificate with cluster Root CA](cer034-sign-controller-generated-cert.ipynb)
+
+- [CER040 - Install signed Management Proxy certificate](cer040-install-service-proxy-cert.ipynb)
+
+- [CER041 - Install signed Knox certificate](cer041-install-knox-cert.ipynb)
+
+- [CER042 - Install signed App-Proxy certificate](cer042-install-app-proxy-cert.ipynb)
+
+- [CER043 - Install signed Master certificates](cer043-install-master-certs.ipynb)
+
+- [CER044 - Install signed Controller certificate](cer044-install-controller-cert.ipynb)
+
+- [CER050 - Wait for BDC to be Healthy](cer050-wait-cluster-healthly.ipynb)
+
+- [CER100 - Configure Cluster with Self Signed Certificates](cer100-create-root-ca-install-certs.ipynb)
+
+- [CER101 - Configure Cluster with Self Signed Certificates using existing Root CA](cer101-use-root-ca-install-certs.ipynb)
+
+- [CER102 - Configure Cluster with Self Signed Certificates using existing Big Data Cluster CA](cer102-use-bdc-ca-install-certs.ipynb)
+

+ 55 - 0
Big-Data-Clusters/CU8/Public/content/cert-management/toc.yml

@@ -0,0 +1,55 @@
+- title: Certificate Management
+  url: /cert-management/readme
+  not_numbered: true
+  expand_sections: false
+  sections:
+  - title: CER001 - Generate a Root CA certificate
+    url: cert-management/cer001-create-root-ca
+  - title: CER002 - Download existing Root CA certificate
+    url: cert-management/cer002-download-existing-root-ca
+  - title: CER003 - Upload existing Root CA certificate
+    url: cert-management/cer003-upload-existing-root-ca
+  - title: CER004 - Download and Upload existing Root CA certificate
+    url: cert-management/cer004-download-upload-existing-root-ca
+  - title: CER005 - Install new Root CA certificate
+    url: cert-management/cer005-install-existing-root-ca
+  - title: CER010 - Install generated Root CA locally
+    url: cert-management/cer010-install-generated-root-ca-locally
+  - title: CER020 - Create Management Proxy certificate
+    url: cert-management/cer020-create-management-service-proxy-cert
+  - title: CER021 - Create Knox certificate
+    url: cert-management/cer021-create-knox-cert
+  - title: CER022 - Create App Proxy certificate
+    url: cert-management/cer022-create-app-proxy-cert
+  - title: CER023 - Create Master certificates
+    url: cert-management/cer023-create-master-certs
+  - title: CER024 - Create Controller certificate
+    url: cert-management/cer024-create-controller-cert
+  - title: CER030 - Sign Management Proxy certificate with generated CA
+    url: cert-management/cer030-sign-service-proxy-generated-cert
+  - title: CER031 - Sign Knox certificate with generated CA
+    url: cert-management/cer031-sign-knox-generated-cert
+  - title: CER032 - Sign App-Proxy certificate with generated CA
+    url: cert-management/cer032-sign-app-proxy-generated-cert
+  - title: CER033 - Sign Master certificates with generated CA
+    url: cert-management/cer033-sign-master-generated-certs
+  - title: CER034 - Sign Controller certificate with cluster Root CA
+    url: cert-management/cer034-sign-controller-generated-cert
+  - title: CER040 - Install signed Management Proxy certificate
+    url: cert-management/cer040-install-service-proxy-cert
+  - title: CER041 - Install signed Knox certificate
+    url: cert-management/cer041-install-knox-cert
+  - title: CER042 - Install signed App-Proxy certificate
+    url: cert-management/cer042-install-app-proxy-cert
+  - title: CER043 - Install signed Master certificates
+    url: cert-management/cer043-install-master-certs
+  - title: CER044 - Install signed Controller certificate
+    url: cert-management/cer044-install-controller-cert
+  - title: CER050 - Wait for BDC to be Healthy
+    url: cert-management/cer050-wait-cluster-healthly
+  - title: CER100 - Configure Cluster with Self Signed Certificates
+    url: cert-management/cer100-create-root-ca-install-certs
+  - title: CER101 - Configure Cluster with Self Signed Certificates using existing Root CA
+    url: cert-management/cer101-use-root-ca-install-certs
+  - title: CER102 - Configure Cluster with Self Signed Certificates using existing Big Data Cluster CA
+    url: cert-management/cer102-use-bdc-ca-install-certs

+ 25 - 0
Big-Data-Clusters/CU8/Public/content/common/readme.md

@@ -0,0 +1,25 @@
+# A set of notebooks used for common scenarios
+
+- The notebooks in this chapter are used as prerequisites for other notebooks, such as logging in and out of a cluster.
+
+[Home](../readme.md)
+
+## Notebooks in this Chapter
+- [SOP005 - az login](sop005-az-login.ipynb)
+
+- [SOP006 - az logout](sop006-az-logout.ipynb)
+
+- [SOP007 - Version information (azdata, bdc, kubernetes)](sop007-get-key-version-information.ipynb)
+
+- [SOP011 - Set kubernetes configuration context](sop011-set-kubernetes-context.ipynb)
+
+- [SOP013 - Create secret for azdata login (inside cluster)](sop013-create-secret-for-azdata-login.ipynb)
+
+- [SOP014 - Delete secret for azdata login (inside cluster)](sop014-delete-secret-for-azdata-login.ipynb)
+
+- [SOP028 - azdata login](sop028-azdata-login.ipynb)
+
+- [SOP033 - azdata logout](sop033-azdata-logout.ipynb)
+
+- [SOP034 - Wait for BDC to be Healthy](sop034-wait-cluster-healthly.ipynb)
+

+ 404 - 0
Big-Data-Clusters/CU8/Public/content/common/sop005-az-login.ipynb

@@ -0,0 +1,404 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "SOP005 - az login\n",
+                "=================\n",
+                "\n",
+                "Use the az command line interface to login to Azure.\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Common functions\n",
+                "\n",
+                "Define helper functions used in this notebook."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n",
+                "\n",
+                "import sys\n",
+                "import os\n",
+                "import re\n",
+                "import json\n",
+                "import platform\n",
+                "import shlex\n",
+                "import shutil\n",
+                "import datetime\n",
+                "\n",
+                "from subprocess import Popen, PIPE\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
+                "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
+                "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
+                "\n",
+                "first_run = True\n",
+                "rules = None\n",
+                "debug_logging = False\n",
+                "\n",
+                "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
+                "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
+                "\n",
+                "    NOTES:\n",
+                "\n",
+                "    1.  Commands that need this kind of ' quoting on Windows e.g.:\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n",
+                "\n",
+                "        Need to actually pass in as '\"':\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n",
+                "\n",
+                "        The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n",
+                "        \n",
+                "            `iter(p.stdout.readline, b'')`\n",
+                "\n",
+                "        The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n",
+                "    \"\"\"\n",
+                "    MAX_RETRIES = 5\n",
+                "    output = \"\"\n",
+                "    retry = False\n",
+                "\n",
+                "    global first_run\n",
+                "    global rules\n",
+                "\n",
+                "    if first_run:\n",
+                "        first_run = False\n",
+                "        rules = load_rules()\n",
+                "\n",
+                "    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n",
+                "    #\n",
+                "    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
+                "    #\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n",
+                "        cmd = cmd.replace(\"\\n\", \" \")\n",
+                "\n",
+                "    # shlex.split is required on bash and for Windows paths with spaces\n",
+                "    #\n",
+                "    cmd_actual = shlex.split(cmd)\n",
+                "\n",
+                "    # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n",
+                "    #\n",
+                "    user_provided_exe_name = cmd_actual[0].lower()\n",
+                "\n",
+                "    # When running python, use the python in the ADS sandbox ({sys.executable})\n",
+                "    #\n",
+                "    if cmd.startswith(\"python \"):\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n",
+                "\n",
+                "        # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n",
+                "        # with:\n",
+                "        #\n",
+                "        #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n",
+                "        #\n",
+                "        # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n",
+                "        #\n",
+                "        if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n",
+                "            os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n",
+                "\n",
+                "    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n",
+                "    #\n",
+                "    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n",
+                "\n",
+                "    # To aid supportability, determine which binary file will actually be executed on the machine\n",
+                "    #\n",
+                "    which_binary = None\n",
+                "\n",
+                "    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n",
+                "    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n",
+                "    # of CURL exists on the machine use that one.  (Unfortunately the curl.exe in System32 is almost\n",
+                "    # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n",
+                "    # look for the 2nd installation of CURL in the path)\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n",
+                "        path = os.getenv('PATH')\n",
+                "        for p in path.split(os.path.pathsep):\n",
+                "            p = os.path.join(p, \"curl.exe\")\n",
+                "            if os.path.exists(p) and os.access(p, os.X_OK):\n",
+                "                if p.lower().find(\"system32\") == -1:\n",
+                "                    cmd_actual[0] = p\n",
+                "                    which_binary = p\n",
+                "                    break\n",
+                "\n",
+                "    # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n",
+                "    # seems to be required for .msi installs of azdata.cmd/az.cmd.  (otherwise Popen returns FileNotFound) \n",
+                "    #\n",
+                "    # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        which_binary = shutil.which(cmd_actual[0])\n",
+                "\n",
+                "    # Display an install HINT, so the user can click on a SOP to install the missing binary\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n",
+                "        print(sys.path)\n",
+                "\n",
+                "        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
+                "            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n",
+                "    else:   \n",
+                "        cmd_actual[0] = which_binary\n",
+                "\n",
+                "    start_time = datetime.datetime.now().replace(microsecond=0)\n",
+                "\n",
+                "    print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n",
+                "    print(f\"       using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n",
+                "    print(f\"       cwd: {os.getcwd()}\")\n",
+                "\n",
+                "    # Command-line tools such as CURL and AZDATA HDFS commands output\n",
+                "    # scrolling progress bars, which causes Jupyter to hang forever, to\n",
+                "    # workaround this, use no_output=True\n",
+                "    #\n",
+                "\n",
+                "\n",
+                "    # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n",
+                "    #\n",
+                "    wait = True \n",
+                "\n",
+                "    try:\n",
+                "        if no_output:\n",
+                "            p = Popen(cmd_actual)\n",
+                "        else:\n",
+                "            p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n",
+                "            with p.stdout:\n",
+                "                for line in iter(p.stdout.readline, b''):\n",
+                "                    line = line.decode()\n",
+                "                    if return_output:\n",
+                "                        output = output + line\n",
+                "                    else:\n",
+                "                        if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n",
+                "                            regex = re.compile('  \"(.*)\"\\: \"(.*)\"') \n",
+                "                            match = regex.match(line)\n",
+                "                            if match:\n",
+                "                                if match.group(1).find(\"HTML\") != -1:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n",
+                "                                else:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n",
+                "\n",
+                "                                    wait = False\n",
+                "                                    break # otherwise infinite hang, have not worked out why yet.\n",
+                "                        else:\n",
+                "                            print(line, end='')\n",
+                "                            if rules is not None:\n",
+                "                                apply_expert_rules(line)\n",
+                "\n",
+                "        if wait:\n",
+                "            p.wait()\n",
+                "    except FileNotFoundError as e:\n",
+                "        if install_hint is not None:\n",
+                "            display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n",
+                "\n",
+                "    exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n",
+                "\n",
+                "    if not no_output:\n",
+                "        for line in iter(p.stderr.readline, b''):\n",
+                "            try:\n",
+                "                line_decoded = line.decode()\n",
+                "            except UnicodeDecodeError:\n",
+                "                # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n",
+                "                #\n",
+                "                #   \\xa0\n",
+                "                #\n",
+                "                # For example see this in the response from `az group create`:\n",
+                "                #\n",
+                "                # ERROR: Get Token request returned http error: 400 and server \n",
+                "                # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n",
+                "                # The refresh token has expired due to inactivity.\\xa0The token was \n",
+                "                # issued on 2018-10-25T23:35:11.9832872Z\n",
+                "                #\n",
+                "                # which generates the exception:\n",
+                "                #\n",
+                "                # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n",
+                "                #\n",
+                "                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n",
+                "                print(line)\n",
+                "                line_decoded = \"\"\n",
+                "                pass\n",
+                "            else:\n",
+                "\n",
+                "                # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n",
+                "                # print this empty \"ERR:\" as it confuses.\n",
+                "                #\n",
+                "                if line_decoded == \"\":\n",
+                "                    continue\n",
+                "                \n",
+                "                print(f\"STDERR: {line_decoded}\", end='')\n",
+                "\n",
+                "                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n",
+                "                    exit_code_workaround = 1\n",
+                "\n",
+                "                # inject HINTs to next TSG/SOP based on output in stderr\n",
+                "                #\n",
+                "                if user_provided_exe_name in error_hints:\n",
+                "                    for error_hint in error_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(error_hint[0]) != -1:\n",
+                "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
+                "\n",
+                "                # apply expert rules (to run follow-on notebooks), based on output\n",
+                "                #\n",
+                "                if rules is not None:\n",
+                "                    apply_expert_rules(line_decoded)\n",
+                "\n",
+                "                # Verify if a transient error, if so automatically retry (recursive)\n",
+                "                #\n",
+                "                if user_provided_exe_name in retry_hints:\n",
+                "                    for retry_hint in retry_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(retry_hint) != -1:\n",
+                "                            if retry_count < MAX_RETRIES:\n",
+                "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
+                "                                retry_count = retry_count + 1\n",
+                "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
+                "\n",
+                "                                if return_output:\n",
+                "                                    if base64_decode:\n",
+                "                                        import base64\n",
+                "\n",
+                "                                        return base64.b64decode(output).decode('utf-8')\n",
+                "                                    else:\n",
+                "                                        return output\n",
+                "\n",
+                "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
+                "\n",
+                "    # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n",
+                "    # don't wait here, if success known above\n",
+                "    #\n",
+                "    if wait: \n",
+                "        if p.returncode != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n",
+                "    else:\n",
+                "        if exit_code_workaround !=0 :\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n",
+                "\n",
+                "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
+                "\n",
+                "    if return_output:\n",
+                "        if base64_decode:\n",
+                "            import base64\n",
+                "\n",
+                "            return base64.b64decode(output).decode('utf-8')\n",
+                "        else:\n",
+                "            return output\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    \"\"\"Load a json file from disk and return the contents\"\"\"\n",
+                "\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def load_rules():\n",
+                "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
+                "\n",
+                "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
+                "    #\n",
+                "    try:\n",
+                "        j = load_json(\"sop005-az-login.ipynb\")\n",
+                "    except:\n",
+                "        pass # If the user has renamed the book, we can't load ourself.  NOTE: Is there a way in Jupyter, to know your own filename?\n",
+                "    else:\n",
+                "        if \"metadata\" in j and \\\n",
+                "            \"azdata\" in j[\"metadata\"] and \\\n",
+                "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
+                "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "\n",
+                "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
+                "\n",
+                "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n",
+                "\n",
+                "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
+                "\n",
+                "            return rules\n",
+                "\n",
+                "def apply_expert_rules(line):\n",
+                "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n",
+                "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
+                "\n",
+                "    global rules\n",
+                "\n",
+                "    for rule in rules:\n",
+                "        notebook = rule[1]\n",
+                "        cell_type = rule[2]\n",
+                "        output_type = rule[3] # i.e. stream or error\n",
+                "        output_type_name = rule[4] # i.e. ename or name \n",
+                "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
+                "        details_name = rule[6]  # i.e. evalue or text \n",
+                "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
+                "\n",
+                "        if debug_logging:\n",
+                "            print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n",
+                "\n",
+                "        if re.match(expression, line, re.DOTALL):\n",
+                "\n",
+                "            if debug_logging:\n",
+                "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n",
+                "\n",
+                "            match_found = True\n",
+                "\n",
+                "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Login to azure"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "run(\"az login\")"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "Related\n",
+                "-------\n",
+                "\n",
+                "-   [SOP006 - az logout](../common/sop006-az-logout.ipynb)"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": true,
+            "symlink": true
+        }
+    }
+}

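A note on the `'"'` quoting pattern called out in the `run` docstring above: because `run` tokenizes the command with `shlex.split` instead of handing it to a shell, a literal single quote must be written as `'"'`. A quick sketch of what the tokenizer actually produces, using the JSONPath value from the docstring's own example:

```python
import shlex

# Each '"' collapses to a literal double quote after shlex.split, so the
# single-quoted 'data-pool' in the docstring example becomes "data-pool"
# in the final argv token - a form that works when passed to Popen on
# Windows as well as bash.
cmd = """kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='"'data-pool'"')].metadata.name}"""

print(shlex.split(cmd)[-1])
# jsonpath={.items[?(@.metadata.annotations.pv-candidate=="data-pool")].metadata.name}
```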
+ 404 - 0
Big-Data-Clusters/CU8/Public/content/common/sop006-az-logout.ipynb

@@ -0,0 +1,404 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "SOP006 - az logout\n",
+                "==================\n",
+                "\n",
+                "Use the az command line interface to logout of Azure.\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Common functions\n",
+                "\n",
+                "Define helper functions used in this notebook."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n",
+                "\n",
+                "import sys\n",
+                "import os\n",
+                "import re\n",
+                "import json\n",
+                "import platform\n",
+                "import shlex\n",
+                "import shutil\n",
+                "import datetime\n",
+                "\n",
+                "from subprocess import Popen, PIPE\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
+                "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
+                "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
+                "\n",
+                "first_run = True\n",
+                "rules = None\n",
+                "debug_logging = False\n",
+                "\n",
+                "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
+                "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
+                "\n",
+                "    NOTES:\n",
+                "\n",
+                "    1.  Commands that need this kind of ' quoting on Windows e.g.:\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n",
+                "\n",
+                "        Need to actually pass in as '\"':\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n",
+                "\n",
+                "        The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n",
+                "        \n",
+                "            `iter(p.stdout.readline, b'')`\n",
+                "\n",
+                "        The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n",
+                "    \"\"\"\n",
+                "    MAX_RETRIES = 5\n",
+                "    output = \"\"\n",
+                "    retry = False\n",
+                "\n",
+                "    global first_run\n",
+                "    global rules\n",
+                "\n",
+                "    if first_run:\n",
+                "        first_run = False\n",
+                "        rules = load_rules()\n",
+                "\n",
+                "    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n",
+                "    #\n",
+                "    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
+                "    #\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n",
+                "        cmd = cmd.replace(\"\\n\", \" \")\n",
+                "\n",
+                "    # shlex.split is required on bash and for Windows paths with spaces\n",
+                "    #\n",
+                "    cmd_actual = shlex.split(cmd)\n",
+                "\n",
+                "    # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n",
+                "    #\n",
+                "    user_provided_exe_name = cmd_actual[0].lower()\n",
+                "\n",
+                "    # When running python, use the python in the ADS sandbox ({sys.executable})\n",
+                "    #\n",
+                "    if cmd.startswith(\"python \"):\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n",
+                "\n",
+                "        # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n",
+                "        # with:\n",
+                "        #\n",
+                "        #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n",
+                "        #\n",
+                "        # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n",
+                "        #\n",
+                "        if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n",
+                "            os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n",
+                "\n",
+                "    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n",
+                "    #\n",
+                "    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n",
+                "\n",
+                "    # To aid supportability, determine which binary file will actually be executed on the machine\n",
+                "    #\n",
+                "    which_binary = None\n",
+                "\n",
+                "    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n",
+                "    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n",
+                "    # of CURL exists on the machine use that one.  (Unfortunately the curl.exe in System32 is almost\n",
+                "    # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n",
+                "    # look for the 2nd installation of CURL in the path)\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n",
+                "        path = os.getenv('PATH')\n",
+                "        for p in path.split(os.path.pathsep):\n",
+                "            p = os.path.join(p, \"curl.exe\")\n",
+                "            if os.path.exists(p) and os.access(p, os.X_OK):\n",
+                "                if p.lower().find(\"system32\") == -1:\n",
+                "                    cmd_actual[0] = p\n",
+                "                    which_binary = p\n",
+                "                    break\n",
+                "\n",
+                "    # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n",
+                "    # seems to be required for .msi installs of azdata.cmd/az.cmd.  (otherwise Popen returns FileNotFound) \n",
+                "    #\n",
+                "    # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        which_binary = shutil.which(cmd_actual[0])\n",
+                "\n",
+                "    # Display an install HINT, so the user can click on a SOP to install the missing binary\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n",
+                "        print(sys.path)\n",
+                "\n",
+                "        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
+                "            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n",
+                "    else:   \n",
+                "        cmd_actual[0] = which_binary\n",
+                "\n",
+                "    start_time = datetime.datetime.now().replace(microsecond=0)\n",
+                "\n",
+                "    print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n",
+                "    print(f\"       using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n",
+                "    print(f\"       cwd: {os.getcwd()}\")\n",
+                "\n",
+                "    # Command-line tools such as CURL and AZDATA HDFS commands output\n",
+                "    # scrolling progress bars, which causes Jupyter to hang forever, to\n",
+                "    # workaround this, use no_output=True\n",
+                "    #\n",
+                "\n",
+                "\n",
+                "    # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n",
+                "    #\n",
+                "    wait = True \n",
+                "\n",
+                "    try:\n",
+                "        if no_output:\n",
+                "            p = Popen(cmd_actual)\n",
+                "        else:\n",
+                "            p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n",
+                "            with p.stdout:\n",
+                "                for line in iter(p.stdout.readline, b''):\n",
+                "                    line = line.decode()\n",
+                "                    if return_output:\n",
+                "                        output = output + line\n",
+                "                    else:\n",
+                "                        if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n",
+                "                            regex = re.compile('  \"(.*)\"\\: \"(.*)\"') \n",
+                "                            match = regex.match(line)\n",
+                "                            if match:\n",
+                "                                if match.group(1).find(\"HTML\") != -1:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n",
+                "                                else:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n",
+                "\n",
+                "                                    wait = False\n",
+                "                                    break # otherwise infinite hang, have not worked out why yet.\n",
+                "                        else:\n",
+                "                            print(line, end='')\n",
+                "                            if rules is not None:\n",
+                "                                apply_expert_rules(line)\n",
+                "\n",
+                "        if wait:\n",
+                "            p.wait()\n",
+                "    except FileNotFoundError as e:\n",
+                "        if install_hint is not None:\n",
+                "            display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n",
+                "\n",
+                "    exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n",
+                "\n",
+                "    if not no_output:\n",
+                "        for line in iter(p.stderr.readline, b''):\n",
+                "            try:\n",
+                "                line_decoded = line.decode()\n",
+                "            except UnicodeDecodeError:\n",
+                "                # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n",
+                "                #\n",
+                "                #   \\xa0\n",
+                "                #\n",
+                "                # For example see this in the response from `az group create`:\n",
+                "                #\n",
+                "                # ERROR: Get Token request returned http error: 400 and server \n",
+                "                # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n",
+                "                # The refresh token has expired due to inactivity.\\xa0The token was \n",
+                "                # issued on 2018-10-25T23:35:11.9832872Z\n",
+                "                #\n",
+                "                # which generates the exception:\n",
+                "                #\n",
+                "                # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n",
+                "                #\n",
+                "                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n",
+                "                print(line)\n",
+                "                line_decoded = \"\"\n",
+                "                pass\n",
+                "            else:\n",
+                "\n",
+                "                # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n",
+                "                # print this empty \"ERR:\" as it confuses.\n",
+                "                #\n",
+                "                if line_decoded == \"\":\n",
+                "                    continue\n",
+                "                \n",
+                "                print(f\"STDERR: {line_decoded}\", end='')\n",
+                "\n",
+                "                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n",
+                "                    exit_code_workaround = 1\n",
+                "\n",
+                "                # inject HINTs to next TSG/SOP based on output in stderr\n",
+                "                #\n",
+                "                if user_provided_exe_name in error_hints:\n",
+                "                    for error_hint in error_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(error_hint[0]) != -1:\n",
+                "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
+                "\n",
+                "                # apply expert rules (to run follow-on notebooks), based on output\n",
+                "                #\n",
+                "                if rules is not None:\n",
+                "                    apply_expert_rules(line_decoded)\n",
+                "\n",
+                "                # Verify if a transient error, if so automatically retry (recursive)\n",
+                "                #\n",
+                "                if user_provided_exe_name in retry_hints:\n",
+                "                    for retry_hint in retry_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(retry_hint) != -1:\n",
+                "                            if retry_count < MAX_RETRIES:\n",
+                "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
+                "                                retry_count = retry_count + 1\n",
+                "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
+                "\n",
+                "                                if return_output:\n",
+                "                                    if base64_decode:\n",
+                "                                        import base64\n",
+                "\n",
+                "                                        return base64.b64decode(output).decode('utf-8')\n",
+                "                                    else:\n",
+                "                                        return output\n",
+                "\n",
+                "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
+                "\n",
+                "    # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n",
+                "    # don't wait here, if success known above\n",
+                "    #\n",
+                "    if wait: \n",
+                "        if p.returncode != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n",
+                "    else:\n",
+                "        if exit_code_workaround !=0 :\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n",
+                "\n",
+                "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
+                "\n",
+                "    if return_output:\n",
+                "        if base64_decode:\n",
+                "            import base64\n",
+                "\n",
+                "            return base64.b64decode(output).decode('utf-8')\n",
+                "        else:\n",
+                "            return output\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    \"\"\"Load a json file from disk and return the contents\"\"\"\n",
+                "\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def load_rules():\n",
+                "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
+                "\n",
+                "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
+                "    #\n",
+                "    try:\n",
+                "        j = load_json(\"sop006-az-logout.ipynb\")\n",
+                "    except:\n",
+                "        pass # If the user has renamed the book, we can't load ourself.  NOTE: Is there a way in Jupyter, to know your own filename?\n",
+                "    else:\n",
+                "        if \"metadata\" in j and \\\n",
+                "            \"azdata\" in j[\"metadata\"] and \\\n",
+                "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
+                "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "\n",
+                "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
+                "\n",
+                "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n",
+                "\n",
+                "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
+                "\n",
+                "            return rules\n",
+                "\n",
+                "def apply_expert_rules(line):\n",
+                "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n",
+                "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
+                "\n",
+                "    global rules\n",
+                "\n",
+                "    for rule in rules:\n",
+                "        notebook = rule[1]\n",
+                "        cell_type = rule[2]\n",
+                "        output_type = rule[3] # i.e. stream or error\n",
+                "        output_type_name = rule[4] # i.e. ename or name \n",
+                "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
+                "        details_name = rule[6]  # i.e. evalue or text \n",
+                "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
+                "\n",
+                "        if debug_logging:\n",
+                "            print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n",
+                "\n",
+                "        if re.match(expression, line, re.DOTALL):\n",
+                "\n",
+                "            if debug_logging:\n",
+                "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n",
+                "\n",
+                "            match_found = True\n",
+                "\n",
+                "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Logout of azure"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "run(\"az logout\")"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "Related\n",
+                "-------\n",
+                "\n",
+                "-   [SOP005 - az login](../common/sop005-az-login.ipynb)"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": true,
+            "symlink": true
+        }
+    }
+}

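For reference, the `load_rules`/`apply_expert_rules` pair above expects the notebook metadata to carry `azdata.expert.expanded_rules` as a list of rule arrays in which, per the comments in the code, element [1] names the follow-on notebook and element [7] holds the regular expression matched against each output line. A minimal sketch with a hypothetical rule (the priority value and error text are invented for illustration):

```python
import re

# One hypothetical entry, shaped like the expanded_rules arrays read by
# load_rules(): [priority, notebook, cell_type, output_type,
#                output_type_name, output_type_value, details_name, expression]
rules = [
    [100, "../repair/tsg118-configure-kube-config.ipynb", "code",
     "error", "ename", "SystemExit", "evalue",
     ".*[Ll]oad kube config failed.*"],  # hypothetical expression
]

line = "ERROR: Load kube config failed"  # hypothetical stderr line

for rule in rules:
    notebook = rule[1]
    expression = rule[7].replace("\\*", "*")  # unescape \* back to *
    if re.match(expression, line, re.DOTALL):
        print(f"HINT: Use {notebook} to resolve this issue.")
```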
File diff too large to display
+ 359 - 0
Big-Data-Clusters/CU8/Public/content/common/sop007-get-key-version-information.ipynb


File diff too large to display
+ 389 - 0
Big-Data-Clusters/CU8/Public/content/common/sop011-set-kubernetes-context.ipynb


File diff too large to display
+ 398 - 0
Big-Data-Clusters/CU8/Public/content/common/sop013-create-secret-for-azdata-login.ipynb


File diff too large to display
+ 382 - 0
Big-Data-Clusters/CU8/Public/content/common/sop014-delete-secret-for-azdata-login.ipynb


File diff too large to display
+ 381 - 0
Big-Data-Clusters/CU8/Public/content/common/sop028-azdata-login.ipynb


File diff too large to display
+ 360 - 0
Big-Data-Clusters/CU8/Public/content/common/sop033-azdata-logout.ipynb


+ 269 - 0
Big-Data-Clusters/CU8/Public/content/common/sop034-wait-cluster-healthly.ipynb

@@ -0,0 +1,269 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "SOP034 - Wait for BDC to be Healthy\n",
+                "===================================\n",
+                "\n",
+                "Blocks until the Big Data Cluster is healthy, or the specified timeout\n",
+                "expires.\n",
+                "\n",
+                "The min\\_pod\\_count parameter indicates that the health check will not\n",
+                "pass until at least this number of pods exists in the cluster. If any\n",
+                "existing pods beyond this limit are unhealthy, the cluster is not\n",
+                "healthy.\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "timeout = 600  # amount of time to wait before cluster is healthy:  default to 10 minutes\n",
+                "check_interval = 30  # amount of time between health checks - default 30 seconds\n",
+                "min_pod_count = 10  # minimum number of healthy pods required to assert health"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Instantiate Kubernetes client"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Instantiate the Python Kubernetes client into 'api' variable\n",
+                "\n",
+                "import os\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "try:\n",
+                "    from kubernetes import client, config\n",
+                "    from kubernetes.stream import stream\n",
+                "\n",
+                "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n",
+                "        config.load_incluster_config()\n",
+                "    else:\n",
+                "        try:\n",
+                "            config.load_kube_config()\n",
+                "        except:\n",
+                "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n",
+                "            raise\n",
+                "    api = client.CoreV1Api()\n",
+                "\n",
+                "    print('Kubernetes client instantiated')\n",
+                "except ImportError:\n",
+                "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n",
+                "    raise"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n",
+                "    except IndexError:\n",
+                "        from IPython.display import Markdown\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print('The kubernetes namespace for your big data cluster is: ' + namespace)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Helper functions for waiting for the cluster to become healthy"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "import threading\n",
+                "import time\n",
+                "import sys\n",
+                "import os\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "isRunning = True\n",
+                "\n",
+                "def all_containers_ready(pod):\n",
+                "    \"\"\"helper method returns true if all the containers within the given pod are ready\n",
+                "\n",
+                "    Arguments:\n",
+                "        pod {v1Pod} -- Metadata retrieved from the api call to.\n",
+                "    \"\"\"\n",
+                "         \n",
+                "    return all(map(lambda c: c.ready is True, pod.status.container_statuses))\n",
+                "\n",
+                "\n",
+                "def pod_is_ready(pod):\n",
+                "    \"\"\"tests that the pod, and all containers are ready\n",
+                "\n",
+                "    Arguments:\n",
+                "        pod {v1Pod} -- Metadata retrieved from api call.\n",
+                "    \"\"\"\n",
+                "\n",
+                "    return \"job-name\" in pod.metadata.labels or (pod.status.phase == \"Running\" and all_containers_ready(pod))\n",
+                "\n",
+                "\n",
+                "def waitReady():\n",
+                "    \"\"\"Waits for all pods, and containers to become ready.\n",
+                "    \"\"\"\n",
+                "    while isRunning:\n",
+                "        try:\n",
+                "            time.sleep(check_interval)\n",
+                "            pods = get_pods()\n",
+                "            allReady = len(pods.items) >= min_pod_count and all(map(pod_is_ready, pods.items))\n",
+                "\n",
+                "            if allReady:\n",
+                "                return True\n",
+                "            else:\n",
+                "                display(Markdown(get_pod_failures(pods)))\n",
+                "                display(Markdown(f\"cluster not healthy, rechecking in {check_interval} seconds.\"))\n",
+                "        except Exception as ex:\n",
+                "            last_error_message = str(ex)\n",
+                "            display(Markdown(last_error_message))\n",
+                "            time.sleep(check_interval)\n",
+                "\n",
+                "def get_pod_failures(pods=None):\n",
+                "    \"\"\"Returns a status message for any pods that are not ready.\n",
+                "    \"\"\"\n",
+                "    results = \"\"\n",
+                "    if not pods:\n",
+                "        pods = get_pods()\n",
+                "\n",
+                "    for pod in pods.items:\n",
+                "        if \"job-name\" not in pod.metadata.labels:\n",
+                "            if pod.status and pod.status.container_statuses:\n",
+                "                for container in filter(lambda c: c.ready is False, pod.status.container_statuses):\n",
+                "                    results = results + \"Container {0} in Pod {1} is not ready. Reported status: {2} <br/>\".format(container.name, pod.metadata.name, container.state)       \n",
+                "            else:\n",
+                "                results = results + \"Pod {0} is not ready.  <br/>\".format(pod.metadata.name)\n",
+                "    return results\n",
+                "\n",
+                "\n",
+                "def get_pods():\n",
+                "    \"\"\"Returns a list of pods by namespace, or all namespaces if no namespace is specified\n",
+                "    \"\"\"\n",
+                "    pods = None\n",
+                "    if namespace is not None:\n",
+                "        display(Markdown(f'Checking namespace {namespace}'))\n",
+                "        pods = api.list_namespaced_pod(namespace, _request_timeout=30) \n",
+                "    else:\n",
+                "        display(Markdown('Checking all namespaces'))\n",
+                "        pods = api.list_pod_for_all_namespaces(_request_timeout=30)\n",
+                "    return pods\n",
+                "\n",
+                "def wait_for_cluster_healthy():\n",
+                "    isRunning = True\n",
+                "    mt = threading.Thread(target=waitReady)\n",
+                "    mt.start()\n",
+                "    mt.join(timeout=timeout)\n",
+                "\n",
+                "    if mt.isAlive():\n",
+                "      raise SystemExit(\"Timeout waiting for all cluster to be healthy.\")\n",
+                "      \n",
+                "    isRunning = False"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Wait for cluster to to get healthy"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "wait_for_cluster_healthy()"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": false,
+            "timeout": "900"
+        }
+    }
+}

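The cell tagged `parameters` in SOP034 above follows the papermill convention, so `timeout`, `check_interval`, and `min_pod_count` can be overridden when the notebook is executed programmatically. A minimal sketch, assuming papermill is installed and the notebook sits in the current directory:

```python
import papermill as pm

# Wait up to 30 minutes, polling every 60 seconds, and require at least
# 25 pods before the health assertion can pass.
pm.execute_notebook(
    "sop034-wait-cluster-healthly.ipynb",
    "sop034-wait-cluster-healthly.output.ipynb",
    parameters={"timeout": 1800, "check_interval": 60, "min_pod_count": 25},
)
```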
+ 23 - 0
Big-Data-Clusters/CU8/Public/content/common/toc.yml

@@ -0,0 +1,23 @@
+- title: Common 
+  url: /common/readme
+  not_numbered: true
+  expand_sections: false
+  sections:
+  - title: SOP005 - az login
+    url: common/sop005-az-login
+  - title: SOP006 - az logout
+    url: common/sop006-az-logout
+  - title: SOP007 - Version information (azdata, bdc, kubernetes)
+    url: common/sop007-get-key-version-information
+  - title: SOP011 - Set kubernetes configuration context
+    url: common/sop011-set-kubernetes-context
+  - title: SOP013 - Create secret for azdata login (inside cluster)
+    url: common/sop013-create-secret-for-azdata-login
+  - title: SOP014 - Delete secret for azdata login (inside cluster)
+    url: common/sop014-delete-secret-for-azdata-login
+  - title: SOP028 - azdata login
+    url: common/sop028-azdata-login
+  - title: SOP033 - azdata logout
+    url: common/sop033-azdata-logout
+  - title: SOP034 - Wait for BDC to be Healthy
+    url: common/sop034-wait-cluster-healthly

+ 31 - 0
Big-Data-Clusters/CU8/Public/content/diagnose/readme.md

@@ -0,0 +1,31 @@
+# Diagnose notebooks
+
+- A collection of notebooks for diagnosing situations and states with a Big Data Cluster.
+
+[Home](../readme.md)
+
+## Notebooks in this Chapter
+- [TSG027 - Observe cluster deployment](tsg027-observe-bdc-create.ipynb)
+
+- [TSG078 - Is cluster healthy](tsg078-is-cluster-healthy.ipynb)
+
+- [TSG029 - Find dumps in the cluster](tsg029-find-dumps-in-the-cluster.ipynb)
+
+- [TSG032 - CPU and Memory usage for all containers](tsg032-get-cpu-and-memory-for-all-containers.ipynb)
+
+- [TSG060 - Persistent Volume disk space for all BDC PVCs](tsg060-get-disk-space-for-all-pvcs.ipynb)
+
+- [TSG087 - Use hadoop fs CLI on nmnode pod](tsg087-use-hadoop-fs.ipynb)
+
+- [TSG037 - Determine master pool pod hosting primary replica](tsg037-determine-primary-master-replica.ipynb)
+
+- [TSG055 - Time Curl to Sparkhead](tsg055-time-curl-to-sparkhead.ipynb)
+
+- [TSG079 - Generate `controller` core dump](tsg079-generate-controller-core-dump.ipynb)
+
+- [TSG086 - Run `top` in all containers](tsg086-run-top-for-all-containers.ipynb)
+
+- [TSG108 - View the controller upgrade config map](tsg108-controller-failed-to-upgrade.ipynb)
+
+- [TSG114 - Connect to ControlDB using Port Forwarding](tsg114-port-forwarding-for-controldb.ipynb)
+

+ 29 - 0
Big-Data-Clusters/CU8/Public/content/diagnose/toc.yml

@@ -0,0 +1,29 @@
+- title: Diagnose
+  url: /diagnose/readme
+  not_numbered: true
+  expand_sections: true
+  sections:
+  - title: TSG027 - Observe cluster deployment
+    url: diagnose/tsg027-observe-bdc-create
+  - title: TSG078 - Is cluster healthy
+    url: diagnose/tsg078-is-cluster-healthy
+  - title: TSG029 - Find dumps in the cluster
+    url: diagnose/tsg029-find-dumps-in-the-cluster
+  - title: TSG032 - CPU and Memory usage for all containers
+    url: diagnose/tsg032-get-cpu-and-memory-for-all-containers
+  - title: TSG060 - Persistent Volume disk space for all BDC PVCs
+    url: diagnose/tsg060-get-disk-space-for-all-pvcs
+  - title: TSG087 - Use hadoop fs CLI on nmnode pod
+    url: diagnose/tsg087-use-hadoop-fs
+  - title: TSG037 - Determine master pool pod hosting primary replica
+    url: diagnose/tsg037-determine-primary-master-replica
+  - title: TSG055 - Time Curl to Sparkhead
+    url: diagnose/tsg055-time-curl-to-sparkhead
+  - title: TSG079 - Generate `controller` core dump
+    url: diagnose/tsg079-generate-controller-core-dump
+  - title: TSG086 - Run `top` in all containers
+    url: diagnose/tsg086-run-top-for-all-containers
+  - title: TSG108 - View the controller upgrade config map
+    url: diagnose/tsg108-controller-failed-to-upgrade
+  - title: TSG114 - Connect to ControlDB using Port Forwarding
+    url: diagnose/tsg114-port-forwarding-for-controldb

File diff too large to display
+ 385 - 0
Big-Data-Clusters/CU8/Public/content/diagnose/tsg027-observe-bdc-create.ipynb


+ 180 - 0
Big-Data-Clusters/CU8/Public/content/diagnose/tsg029-find-dumps-in-the-cluster.ipynb

@@ -0,0 +1,180 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "TSG029 - Find dumps in the cluster\n",
+                "==================================\n",
+                "\n",
+                "Description\n",
+                "-----------\n",
+                "\n",
+                "Look for coredumps and minidumps from processes like SQL Server or\n",
+                "controller in a big data cluster.\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Instantiate Kubernetes client"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Instantiate the Python Kubernetes client into 'api' variable\n",
+                "\n",
+                "import os\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "try:\n",
+                "    from kubernetes import client, config\n",
+                "    from kubernetes.stream import stream\n",
+                "\n",
+                "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n",
+                "        config.load_incluster_config()\n",
+                "    else:\n",
+                "        try:\n",
+                "            config.load_kube_config()\n",
+                "        except:\n",
+                "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n",
+                "            raise\n",
+                "    api = client.CoreV1Api()\n",
+                "\n",
+                "    print('Kubernetes client instantiated')\n",
+                "except ImportError:\n",
+                "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n",
+                "    raise"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n",
+                "    except IndexError:\n",
+                "        from IPython.display import Markdown\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print('The kubernetes namespace for your big data cluster is: ' + namespace)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get all relevant pods"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "pod_list = api.list_namespaced_pod(namespace, label_selector='app in (compute-0, data-0, storage-0, master, controller, controldb)', field_selector='status.phase==Running')\n",
+                "pod_names = [pod.metadata.name for pod in pod_list.items]\n",
+                "print('Scanning pods: ' + ', '.join(pod_names))\n",
+                "\n",
+                "command = 'find /var/opt /var/log -path /var/opt/mssql-extensibility/data -prune -o -print | grep -E \"core\\\\.sqlservr|core\\\\.controller|SQLD|\\\\.mdmp$|\\\\.dmp$|\\\\.gdmp$\"'\n",
+                "all_dumps = ''\n",
+                "\n",
+                "for name in pod_names:\n",
+                "    print('Searching pod: ' + name)\n",
+                "    container = 'mssql-server'\n",
+                "    if 'control-' in name:\n",
+                "        container = 'controller'\n",
+                "\n",
+                "    try:\n",
+                "        dumps=stream(api.connect_get_namespaced_pod_exec, name, namespace, command=['/bin/sh', '-c', command], container=container, stderr=True, stdout=True)\n",
+                "    except Exception as e:\n",
+                "        print(f'Unable to connect to pod: {name} due to {str(e.__class__)}. Skipping dump check for this pod...')\n",
+                "    else:\n",
+                "        if dumps:\n",
+                "            all_dumps += '*Pod: ' + name + '*\\n'\n",
+                "            all_dumps += dumps + '\\n'"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Validate\n",
+                "\n",
+                "Validate no dump files were found."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "if len(all_dumps) > 0:\n",
+                "    raise SystemExit('FAIL - dump files found:\\n' + all_dumps)\n",
+                "\n",
+                "print('SUCCESS - no dump files were found.')"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": false
+        }
+    }
+}
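
For reference, a minimal sketch of running the same dump scan against a single
pod with the Kubernetes Python client; the pod name and namespace below are
hypothetical placeholders:

    from kubernetes import client, config
    from kubernetes.stream import stream

    config.load_kube_config()
    api = client.CoreV1Api()

    # Same scan TSG029 runs: prune the extensibility data directory and
    # match the known core/minidump file patterns.
    command = ('find /var/opt /var/log -path /var/opt/mssql-extensibility/data -prune -o -print '
               '| grep -E "core\\.sqlservr|core\\.controller|SQLD|\\.mdmp$|\\.dmp$|\\.gdmp$"')

    dumps = stream(api.connect_get_namespaced_pod_exec,
                   'master-0',        # hypothetical pod name
                   'mssql-cluster',   # hypothetical BDC namespace
                   command=['/bin/sh', '-c', command],
                   container='mssql-server',
                   stderr=True, stdout=True)

    print(dumps if dumps else 'No dump files found.')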

+ 158 - 0
Big-Data-Clusters/CU8/Public/content/diagnose/tsg032-get-cpu-and-memory-for-all-containers.ipynb

@@ -0,0 +1,158 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "TSG032 - CPU and Memory usage for all containers\n",
+                "================================================\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Instantiate Kubernetes client"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Instantiate the Python Kubernetes client into 'api' variable\n",
+                "\n",
+                "import os\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "try:\n",
+                "    from kubernetes import client, config\n",
+                "    from kubernetes.stream import stream\n",
+                "\n",
+                "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n",
+                "        config.load_incluster_config()\n",
+                "    else:\n",
+                "        try:\n",
+                "            config.load_kube_config()\n",
+                "        except:\n",
+                "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n",
+                "            raise\n",
+                "    api = client.CoreV1Api()\n",
+                "\n",
+                "    print('Kubernetes client instantiated')\n",
+                "except ImportError:\n",
+                "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n",
+                "    raise"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n",
+                "    except IndexError:\n",
+                "        from IPython.display import Markdown\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print('The kubernetes namespace for your big data cluster is: ' + namespace)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get per process usage stats"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "cmd = \"\"\"echo \"CPU %\\t MEM %\\t MEM\\t PROCESS\" &&\n",
+                "ps aux |\n",
+                "awk '\n",
+                "    {mem[$11] += int($6/1024)};\n",
+                "    {cpuper[$11] += $3};\n",
+                "    {memper[$11] += $4};\n",
+                "END {\n",
+                "    for (i in mem) {\n",
+                "        print cpuper[i] \"%\\t\", memper[i] \"%\\t\", mem[i] \"MB\\t\", i\n",
+                "    }\n",
+                "}' |\n",
+                "sort -k3nr\n",
+                "\"\"\"\n",
+                "\n",
+                "pod_list = api.list_namespaced_pod(namespace)\n",
+                "pod_names = [pod.metadata.name for pod in pod_list.items]\n",
+                "\n",
+                "for pod in pod_list.items:\n",
+                "    container_names = [container.name for container in pod.spec.containers]\n",
+                "\n",
+                "    for container in container_names:\n",
+                "        print (f\"CONTAINER: {container} / POD: {pod.metadata.name}\")\n",
+                "        try:\n",
+                "            print(stream(api.connect_get_namespaced_pod_exec, pod.metadata.name, namespace, command=['/bin/sh', '-c', cmd], container=container, stderr=True, stdout=True))\n",
+                "        except Exception:\n",
+                "            print (f\"Failed to get CPU/Memory for container: {container} in POD: {pod.metadata.name}\")"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": false
+        }
+    }
+}
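
As a companion to the awk pipeline in TSG032 above, a Python sketch of the same
aggregation (`aggregate_ps` and its input are illustrative, assuming the raw
text of `ps aux` captured from a container):

    from collections import defaultdict

    def aggregate_ps(ps_aux_output):
        """Sum %CPU (field 3), %MEM (field 4) and resident memory (field 6,
        reported by ps in KB) per command name, sorted by memory descending."""
        cpu = defaultdict(float)
        memp = defaultdict(float)
        mem_mb = defaultdict(int)
        for line in ps_aux_output.splitlines()[1:]:   # skip the header row
            fields = line.split()
            if len(fields) < 11:
                continue
            name = fields[10]                          # command, like awk's $11
            cpu[name] += float(fields[2])
            memp[name] += float(fields[3])
            mem_mb[name] += int(fields[5]) // 1024     # RSS KB -> MB
        print("CPU %\t MEM %\t MEM\t PROCESS")
        for name in sorted(mem_mb, key=mem_mb.get, reverse=True):
            print(f"{cpu[name]:.1f}%\t {memp[name]:.1f}%\t {mem_mb[name]}MB\t {name}")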

File diff too large to display
+ 387 - 0
Big-Data-Clusters/CU8/Public/content/diagnose/tsg037-determine-primary-master-replica.ipynb


+ 548 - 0
Big-Data-Clusters/CU8/Public/content/diagnose/tsg055-time-curl-to-sparkhead.ipynb

@@ -0,0 +1,548 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "TSG055 - Time Curl to Sparkhead\n",
+                "===============================\n",
+                "\n",
+                "Description\n",
+                "-----------\n",
+                "\n",
+                "If `azdata bdc status show` fails with:\n",
+                "\n",
+                "> StatefulSet sparkhead is not healthy: {{Pod sparkhead-0 is not\n",
+                "> healthy: {Container hadoop-livy-sparkhistory is unhealthy: {Found\n",
+                "> error properties: {Property: sparkhistory.readiness, Details: \u2018Timed\n",
+                "> out getting health status after 5000 milliseconds.\u2019}}}}}: unhealthy\n",
+                "> Pod sparkhead-0 is not healthy: {Container hadoop-livy-sparkhistory is\n",
+                "> unhealthy: {Found error properties: {Property: sparkhistory.readiness,\n",
+                "> Details: \u2018Timed out getting health status after 5000\n",
+                "> milliseconds.\u2019}}}: unhealthy spark: unhealthy\" StatefulSet sparkhead\n",
+                "> is not healthy: {{Pod sparkhead-0 is not healthy: {Container\n",
+                "> hadoop-livy-sparkhistory is unhealthy: {Found error properties:\n",
+                "> {Property: sparkhistory.readiness, Details: \u2018Timed out getting health\n",
+                "> status after 5000 milliseconds.\u2019}}}}}: unhealthy Pod sparkhead-0 is\n",
+                "> not healthy: {Container hadoop-livy-sparkhistory is unhealthy: {Found\n",
+                "> error properties: {Property: sparkhistory.readiness, Details: \u2018Timed\n",
+                "> out getting health status after 5000 milliseconds.\u2019}}}: unhealthy\n",
+                "\n",
+                "It can be a useful diagnosis step to understand what the Curl response\n",
+                "time is from the `controller` pod to the `sparkhead` pod.\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Common functions\n",
+                "\n",
+                "Define helper functions used in this notebook."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n",
+                "import sys\n",
+                "import os\n",
+                "import re\n",
+                "import json\n",
+                "import platform\n",
+                "import shlex\n",
+                "import shutil\n",
+                "import datetime\n",
+                "\n",
+                "from subprocess import Popen, PIPE\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
+                "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
+                "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
+                "\n",
+                "first_run = True\n",
+                "rules = None\n",
+                "debug_logging = False\n",
+                "\n",
+                "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
+                "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
+                "\n",
+                "    NOTES:\n",
+                "\n",
+                "    1.  Commands that need this kind of ' quoting on Windows e.g.:\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n",
+                "\n",
+                "        Need to actually pass in as '\"':\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n",
+                "\n",
+                "        The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n",
+                "        \n",
+                "            `iter(p.stdout.readline, b'')`\n",
+                "\n",
+                "        The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n",
+                "    \"\"\"\n",
+                "    MAX_RETRIES = 5\n",
+                "    output = \"\"\n",
+                "    retry = False\n",
+                "\n",
+                "    global first_run\n",
+                "    global rules\n",
+                "\n",
+                "    if first_run:\n",
+                "        first_run = False\n",
+                "        rules = load_rules()\n",
+                "\n",
+                "    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n",
+                "    #\n",
+                "    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
+                "    #\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n",
+                "        cmd = cmd.replace(\"\\n\", \" \")\n",
+                "\n",
+                "    # shlex.split is required on bash and for Windows paths with spaces\n",
+                "    #\n",
+                "    cmd_actual = shlex.split(cmd)\n",
+                "\n",
+                "    # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n",
+                "    #\n",
+                "    user_provided_exe_name = cmd_actual[0].lower()\n",
+                "\n",
+                "    # When running python, use the python in the ADS sandbox ({sys.executable})\n",
+                "    #\n",
+                "    if cmd.startswith(\"python \"):\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n",
+                "\n",
+                "        # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n",
+                "        # with:\n",
+                "        #\n",
+                "        #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n",
+                "        #\n",
+                "        # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n",
+                "        #\n",
+                "        if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n",
+                "            os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n",
+                "\n",
+                "    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n",
+                "    #\n",
+                "    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n",
+                "\n",
+                "    # To aid supportability, determine which binary file will actually be executed on the machine\n",
+                "    #\n",
+                "    which_binary = None\n",
+                "\n",
+                "    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n",
+                "    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n",
+                "    # of CURL exists on the machine use that one.  (Unfortunately the curl.exe in System32 is almost\n",
+                "    # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n",
+                "    # look for the 2nd installation of CURL in the path)\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n",
+                "        path = os.getenv('PATH')\n",
+                "        for p in path.split(os.path.pathsep):\n",
+                "            p = os.path.join(p, \"curl.exe\")\n",
+                "            if os.path.exists(p) and os.access(p, os.X_OK):\n",
+                "                if p.lower().find(\"system32\") == -1:\n",
+                "                    cmd_actual[0] = p\n",
+                "                    which_binary = p\n",
+                "                    break\n",
+                "\n",
+                "    # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n",
+                "    # seems to be required for .msi installs of azdata.cmd/az.cmd.  (otherwise Popen returns FileNotFound) \n",
+                "    #\n",
+                "    # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        which_binary = shutil.which(cmd_actual[0])\n",
+                "\n",
+                "    # Display an install HINT, so the user can click on a SOP to install the missing binary\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n",
+                "        print(sys.path)\n",
+                "\n",
+                "        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
+                "            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n",
+                "    else:   \n",
+                "        cmd_actual[0] = which_binary\n",
+                "\n",
+                "    start_time = datetime.datetime.now().replace(microsecond=0)\n",
+                "\n",
+                "    print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n",
+                "    print(f\"       using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n",
+                "    print(f\"       cwd: {os.getcwd()}\")\n",
+                "\n",
+                "    # Command-line tools such as CURL and AZDATA HDFS commands output\n",
+                "    # scrolling progress bars, which causes Jupyter to hang forever, to\n",
+                "    # workaround this, use no_output=True\n",
+                "    #\n",
+                "\n",
+                "\n",
+                "    # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n",
+                "    #\n",
+                "    wait = True \n",
+                "\n",
+                "    try:\n",
+                "        if no_output:\n",
+                "            p = Popen(cmd_actual)\n",
+                "        else:\n",
+                "            p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n",
+                "            with p.stdout:\n",
+                "                for line in iter(p.stdout.readline, b''):\n",
+                "                    line = line.decode()\n",
+                "                    if return_output:\n",
+                "                        output = output + line\n",
+                "                    else:\n",
+                "                        if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n",
+                "                            regex = re.compile('  \"(.*)\"\\: \"(.*)\"') \n",
+                "                            match = regex.match(line)\n",
+                "                            if match:\n",
+                "                                if match.group(1).find(\"HTML\") != -1:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n",
+                "                                else:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n",
+                "\n",
+                "                                    wait = False\n",
+                "                                    break # otherwise infinite hang, have not worked out why yet.\n",
+                "                        else:\n",
+                "                            print(line, end='')\n",
+                "                            if rules is not None:\n",
+                "                                apply_expert_rules(line)\n",
+                "\n",
+                "        if wait:\n",
+                "            p.wait()\n",
+                "    except FileNotFoundError as e:\n",
+                "        if install_hint is not None:\n",
+                "            display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n",
+                "\n",
+                "    exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n",
+                "\n",
+                "    if not no_output:\n",
+                "        for line in iter(p.stderr.readline, b''):\n",
+                "            try:\n",
+                "                line_decoded = line.decode()\n",
+                "            except UnicodeDecodeError:\n",
+                "                # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n",
+                "                #\n",
+                "                #   \\xa0\n",
+                "                #\n",
+                "                # For example see this in the response from `az group create`:\n",
+                "                #\n",
+                "                # ERROR: Get Token request returned http error: 400 and server \n",
+                "                # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n",
+                "                # The refresh token has expired due to inactivity.\\xa0The token was \n",
+                "                # issued on 2018-10-25T23:35:11.9832872Z\n",
+                "                #\n",
+                "                # which generates the exception:\n",
+                "                #\n",
+                "                # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n",
+                "                #\n",
+                "                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n",
+                "                print(line)\n",
+                "                line_decoded = \"\"\n",
+                "                pass\n",
+                "            else:\n",
+                "\n",
+                "                # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n",
+                "                # print this empty \"ERR:\" as it confuses.\n",
+                "                #\n",
+                "                if line_decoded == \"\":\n",
+                "                    continue\n",
+                "                \n",
+                "                print(f\"STDERR: {line_decoded}\", end='')\n",
+                "\n",
+                "                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n",
+                "                    exit_code_workaround = 1\n",
+                "\n",
+                "                # inject HINTs to next TSG/SOP based on output in stderr\n",
+                "                #\n",
+                "                if user_provided_exe_name in error_hints:\n",
+                "                    for error_hint in error_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(error_hint[0]) != -1:\n",
+                "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
+                "\n",
+                "                # apply expert rules (to run follow-on notebooks), based on output\n",
+                "                #\n",
+                "                if rules is not None:\n",
+                "                    apply_expert_rules(line_decoded)\n",
+                "\n",
+                "                # Verify if a transient error, if so automatically retry (recursive)\n",
+                "                #\n",
+                "                if user_provided_exe_name in retry_hints:\n",
+                "                    for retry_hint in retry_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(retry_hint) != -1:\n",
+                "                            if retry_count < MAX_RETRIES:\n",
+                "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
+                "                                retry_count = retry_count + 1\n",
+                "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
+                "\n",
+                "                                if return_output:\n",
+                "                                    if base64_decode:\n",
+                "                                        import base64\n",
+                "\n",
+                "                                        return base64.b64decode(output).decode('utf-8')\n",
+                "                                    else:\n",
+                "                                        return output\n",
+                "\n",
+                "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
+                "\n",
+                "    # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n",
+                "    # don't wait here, if success known above\n",
+                "    #\n",
+                "    if wait: \n",
+                "        if p.returncode != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n",
+                "    else:\n",
+                "        if exit_code_workaround !=0 :\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n",
+                "\n",
+                "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
+                "\n",
+                "    if return_output:\n",
+                "        if base64_decode:\n",
+                "            import base64\n",
+                "\n",
+                "            return base64.b64decode(output).decode('utf-8')\n",
+                "        else:\n",
+                "            return output\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    \"\"\"Load a json file from disk and return the contents\"\"\"\n",
+                "\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def load_rules():\n",
+                "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
+                "\n",
+                "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
+                "    #\n",
+                "    try:\n",
+                "        j = load_json(\"tsg055-time-curl-to-sparkhead.ipynb\")\n",
+                "    except:\n",
+                "        pass # If the user has renamed the book, we can't load ourself.  NOTE: Is there a way in Jupyter, to know your own filename?\n",
+                "    else:\n",
+                "        if \"metadata\" in j and \\\n",
+                "            \"azdata\" in j[\"metadata\"] and \\\n",
+                "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
+                "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "\n",
+                "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
+                "\n",
+                "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n",
+                "\n",
+                "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
+                "\n",
+                "            return rules\n",
+                "\n",
+                "def apply_expert_rules(line):\n",
+                "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n",
+                "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
+                "\n",
+                "    global rules\n",
+                "\n",
+                "    for rule in rules:\n",
+                "        notebook = rule[1]\n",
+                "        cell_type = rule[2]\n",
+                "        output_type = rule[3] # i.e. stream or error\n",
+                "        output_type_name = rule[4] # i.e. ename or name \n",
+                "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
+                "        details_name = rule[6]  # i.e. evalue or text \n",
+                "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
+                "\n",
+                "        if debug_logging:\n",
+                "            print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n",
+                "\n",
+                "        if re.match(expression, line, re.DOTALL):\n",
+                "\n",
+                "            if debug_logging:\n",
+                "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n",
+                "\n",
+                "            match_found = True\n",
+                "\n",
+                "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
+                "\n",
+                "\n",
+                "\n",
+                "print('Common functions defined successfully.')\n",
+                "\n",
+                "# Hints for binary (transient fault) retry, (known) error and install guide\n",
+                "#\n",
+                "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n",
+                "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n",
+                "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Instantiate Kubernetes client"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Instantiate the Python Kubernetes client into 'api' variable\n",
+                "\n",
+                "import os\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "try:\n",
+                "    from kubernetes import client, config\n",
+                "    from kubernetes.stream import stream\n",
+                "\n",
+                "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n",
+                "        config.load_incluster_config()\n",
+                "    else:\n",
+                "        try:\n",
+                "            config.load_kube_config()\n",
+                "        except:\n",
+                "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n",
+                "            raise\n",
+                "    api = client.CoreV1Api()\n",
+                "\n",
+                "    print('Kubernetes client instantiated')\n",
+                "except ImportError:\n",
+                "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n",
+                "    raise"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n",
+                "    except IndexError:\n",
+                "        from IPython.display import Markdown\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print('The kubernetes namespace for your big data cluster is: ' + namespace)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get name of the \u2018Running\u2019 `controller` `pod`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place the name  of the 'Running' controller pod in variable `controller`\n",
+                "\n",
+                "controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n",
+                "\n",
+                "print(f\"Controller pod name: {controller}\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Time `curl` in `controller` `pod` to `sparkhead`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "run(f'kubectl exec {controller} -n {namespace} -c controller -- bash -c \"time curl --cacert /run/secrets/certificates/rootca/cluster-ca-certificate.crt https://sparkhead-svc:18480\"')"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": false,
+            "expert": {
+                "rules": [
+                    [
+                        "TSG078",
+                        "code",
+                        "stream",
+                        "name",
+                        "stdout",
+                        "text",
+                        ".*StatefulSet sparkhead is not healthy.*Timed out getting health status"
+                    ]
+                ]
+            }
+        }
+    }
+}
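
A small sketch of how the expert rule stored in the notebook metadata above is
evaluated: apply_expert_rules() tests each output line against the rule's
regular expression with re.match(..., re.DOTALL), and a match displays the HINT
to run the follow-on notebook (TSG078 here). The sample line is abbreviated
from the error text in the Description section:

    import re

    expression = ".*StatefulSet sparkhead is not healthy.*Timed out getting health status"
    line = ("StatefulSet sparkhead is not healthy: Container hadoop-livy-sparkhistory "
            "is unhealthy: Timed out getting health status after 5000 milliseconds.")

    if re.match(expression, line, re.DOTALL):
        print("HINT: Use TSG078 - Is cluster healthy to resolve this issue.")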

+ 562 - 0
Big-Data-Clusters/CU8/Public/content/diagnose/tsg060-get-disk-space-for-all-pvcs.ipynb

@@ -0,0 +1,562 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "TSG060 - Persistent Volume disk space for all BDC PVCs\n",
+                "======================================================\n",
+                "\n",
+                "Description\n",
+                "-----------\n",
+                "\n",
+                "Connect to each container and get the disk space used/available for each\n",
+                "Persisted Volume (PV) mapped to each Persisted Volume Claim (PVC) of a\n",
+                "Big Data Cluster (BDC)\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters\n",
+                "\n",
+                "Set the space used percentage, if disk space used crosses this\n",
+                "threshold, this notebook will raise an exception."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "SPACED_USED_PERCENT_THRESHOLD = 80"
+            ]
+        },
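+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "A minimal sketch of the threshold comparison this notebook applies\n",
+                "later: parse the \"use%\" column of a `df` output line and compare it\n",
+                "against SPACED_USED_PERCENT_THRESHOLD. The sample `df_line` below is\n",
+                "hypothetical."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "# Hypothetical `df` output line: filesystem, size, used, avail, use%, mount\n",
+                "df_line = '/dev/sdb1  99G  85G  14G  86% /var/opt'\n",
+                "\n",
+                "use_percent = int(df_line.split()[4].rstrip('%'))\n",
+                "if use_percent > SPACED_USED_PERCENT_THRESHOLD:\n",
+                "    print(f'WARNING: {use_percent}% used is over the {SPACED_USED_PERCENT_THRESHOLD}% threshold')\n",
+                "else:\n",
+                "    print(f'OK: {use_percent}% used')"
+            ]
+        },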
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Common functions\n",
+                "\n",
+                "Define helper functions used in this notebook."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n",
+                "import sys\n",
+                "import os\n",
+                "import re\n",
+                "import json\n",
+                "import platform\n",
+                "import shlex\n",
+                "import shutil\n",
+                "import datetime\n",
+                "\n",
+                "from subprocess import Popen, PIPE\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
+                "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
+                "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
+                "\n",
+                "first_run = True\n",
+                "rules = None\n",
+                "debug_logging = False\n",
+                "\n",
+                "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
+                "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
+                "\n",
+                "    NOTES:\n",
+                "\n",
+                "    1.  Commands that need this kind of ' quoting on Windows e.g.:\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n",
+                "\n",
+                "        Need to actually pass in as '\"':\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n",
+                "\n",
+                "        The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n",
+                "        \n",
+                "            `iter(p.stdout.readline, b'')`\n",
+                "\n",
+                "        The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n",
+                "    \"\"\"\n",
+                "    MAX_RETRIES = 5\n",
+                "    output = \"\"\n",
+                "    retry = False\n",
+                "\n",
+                "    global first_run\n",
+                "    global rules\n",
+                "\n",
+                "    if first_run:\n",
+                "        first_run = False\n",
+                "        rules = load_rules()\n",
+                "\n",
+                "    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n",
+                "    #\n",
+                "    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
+                "    #\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n",
+                "        cmd = cmd.replace(\"\\n\", \" \")\n",
+                "\n",
+                "    # shlex.split is required on bash and for Windows paths with spaces\n",
+                "    #\n",
+                "    cmd_actual = shlex.split(cmd)\n",
+                "\n",
+                "    # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n",
+                "    #\n",
+                "    user_provided_exe_name = cmd_actual[0].lower()\n",
+                "\n",
+                "    # When running python, use the python in the ADS sandbox ({sys.executable})\n",
+                "    #\n",
+                "    if cmd.startswith(\"python \"):\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n",
+                "\n",
+                "        # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n",
+                "        # with:\n",
+                "        #\n",
+                "        #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n",
+                "        #\n",
+                "        # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n",
+                "        #\n",
+                "        if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n",
+                "            os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n",
+                "\n",
+                "    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n",
+                "    #\n",
+                "    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n",
+                "\n",
+                "    # To aid supportability, determine which binary file will actually be executed on the machine\n",
+                "    #\n",
+                "    which_binary = None\n",
+                "\n",
+                "    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n",
+                "    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n",
+                "    # of CURL exists on the machine use that one.  (Unfortunately the curl.exe in System32 is almost\n",
+                "    # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n",
+                "    # look for the 2nd installation of CURL in the path)\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n",
+                "        path = os.getenv('PATH')\n",
+                "        for p in path.split(os.path.pathsep):\n",
+                "            p = os.path.join(p, \"curl.exe\")\n",
+                "            if os.path.exists(p) and os.access(p, os.X_OK):\n",
+                "                if p.lower().find(\"system32\") == -1:\n",
+                "                    cmd_actual[0] = p\n",
+                "                    which_binary = p\n",
+                "                    break\n",
+                "\n",
+                "    # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n",
+                "    # seems to be required for .msi installs of azdata.cmd/az.cmd.  (otherwise Popen returns FileNotFound) \n",
+                "    #\n",
+                "    # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        which_binary = shutil.which(cmd_actual[0])\n",
+                "\n",
+                "    # Display an install HINT, so the user can click on a SOP to install the missing binary\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n",
+                "        print(sys.path)\n",
+                "\n",
+                "        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
+                "            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n",
+                "    else:   \n",
+                "        cmd_actual[0] = which_binary\n",
+                "\n",
+                "    start_time = datetime.datetime.now().replace(microsecond=0)\n",
+                "\n",
+                "    print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n",
+                "    print(f\"       using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n",
+                "    print(f\"       cwd: {os.getcwd()}\")\n",
+                "\n",
+                "    # Command-line tools such as CURL and AZDATA HDFS commands output\n",
+                "    # scrolling progress bars, which causes Jupyter to hang forever, to\n",
+                "    # workaround this, use no_output=True\n",
+                "    #\n",
+                "\n",
+                "\n",
+                "    # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n",
+                "    #\n",
+                "    wait = True \n",
+                "\n",
+                "    try:\n",
+                "        if no_output:\n",
+                "            p = Popen(cmd_actual)\n",
+                "        else:\n",
+                "            p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n",
+                "            with p.stdout:\n",
+                "                for line in iter(p.stdout.readline, b''):\n",
+                "                    line = line.decode()\n",
+                "                    if return_output:\n",
+                "                        output = output + line\n",
+                "                    else:\n",
+                "                        if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n",
+                "                            regex = re.compile('  \"(.*)\"\\: \"(.*)\"') \n",
+                "                            match = regex.match(line)\n",
+                "                            if match:\n",
+                "                                if match.group(1).find(\"HTML\") != -1:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n",
+                "                                else:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n",
+                "\n",
+                "                                    wait = False\n",
+                "                                    break # otherwise infinite hang, have not worked out why yet.\n",
+                "                        else:\n",
+                "                            print(line, end='')\n",
+                "                            if rules is not None:\n",
+                "                                apply_expert_rules(line)\n",
+                "\n",
+                "        if wait:\n",
+                "            p.wait()\n",
+                "    except FileNotFoundError as e:\n",
+                "        if install_hint is not None:\n",
+                "            display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n",
+                "\n",
+                "    exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n",
+                "\n",
+                "    if not no_output:\n",
+                "        for line in iter(p.stderr.readline, b''):\n",
+                "            try:\n",
+                "                line_decoded = line.decode()\n",
+                "            except UnicodeDecodeError:\n",
+                "                # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n",
+                "                #\n",
+                "                #   \\xa0\n",
+                "                #\n",
+                "                # For example see this in the response from `az group create`:\n",
+                "                #\n",
+                "                # ERROR: Get Token request returned http error: 400 and server \n",
+                "                # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n",
+                "                # The refresh token has expired due to inactivity.\\xa0The token was \n",
+                "                # issued on 2018-10-25T23:35:11.9832872Z\n",
+                "                #\n",
+                "                # which generates the exception:\n",
+                "                #\n",
+                "                # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n",
+                "                #\n",
+                "                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n",
+                "                print(line)\n",
+                "                line_decoded = \"\"\n",
+                "                pass\n",
+                "            else:\n",
+                "\n",
+                "                # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n",
+                "                # print this empty \"ERR:\" as it confuses.\n",
+                "                #\n",
+                "                if line_decoded == \"\":\n",
+                "                    continue\n",
+                "                \n",
+                "                print(f\"STDERR: {line_decoded}\", end='')\n",
+                "\n",
+                "                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n",
+                "                    exit_code_workaround = 1\n",
+                "\n",
+                "                # inject HINTs to next TSG/SOP based on output in stderr\n",
+                "                #\n",
+                "                if user_provided_exe_name in error_hints:\n",
+                "                    for error_hint in error_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(error_hint[0]) != -1:\n",
+                "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
+                "\n",
+                "                # apply expert rules (to run follow-on notebooks), based on output\n",
+                "                #\n",
+                "                if rules is not None:\n",
+                "                    apply_expert_rules(line_decoded)\n",
+                "\n",
+                "                # Verify if a transient error, if so automatically retry (recursive)\n",
+                "                #\n",
+                "                if user_provided_exe_name in retry_hints:\n",
+                "                    for retry_hint in retry_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(retry_hint) != -1:\n",
+                "                            if retry_count < MAX_RETRIES:\n",
+                "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
+                "                                retry_count = retry_count + 1\n",
+                "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
+                "\n",
+                "                                if return_output:\n",
+                "                                    if base64_decode:\n",
+                "                                        import base64\n",
+                "\n",
+                "                                        return base64.b64decode(output).decode('utf-8')\n",
+                "                                    else:\n",
+                "                                        return output\n",
+                "\n",
+                "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
+                "\n",
+                "    # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n",
+                "    # don't wait here, if success known above\n",
+                "    #\n",
+                "    if wait: \n",
+                "        if p.returncode != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n",
+                "    else:\n",
+                "        if exit_code_workaround !=0 :\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n",
+                "\n",
+                "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
+                "\n",
+                "    if return_output:\n",
+                "        if base64_decode:\n",
+                "            import base64\n",
+                "\n",
+                "            return base64.b64decode(output).decode('utf-8')\n",
+                "        else:\n",
+                "            return output\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    \"\"\"Load a json file from disk and return the contents\"\"\"\n",
+                "\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def load_rules():\n",
+                "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
+                "\n",
+                "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
+                "    #\n",
+                "    try:\n",
+                "        j = load_json(\"tsg060-get-disk-space-for-all-pvcs.ipynb\")\n",
+                "    except:\n",
+                "        pass # If the user has renamed the book, we can't load ourself.  NOTE: Is there a way in Jupyter, to know your own filename?\n",
+                "    else:\n",
+                "        if \"metadata\" in j and \\\n",
+                "            \"azdata\" in j[\"metadata\"] and \\\n",
+                "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
+                "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "\n",
+                "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
+                "\n",
+                "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n",
+                "\n",
+                "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
+                "\n",
+                "            return rules\n",
+                "\n",
+                "def apply_expert_rules(line):\n",
+                "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n",
+                "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
+                "\n",
+                "    global rules\n",
+                "\n",
+                "    for rule in rules:\n",
+                "        notebook = rule[1]\n",
+                "        cell_type = rule[2]\n",
+                "        output_type = rule[3] # i.e. stream or error\n",
+                "        output_type_name = rule[4] # i.e. ename or name \n",
+                "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
+                "        details_name = rule[6]  # i.e. evalue or text \n",
+                "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
+                "\n",
+                "        if debug_logging:\n",
+                "            print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n",
+                "\n",
+                "        if re.match(expression, line, re.DOTALL):\n",
+                "\n",
+                "            if debug_logging:\n",
+                "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n",
+                "\n",
+                "            match_found = True\n",
+                "\n",
+                "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
+                "\n",
+                "\n",
+                "\n",
+                "print('Common functions defined successfully.')\n",
+                "\n",
+                "# Hints for binary (transient fault) retry, (known) error and install guide\n",
+                "#\n",
+                "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n",
+                "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n",
+                "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Instantiate Kubernetes client"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Instantiate the Python Kubernetes client into 'api' variable\n",
+                "\n",
+                "import os\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "try:\n",
+                "    from kubernetes import client, config\n",
+                "    from kubernetes.stream import stream\n",
+                "\n",
+                "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n",
+                "        config.load_incluster_config()\n",
+                "    else:\n",
+                "        try:\n",
+                "            config.load_kube_config()\n",
+                "        except:\n",
+                "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n",
+                "            raise\n",
+                "    api = client.CoreV1Api()\n",
+                "\n",
+                "    print('Kubernetes client instantiated')\n",
+                "except ImportError:\n",
+                "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n",
+                "    raise"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n",
+                "    except IndexError:\n",
+                "        from IPython.display import Markdown\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print('The kubernetes namespace for your big data cluster is: ' + namespace)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Connect to each container that mounts a PVC and run the `df` linux command line tool\n",
+                "\n",
+                "For each pod:\n",
+                "\n",
+                "1.  Get the claim\\_names from the volumes which have a PVC\n",
+                "2.  Join that to the containers who volume\\_mount that claim\\_name\n",
+                "3.  Get the \u2018mount\\_path\u2019 from the \u2018volume\\_mount\u2019\n",
+                "4.  Exec into the container and run the \u2018df\u2019 tool.\n",
+                "\n",
+                "This technique seems to work across kubeadm and AKS, but does require\n",
+                "\u2018kubectl exec\u2019 into each container (which requires permission and some\n",
+                "time)."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "pods = api.list_namespaced_pod(namespace)\n",
+                "\n",
+                "low_diskspace = False\n",
+                "\n",
+                "for pod in pods.items:\n",
+                "    for volume in pod.spec.volumes:\n",
+                "        if volume.persistent_volume_claim is not None:\n",
+                "            for container in pod.spec.containers:\n",
+                "                for volume_mount in container.volume_mounts:\n",
+                "                    if volume_mount.name == volume.name:\n",
+                "                        pvc = api.read_namespaced_persistent_volume_claim(name=volume.persistent_volume_claim.claim_name, namespace=namespace)\n",
+                "                        print (f\"Disk Space for {pod.metadata.name}/{container.name} PVC: {volume.persistent_volume_claim.claim_name} bound to PV: {pvc.spec.volume_name} ({pvc.status.capacity}) Storage Class: {pvc.spec.storage_class_name}\")\n",
+                "                        try:\n",
+                "                            output=stream(api.connect_get_namespaced_pod_exec, pod.metadata.name, namespace, container=container.name, command=['/bin/sh', '-c', f'df {volume_mount.mount_path} -h'], stderr=True, stdout=True)\n",
+                "                        except Exception as err:\n",
+                "                            print(err)\n",
+                "                        else:\n",
+                "                            print(output)\n",
+                "\n",
+                "                            # Get the same output as a CSV, so we can check the space used\n",
+                "                            output=stream(api.connect_get_namespaced_pod_exec, pod.metadata.name, namespace, container=container.name, command=['/bin/sh', '-c', f\"\"\"df {volume_mount.mount_path} -h -P | awk '{{print $1\",\"$2\",\"$3\",\"$4\",\"$5\",\"$6\" \"$7}}'\"\"\"], stderr=True, stdout=True)\n",
+                "                            \n",
+                "                            s = output.split(\",\")\n",
+                "                            space_used = int(s[9][:-1])\n",
+                "\n",
+                "                            if space_used > SPACED_USED_PERCENT_THRESHOLD:\n",
+                "                                low_diskspace = True\n",
+                "\n",
+                "                                # NOTE: This string is used to match an `expert rule` (SOP013)\n",
+                "                                #\n",
+                "                                print(f\"WARNING: LOW DISK SPACE! ({pod.metadata.name}/{container.name})\")\n",
+                "                                print(\"^^^^^^^^^^^^^^^^^^^^^^^^^\")\n",
+                "\n",
+                "if low_diskspace:\n",
+                "    raise SystemExit(f\"Disk space on one or more Persisted Volumes is greater than {SPACED_USED_PERCENT_THRESHOLD}%\")"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": false
+        }
+    }
+}

File diff is too large to display
+ 361 - 0
Big-Data-Clusters/CU8/Public/content/diagnose/tsg078-is-cluster-healthy.ipynb


File diff is too large to display
+ 359 - 0
Big-Data-Clusters/CU8/Public/content/diagnose/tsg079-generate-controller-core-dump.ipynb


+ 146 - 0
Big-Data-Clusters/CU8/Public/content/diagnose/tsg086-run-top-for-all-containers.ipynb

@@ -0,0 +1,146 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "TSG086 - Run `top` in all containers\n",
+                "====================================\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Instantiate Kubernetes client"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Instantiate the Python Kubernetes client into 'api' variable\n",
+                "\n",
+                "import os\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "try:\n",
+                "    from kubernetes import client, config\n",
+                "    from kubernetes.stream import stream\n",
+                "\n",
+                "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n",
+                "        config.load_incluster_config()\n",
+                "    else:\n",
+                "        try:\n",
+                "            config.load_kube_config()\n",
+                "        except:\n",
+                "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n",
+                "            raise\n",
+                "    api = client.CoreV1Api()\n",
+                "\n",
+                "    print('Kubernetes client instantiated')\n",
+                "except ImportError:\n",
+                "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n",
+                "    raise"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n",
+                "    except IndexError:\n",
+                "        from IPython.display import Markdown\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print('The kubernetes namespace for your big data cluster is: ' + namespace)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Run top in each container"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "cmd = \"top -b -n 1\"\n",
+                "\n",
+                "pod_list = api.list_namespaced_pod(namespace)\n",
+                "pod_names = [pod.metadata.name for pod in pod_list.items]\n",
+                "\n",
+                "for pod in pod_list.items:\n",
+                "    container_names = [container.name for container in pod.spec.containers]\n",
+                "\n",
+                "    for container in container_names:\n",
+                "        print (f\"CONTAINER: {container} / POD: {pod.metadata.name}\")\n",
+                "        try:\n",
+                "            print(stream(api.connect_get_namespaced_pod_exec, pod.metadata.name, namespace, command=['/bin/sh', '-c', cmd], container=container, stderr=True, stdout=True))\n",
+                "        except Exception as err:\n",
+                "            print (f\"Failed to get run 'top' for container: {container} in pod: {pod.metadata.name}. Error: {err}\")"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": false
+        }
+    }
+}

+ 493 - 0
Big-Data-Clusters/CU8/Public/content/diagnose/tsg087-use-hadoop-fs.ipynb

@@ -0,0 +1,493 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "TSG087 - Use hadoop fs CLI on nmnode pod\n",
+                "========================================\n",
+                "\n",
+                "Description\n",
+                "-----------\n",
+                "\n",
+                "Connect directly to the namenode and use the comprehensive `hadoop fs`\n",
+                "CLI\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Common functions\n",
+                "\n",
+                "Define helper functions used in this notebook."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n",
+                "import sys\n",
+                "import os\n",
+                "import re\n",
+                "import json\n",
+                "import platform\n",
+                "import shlex\n",
+                "import shutil\n",
+                "import datetime\n",
+                "\n",
+                "from subprocess import Popen, PIPE\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
+                "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
+                "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
+                "\n",
+                "first_run = True\n",
+                "rules = None\n",
+                "debug_logging = False\n",
+                "\n",
+                "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
+                "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
+                "\n",
+                "    NOTES:\n",
+                "\n",
+                "    1.  Commands that need this kind of ' quoting on Windows e.g.:\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n",
+                "\n",
+                "        Need to actually pass in as '\"':\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n",
+                "\n",
+                "        The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n",
+                "        \n",
+                "            `iter(p.stdout.readline, b'')`\n",
+                "\n",
+                "        The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n",
+                "    \"\"\"\n",
+                "    MAX_RETRIES = 5\n",
+                "    output = \"\"\n",
+                "    retry = False\n",
+                "\n",
+                "    global first_run\n",
+                "    global rules\n",
+                "\n",
+                "    if first_run:\n",
+                "        first_run = False\n",
+                "        rules = load_rules()\n",
+                "\n",
+                "    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n",
+                "    #\n",
+                "    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
+                "    #\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n",
+                "        cmd = cmd.replace(\"\\n\", \" \")\n",
+                "\n",
+                "    # shlex.split is required on bash and for Windows paths with spaces\n",
+                "    #\n",
+                "    cmd_actual = shlex.split(cmd)\n",
+                "\n",
+                "    # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n",
+                "    #\n",
+                "    user_provided_exe_name = cmd_actual[0].lower()\n",
+                "\n",
+                "    # When running python, use the python in the ADS sandbox ({sys.executable})\n",
+                "    #\n",
+                "    if cmd.startswith(\"python \"):\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n",
+                "\n",
+                "        # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n",
+                "        # with:\n",
+                "        #\n",
+                "        #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n",
+                "        #\n",
+                "        # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n",
+                "        #\n",
+                "        if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n",
+                "            os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n",
+                "\n",
+                "    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n",
+                "    #\n",
+                "    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n",
+                "\n",
+                "    # To aid supportability, determine which binary file will actually be executed on the machine\n",
+                "    #\n",
+                "    which_binary = None\n",
+                "\n",
+                "    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n",
+                "    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n",
+                "    # of CURL exists on the machine use that one.  (Unfortunately the curl.exe in System32 is almost\n",
+                "    # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n",
+                "    # look for the 2nd installation of CURL in the path)\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n",
+                "        path = os.getenv('PATH')\n",
+                "        for p in path.split(os.path.pathsep):\n",
+                "            p = os.path.join(p, \"curl.exe\")\n",
+                "            if os.path.exists(p) and os.access(p, os.X_OK):\n",
+                "                if p.lower().find(\"system32\") == -1:\n",
+                "                    cmd_actual[0] = p\n",
+                "                    which_binary = p\n",
+                "                    break\n",
+                "\n",
+                "    # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n",
+                "    # seems to be required for .msi installs of azdata.cmd/az.cmd.  (otherwise Popen returns FileNotFound) \n",
+                "    #\n",
+                "    # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        which_binary = shutil.which(cmd_actual[0])\n",
+                "\n",
+                "    # Display an install HINT, so the user can click on a SOP to install the missing binary\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n",
+                "        print(sys.path)\n",
+                "\n",
+                "        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
+                "            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n",
+                "    else:   \n",
+                "        cmd_actual[0] = which_binary\n",
+                "\n",
+                "    start_time = datetime.datetime.now().replace(microsecond=0)\n",
+                "\n",
+                "    print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n",
+                "    print(f\"       using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n",
+                "    print(f\"       cwd: {os.getcwd()}\")\n",
+                "\n",
+                "    # Command-line tools such as CURL and AZDATA HDFS commands output\n",
+                "    # scrolling progress bars, which causes Jupyter to hang forever, to\n",
+                "    # workaround this, use no_output=True\n",
+                "    #\n",
+                "\n",
+                "\n",
+                "    # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n",
+                "    #\n",
+                "    wait = True \n",
+                "\n",
+                "    try:\n",
+                "        if no_output:\n",
+                "            p = Popen(cmd_actual)\n",
+                "        else:\n",
+                "            p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n",
+                "            with p.stdout:\n",
+                "                for line in iter(p.stdout.readline, b''):\n",
+                "                    line = line.decode()\n",
+                "                    if return_output:\n",
+                "                        output = output + line\n",
+                "                    else:\n",
+                "                        if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n",
+                "                            regex = re.compile('  \"(.*)\"\\: \"(.*)\"') \n",
+                "                            match = regex.match(line)\n",
+                "                            if match:\n",
+                "                                if match.group(1).find(\"HTML\") != -1:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n",
+                "                                else:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n",
+                "\n",
+                "                                    wait = False\n",
+                "                                    break # otherwise infinite hang, have not worked out why yet.\n",
+                "                        else:\n",
+                "                            print(line, end='')\n",
+                "                            if rules is not None:\n",
+                "                                apply_expert_rules(line)\n",
+                "\n",
+                "        if wait:\n",
+                "            p.wait()\n",
+                "    except FileNotFoundError as e:\n",
+                "        if install_hint is not None:\n",
+                "            display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n",
+                "\n",
+                "    exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n",
+                "\n",
+                "    if not no_output:\n",
+                "        for line in iter(p.stderr.readline, b''):\n",
+                "            try:\n",
+                "                line_decoded = line.decode()\n",
+                "            except UnicodeDecodeError:\n",
+                "                # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n",
+                "                #\n",
+                "                #   \\xa0\n",
+                "                #\n",
+                "                # For example see this in the response from `az group create`:\n",
+                "                #\n",
+                "                # ERROR: Get Token request returned http error: 400 and server \n",
+                "                # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n",
+                "                # The refresh token has expired due to inactivity.\\xa0The token was \n",
+                "                # issued on 2018-10-25T23:35:11.9832872Z\n",
+                "                #\n",
+                "                # which generates the exception:\n",
+                "                #\n",
+                "                # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n",
+                "                #\n",
+                "                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n",
+                "                print(line)\n",
+                "                line_decoded = \"\"\n",
+                "                pass\n",
+                "            else:\n",
+                "\n",
+                "                # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n",
+                "                # print this empty \"ERR:\" as it confuses.\n",
+                "                #\n",
+                "                if line_decoded == \"\":\n",
+                "                    continue\n",
+                "                \n",
+                "                print(f\"STDERR: {line_decoded}\", end='')\n",
+                "\n",
+                "                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n",
+                "                    exit_code_workaround = 1\n",
+                "\n",
+                "                # inject HINTs to next TSG/SOP based on output in stderr\n",
+                "                #\n",
+                "                if user_provided_exe_name in error_hints:\n",
+                "                    for error_hint in error_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(error_hint[0]) != -1:\n",
+                "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
+                "\n",
+                "                # apply expert rules (to run follow-on notebooks), based on output\n",
+                "                #\n",
+                "                if rules is not None:\n",
+                "                    apply_expert_rules(line_decoded)\n",
+                "\n",
+                "                # Verify if a transient error, if so automatically retry (recursive)\n",
+                "                #\n",
+                "                if user_provided_exe_name in retry_hints:\n",
+                "                    for retry_hint in retry_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(retry_hint) != -1:\n",
+                "                            if retry_count < MAX_RETRIES:\n",
+                "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
+                "                                retry_count = retry_count + 1\n",
+                "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
+                "\n",
+                "                                if return_output:\n",
+                "                                    if base64_decode:\n",
+                "                                        import base64\n",
+                "\n",
+                "                                        return base64.b64decode(output).decode('utf-8')\n",
+                "                                    else:\n",
+                "                                        return output\n",
+                "\n",
+                "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
+                "\n",
+                "    # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n",
+                "    # don't wait here, if success known above\n",
+                "    #\n",
+                "    if wait: \n",
+                "        if p.returncode != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n",
+                "    else:\n",
+                "        if exit_code_workaround !=0 :\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n",
+                "\n",
+                "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
+                "\n",
+                "    if return_output:\n",
+                "        if base64_decode:\n",
+                "            import base64\n",
+                "\n",
+                "            return base64.b64decode(output).decode('utf-8')\n",
+                "        else:\n",
+                "            return output\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    \"\"\"Load a json file from disk and return the contents\"\"\"\n",
+                "\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def load_rules():\n",
+                "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
+                "\n",
+                "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
+                "    #\n",
+                "    try:\n",
+                "        j = load_json(\"tsg087-use-hadoop-fs.ipynb\")\n",
+                "    except:\n",
+                "        pass # If the user has renamed the book, we can't load ourself.  NOTE: Is there a way in Jupyter, to know your own filename?\n",
+                "    else:\n",
+                "        if \"metadata\" in j and \\\n",
+                "            \"azdata\" in j[\"metadata\"] and \\\n",
+                "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
+                "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "\n",
+                "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
+                "\n",
+                "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n",
+                "\n",
+                "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
+                "\n",
+                "            return rules\n",
+                "\n",
+                "def apply_expert_rules(line):\n",
+                "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n",
+                "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
+                "\n",
+                "    global rules\n",
+                "\n",
+                "    for rule in rules:\n",
+                "        notebook = rule[1]\n",
+                "        cell_type = rule[2]\n",
+                "        output_type = rule[3] # i.e. stream or error\n",
+                "        output_type_name = rule[4] # i.e. ename or name \n",
+                "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
+                "        details_name = rule[6]  # i.e. evalue or text \n",
+                "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
+                "\n",
+                "        if debug_logging:\n",
+                "            print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n",
+                "\n",
+                "        if re.match(expression, line, re.DOTALL):\n",
+                "\n",
+                "            if debug_logging:\n",
+                "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n",
+                "\n",
+                "            match_found = True\n",
+                "\n",
+                "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
+                "\n",
+                "\n",
+                "\n",
+                "print('Common functions defined successfully.')\n",
+                "\n",
+                "# Hints for binary (transient fault) retry, (known) error and install guide\n",
+                "#\n",
+                "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n",
+                "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n",
+                "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Instantiate Kubernetes client"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Instantiate the Python Kubernetes client into 'api' variable\n",
+                "\n",
+                "import os\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "try:\n",
+                "    from kubernetes import client, config\n",
+                "    from kubernetes.stream import stream\n",
+                "\n",
+                "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n",
+                "        config.load_incluster_config()\n",
+                "    else:\n",
+                "        try:\n",
+                "            config.load_kube_config()\n",
+                "        except:\n",
+                "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n",
+                "            raise\n",
+                "    api = client.CoreV1Api()\n",
+                "\n",
+                "    print('Kubernetes client instantiated')\n",
+                "except ImportError:\n",
+                "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n",
+                "    raise"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n",
+                "    except IndexError:\n",
+                "        from IPython.display import Markdown\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print('The kubernetes namespace for your big data cluster is: ' + namespace)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Connect to the namenode pod and run hadoop fs CLI"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "run(f'kubectl exec nmnode-0-0 -n {namespace} -c hadoop -- hadoop fs -ls /')"
+            ]
+        },
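+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "Optionally, other `hadoop fs` switches can be passed through the same\n",
+                "pattern. The cell below is a sketch (not part of the original\n",
+                "notebook): `-h` prints human-readable sizes, and `/tmp` is an\n",
+                "illustrative path that may not exist in your cluster."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "# OPTIONAL sketch: list '/tmp' (illustrative path) with human-readable sizes\n",
+                "run(f'kubectl exec nmnode-0-0 -n {namespace} -c hadoop -- hadoop fs -ls -h /tmp')"
+            ]
+        },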
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": false
+        }
+    }
+}

+ 494 - 0
Big-Data-Clusters/CU8/Public/content/diagnose/tsg108-controller-failed-to-upgrade.ipynb

@@ -0,0 +1,494 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "TSG108 - View the controller upgrade config map\n",
+                "===============================================\n",
+                "\n",
+                "Description\n",
+                "-----------\n",
+                "\n",
+                "When running a Big Data Cluster upgrade using `azdata bdc upgrade`:\n",
+                "\n",
+                "`azdata bdc upgrade --name <namespace> --tag <tag>`\n",
+                "\n",
+                "It may fail with:\n",
+                "\n",
+                "> Upgrading cluster to version 15.0.4003.10029\\_2\n",
+                ">\n",
+                "> NOTE: Cluster upgrade can take a significant amount of time depending\n",
+                "> on configuration, network speed, and the number of nodes in the\n",
+                "> cluster.\n",
+                ">\n",
+                "> Upgrading Control Plane. Control plane upgrade failed. Failed to\n",
+                "> upgrade controller.\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "Use these steps to troubelshoot the problem.\n",
+                "\n",
+                "### Common functions\n",
+                "\n",
+                "Define helper functions used in this notebook."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n",
+                "import sys\n",
+                "import os\n",
+                "import re\n",
+                "import json\n",
+                "import platform\n",
+                "import shlex\n",
+                "import shutil\n",
+                "import datetime\n",
+                "\n",
+                "from subprocess import Popen, PIPE\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
+                "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
+                "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
+                "\n",
+                "first_run = True\n",
+                "rules = None\n",
+                "debug_logging = False\n",
+                "\n",
+                "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
+                "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
+                "\n",
+                "    NOTES:\n",
+                "\n",
+                "    1.  Commands that need this kind of ' quoting on Windows e.g.:\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n",
+                "\n",
+                "        Need to actually pass in as '\"':\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n",
+                "\n",
+                "        The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n",
+                "        \n",
+                "            `iter(p.stdout.readline, b'')`\n",
+                "\n",
+                "        The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n",
+                "    \"\"\"\n",
+                "    MAX_RETRIES = 5\n",
+                "    output = \"\"\n",
+                "    retry = False\n",
+                "\n",
+                "    global first_run\n",
+                "    global rules\n",
+                "\n",
+                "    if first_run:\n",
+                "        first_run = False\n",
+                "        rules = load_rules()\n",
+                "\n",
+                "    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n",
+                "    #\n",
+                "    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
+                "    #\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n",
+                "        cmd = cmd.replace(\"\\n\", \" \")\n",
+                "\n",
+                "    # shlex.split is required on bash and for Windows paths with spaces\n",
+                "    #\n",
+                "    cmd_actual = shlex.split(cmd)\n",
+                "\n",
+                "    # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n",
+                "    #\n",
+                "    user_provided_exe_name = cmd_actual[0].lower()\n",
+                "\n",
+                "    # When running python, use the python in the ADS sandbox ({sys.executable})\n",
+                "    #\n",
+                "    if cmd.startswith(\"python \"):\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n",
+                "\n",
+                "        # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n",
+                "        # with:\n",
+                "        #\n",
+                "        #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n",
+                "        #\n",
+                "        # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n",
+                "        #\n",
+                "        if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n",
+                "            os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n",
+                "\n",
+                "    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n",
+                "    #\n",
+                "    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n",
+                "\n",
+                "    # To aid supportability, determine which binary file will actually be executed on the machine\n",
+                "    #\n",
+                "    which_binary = None\n",
+                "\n",
+                "    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n",
+                "    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n",
+                "    # of CURL exists on the machine use that one.  (Unfortunately the curl.exe in System32 is almost\n",
+                "    # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n",
+                "    # look for the 2nd installation of CURL in the path)\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n",
+                "        path = os.getenv('PATH')\n",
+                "        for p in path.split(os.path.pathsep):\n",
+                "            p = os.path.join(p, \"curl.exe\")\n",
+                "            if os.path.exists(p) and os.access(p, os.X_OK):\n",
+                "                if p.lower().find(\"system32\") == -1:\n",
+                "                    cmd_actual[0] = p\n",
+                "                    which_binary = p\n",
+                "                    break\n",
+                "\n",
+                "    # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n",
+                "    # seems to be required for .msi installs of azdata.cmd/az.cmd.  (otherwise Popen returns FileNotFound) \n",
+                "    #\n",
+                "    # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        which_binary = shutil.which(cmd_actual[0])\n",
+                "\n",
+                "    # Display an install HINT, so the user can click on a SOP to install the missing binary\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n",
+                "        print(sys.path)\n",
+                "\n",
+                "        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
+                "            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n",
+                "    else:   \n",
+                "        cmd_actual[0] = which_binary\n",
+                "\n",
+                "    start_time = datetime.datetime.now().replace(microsecond=0)\n",
+                "\n",
+                "    print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n",
+                "    print(f\"       using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n",
+                "    print(f\"       cwd: {os.getcwd()}\")\n",
+                "\n",
+                "    # Command-line tools such as CURL and AZDATA HDFS commands output\n",
+                "    # scrolling progress bars, which causes Jupyter to hang forever, to\n",
+                "    # workaround this, use no_output=True\n",
+                "    #\n",
+                "\n",
+                "\n",
+                "    # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n",
+                "    #\n",
+                "    wait = True \n",
+                "\n",
+                "    try:\n",
+                "        if no_output:\n",
+                "            p = Popen(cmd_actual)\n",
+                "        else:\n",
+                "            p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n",
+                "            with p.stdout:\n",
+                "                for line in iter(p.stdout.readline, b''):\n",
+                "                    line = line.decode()\n",
+                "                    if return_output:\n",
+                "                        output = output + line\n",
+                "                    else:\n",
+                "                        if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n",
+                "                            regex = re.compile('  \"(.*)\"\\: \"(.*)\"') \n",
+                "                            match = regex.match(line)\n",
+                "                            if match:\n",
+                "                                if match.group(1).find(\"HTML\") != -1:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n",
+                "                                else:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n",
+                "\n",
+                "                                    wait = False\n",
+                "                                    break # otherwise infinite hang, have not worked out why yet.\n",
+                "                        else:\n",
+                "                            print(line, end='')\n",
+                "                            if rules is not None:\n",
+                "                                apply_expert_rules(line)\n",
+                "\n",
+                "        if wait:\n",
+                "            p.wait()\n",
+                "    except FileNotFoundError as e:\n",
+                "        if install_hint is not None:\n",
+                "            display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n",
+                "\n",
+                "    exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n",
+                "\n",
+                "    if not no_output:\n",
+                "        for line in iter(p.stderr.readline, b''):\n",
+                "            try:\n",
+                "                line_decoded = line.decode()\n",
+                "            except UnicodeDecodeError:\n",
+                "                # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n",
+                "                #\n",
+                "                #   \\xa0\n",
+                "                #\n",
+                "                # For example see this in the response from `az group create`:\n",
+                "                #\n",
+                "                # ERROR: Get Token request returned http error: 400 and server \n",
+                "                # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n",
+                "                # The refresh token has expired due to inactivity.\\xa0The token was \n",
+                "                # issued on 2018-10-25T23:35:11.9832872Z\n",
+                "                #\n",
+                "                # which generates the exception:\n",
+                "                #\n",
+                "                # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n",
+                "                #\n",
+                "                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n",
+                "                print(line)\n",
+                "                line_decoded = \"\"\n",
+                "                pass\n",
+                "            else:\n",
+                "\n",
+                "                # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n",
+                "                # print this empty \"ERR:\" as it confuses.\n",
+                "                #\n",
+                "                if line_decoded == \"\":\n",
+                "                    continue\n",
+                "                \n",
+                "                print(f\"STDERR: {line_decoded}\", end='')\n",
+                "\n",
+                "                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n",
+                "                    exit_code_workaround = 1\n",
+                "\n",
+                "                # inject HINTs to next TSG/SOP based on output in stderr\n",
+                "                #\n",
+                "                if user_provided_exe_name in error_hints:\n",
+                "                    for error_hint in error_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(error_hint[0]) != -1:\n",
+                "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
+                "\n",
+                "                # apply expert rules (to run follow-on notebooks), based on output\n",
+                "                #\n",
+                "                if rules is not None:\n",
+                "                    apply_expert_rules(line_decoded)\n",
+                "\n",
+                "                # Verify if a transient error, if so automatically retry (recursive)\n",
+                "                #\n",
+                "                if user_provided_exe_name in retry_hints:\n",
+                "                    for retry_hint in retry_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(retry_hint) != -1:\n",
+                "                            if retry_count < MAX_RETRIES:\n",
+                "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
+                "                                retry_count = retry_count + 1\n",
+                "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
+                "\n",
+                "                                if return_output:\n",
+                "                                    if base64_decode:\n",
+                "                                        import base64\n",
+                "\n",
+                "                                        return base64.b64decode(output).decode('utf-8')\n",
+                "                                    else:\n",
+                "                                        return output\n",
+                "\n",
+                "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
+                "\n",
+                "    # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n",
+                "    # don't wait here, if success known above\n",
+                "    #\n",
+                "    if wait: \n",
+                "        if p.returncode != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n",
+                "    else:\n",
+                "        if exit_code_workaround !=0 :\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n",
+                "\n",
+                "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
+                "\n",
+                "    if return_output:\n",
+                "        if base64_decode:\n",
+                "            import base64\n",
+                "\n",
+                "            return base64.b64decode(output).decode('utf-8')\n",
+                "        else:\n",
+                "            return output\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    \"\"\"Load a json file from disk and return the contents\"\"\"\n",
+                "\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def load_rules():\n",
+                "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
+                "\n",
+                "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
+                "    #\n",
+                "    try:\n",
+                "        j = load_json(\"tsg108-controller-failed-to-upgrade.ipynb\")\n",
+                "    except:\n",
+                "        pass # If the user has renamed the book, we can't load ourself.  NOTE: Is there a way in Jupyter, to know your own filename?\n",
+                "    else:\n",
+                "        if \"metadata\" in j and \\\n",
+                "            \"azdata\" in j[\"metadata\"] and \\\n",
+                "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
+                "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "\n",
+                "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
+                "\n",
+                "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n",
+                "\n",
+                "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
+                "\n",
+                "            return rules\n",
+                "\n",
+                "def apply_expert_rules(line):\n",
+                "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n",
+                "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
+                "\n",
+                "    global rules\n",
+                "\n",
+                "    for rule in rules:\n",
+                "        notebook = rule[1]\n",
+                "        cell_type = rule[2]\n",
+                "        output_type = rule[3] # i.e. stream or error\n",
+                "        output_type_name = rule[4] # i.e. ename or name \n",
+                "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
+                "        details_name = rule[6]  # i.e. evalue or text \n",
+                "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
+                "\n",
+                "        if debug_logging:\n",
+                "            print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n",
+                "\n",
+                "        if re.match(expression, line, re.DOTALL):\n",
+                "\n",
+                "            if debug_logging:\n",
+                "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n",
+                "\n",
+                "            match_found = True\n",
+                "\n",
+                "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
+                "\n",
+                "\n",
+                "\n",
+                "print('Common functions defined successfully.')\n",
+                "\n",
+                "# Hints for binary (transient fault) retry, (known) error and install guide\n",
+                "#\n",
+                "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n",
+                "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n",
+                "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the Kubernetes namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster use the kubectl command line\n",
+                "interface .\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
+                "    except:\n",
+                "        from IPython.display import Markdown\n",
+                "        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'.  SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### View the upgrade configmap"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "run(f'kubectl get configmap -n {namespace} controller-upgrade-configmap -o yaml')"
+            ]
+        },
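+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "Optionally, narrow the output to the config map's `data` section. This\n",
+                "is a sketch (not part of the original TSG): `data` is the standard\n",
+                "Kubernetes ConfigMap field, but the keys inside it vary by release, so\n",
+                "review the full YAML above first."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "# OPTIONAL sketch: show only the .data section of the upgrade config map\n",
+                "run(f'kubectl get configmap -n {namespace} controller-upgrade-configmap -o jsonpath={{.data}}')"
+            ]
+        },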
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "Related\n",
+                "-------\n",
+                "\n",
+                "-   [TSG109 - Set upgrade\n",
+                "    timeouts](../repair/tsg109-upgrade-stalled.ipynb)"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": false,
+            "symlink": true,
+            "expert": {
+                "expanded_rules": [
+                    [
+                        5,
+                        "../repair/tsg109-upgrade-stalled.ipynb",
+                        "code",
+                        "stream",
+                        "name",
+                        "stdout",
+                        "text",
+                        ".\\*upgrade has timed out",
+                        0
+                    ]
+                ]
+            }
+        }
+    }
+}

+ 481 - 0
Big-Data-Clusters/CU8/Public/content/diagnose/tsg114-port-forwarding-for-controldb.ipynb

@@ -0,0 +1,481 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "TSG114 - Connect to ControlDB using Port Fowarding\n",
+                "==================================================\n",
+                "\n",
+                "Description\n",
+                "-----------\n",
+                "\n",
+                "For advanced troubleshooting of the Controller Database, it may help to\n",
+                "use tools like SSMS to connect. Use this TSG to set up port-forwarding\n",
+                "on this machine, so you can connect to the SQL Server instance in thr\n",
+                "Controller Database pod.\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "Use these steps to troubelshoot the problem.\n",
+                "\n",
+                "### Common functions\n",
+                "\n",
+                "Define helper functions used in this notebook."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n",
+                "import sys\n",
+                "import os\n",
+                "import re\n",
+                "import json\n",
+                "import platform\n",
+                "import shlex\n",
+                "import shutil\n",
+                "import datetime\n",
+                "\n",
+                "from subprocess import Popen, PIPE\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
+                "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
+                "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
+                "\n",
+                "first_run = True\n",
+                "rules = None\n",
+                "debug_logging = False\n",
+                "\n",
+                "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
+                "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
+                "\n",
+                "    NOTES:\n",
+                "\n",
+                "    1.  Commands that need this kind of ' quoting on Windows e.g.:\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n",
+                "\n",
+                "        Need to actually pass in as '\"':\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n",
+                "\n",
+                "        The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n",
+                "        \n",
+                "            `iter(p.stdout.readline, b'')`\n",
+                "\n",
+                "        The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n",
+                "    \"\"\"\n",
+                "    MAX_RETRIES = 5\n",
+                "    output = \"\"\n",
+                "    retry = False\n",
+                "\n",
+                "    global first_run\n",
+                "    global rules\n",
+                "\n",
+                "    if first_run:\n",
+                "        first_run = False\n",
+                "        rules = load_rules()\n",
+                "\n",
+                "    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n",
+                "    #\n",
+                "    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
+                "    #\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n",
+                "        cmd = cmd.replace(\"\\n\", \" \")\n",
+                "\n",
+                "    # shlex.split is required on bash and for Windows paths with spaces\n",
+                "    #\n",
+                "    cmd_actual = shlex.split(cmd)\n",
+                "\n",
+                "    # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n",
+                "    #\n",
+                "    user_provided_exe_name = cmd_actual[0].lower()\n",
+                "\n",
+                "    # When running python, use the python in the ADS sandbox ({sys.executable})\n",
+                "    #\n",
+                "    if cmd.startswith(\"python \"):\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n",
+                "\n",
+                "        # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n",
+                "        # with:\n",
+                "        #\n",
+                "        #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n",
+                "        #\n",
+                "        # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n",
+                "        #\n",
+                "        if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n",
+                "            os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n",
+                "\n",
+                "    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n",
+                "    #\n",
+                "    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n",
+                "\n",
+                "    # To aid supportability, determine which binary file will actually be executed on the machine\n",
+                "    #\n",
+                "    which_binary = None\n",
+                "\n",
+                "    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n",
+                "    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n",
+                "    # of CURL exists on the machine use that one.  (Unfortunately the curl.exe in System32 is almost\n",
+                "    # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n",
+                "    # look for the 2nd installation of CURL in the path)\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n",
+                "        path = os.getenv('PATH')\n",
+                "        for p in path.split(os.path.pathsep):\n",
+                "            p = os.path.join(p, \"curl.exe\")\n",
+                "            if os.path.exists(p) and os.access(p, os.X_OK):\n",
+                "                if p.lower().find(\"system32\") == -1:\n",
+                "                    cmd_actual[0] = p\n",
+                "                    which_binary = p\n",
+                "                    break\n",
+                "\n",
+                "    # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n",
+                "    # seems to be required for .msi installs of azdata.cmd/az.cmd.  (otherwise Popen returns FileNotFound) \n",
+                "    #\n",
+                "    # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        which_binary = shutil.which(cmd_actual[0])\n",
+                "\n",
+                "    # Display an install HINT, so the user can click on a SOP to install the missing binary\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n",
+                "        print(sys.path)\n",
+                "\n",
+                "        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
+                "            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n",
+                "    else:   \n",
+                "        cmd_actual[0] = which_binary\n",
+                "\n",
+                "    start_time = datetime.datetime.now().replace(microsecond=0)\n",
+                "\n",
+                "    print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n",
+                "    print(f\"       using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n",
+                "    print(f\"       cwd: {os.getcwd()}\")\n",
+                "\n",
+                "    # Command-line tools such as CURL and AZDATA HDFS commands output\n",
+                "    # scrolling progress bars, which causes Jupyter to hang forever, to\n",
+                "    # workaround this, use no_output=True\n",
+                "    #\n",
+                "\n",
+                "\n",
+                "    # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n",
+                "    #\n",
+                "    wait = True \n",
+                "\n",
+                "    try:\n",
+                "        if no_output:\n",
+                "            p = Popen(cmd_actual)\n",
+                "        else:\n",
+                "            p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n",
+                "            with p.stdout:\n",
+                "                for line in iter(p.stdout.readline, b''):\n",
+                "                    line = line.decode()\n",
+                "                    if return_output:\n",
+                "                        output = output + line\n",
+                "                    else:\n",
+                "                        if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n",
+                "                            regex = re.compile('  \"(.*)\"\\: \"(.*)\"') \n",
+                "                            match = regex.match(line)\n",
+                "                            if match:\n",
+                "                                if match.group(1).find(\"HTML\") != -1:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n",
+                "                                else:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n",
+                "\n",
+                "                                    wait = False\n",
+                "                                    break # otherwise infinite hang, have not worked out why yet.\n",
+                "                        else:\n",
+                "                            print(line, end='')\n",
+                "                            if rules is not None:\n",
+                "                                apply_expert_rules(line)\n",
+                "\n",
+                "        if wait:\n",
+                "            p.wait()\n",
+                "    except FileNotFoundError as e:\n",
+                "        if install_hint is not None:\n",
+                "            display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n",
+                "\n",
+                "    exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n",
+                "\n",
+                "    if not no_output:\n",
+                "        for line in iter(p.stderr.readline, b''):\n",
+                "            try:\n",
+                "                line_decoded = line.decode()\n",
+                "            except UnicodeDecodeError:\n",
+                "                # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n",
+                "                #\n",
+                "                #   \\xa0\n",
+                "                #\n",
+                "                # For example see this in the response from `az group create`:\n",
+                "                #\n",
+                "                # ERROR: Get Token request returned http error: 400 and server \n",
+                "                # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n",
+                "                # The refresh token has expired due to inactivity.\\xa0The token was \n",
+                "                # issued on 2018-10-25T23:35:11.9832872Z\n",
+                "                #\n",
+                "                # which generates the exception:\n",
+                "                #\n",
+                "                # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n",
+                "                #\n",
+                "                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n",
+                "                print(line)\n",
+                "                line_decoded = \"\"\n",
+                "                pass\n",
+                "            else:\n",
+                "\n",
+                "                # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n",
+                "                # print this empty \"ERR:\" as it confuses.\n",
+                "                #\n",
+                "                if line_decoded == \"\":\n",
+                "                    continue\n",
+                "                \n",
+                "                print(f\"STDERR: {line_decoded}\", end='')\n",
+                "\n",
+                "                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n",
+                "                    exit_code_workaround = 1\n",
+                "\n",
+                "                # inject HINTs to next TSG/SOP based on output in stderr\n",
+                "                #\n",
+                "                if user_provided_exe_name in error_hints:\n",
+                "                    for error_hint in error_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(error_hint[0]) != -1:\n",
+                "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
+                "\n",
+                "                # apply expert rules (to run follow-on notebooks), based on output\n",
+                "                #\n",
+                "                if rules is not None:\n",
+                "                    apply_expert_rules(line_decoded)\n",
+                "\n",
+                "                # Verify if a transient error, if so automatically retry (recursive)\n",
+                "                #\n",
+                "                if user_provided_exe_name in retry_hints:\n",
+                "                    for retry_hint in retry_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(retry_hint) != -1:\n",
+                "                            if retry_count < MAX_RETRIES:\n",
+                "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
+                "                                retry_count = retry_count + 1\n",
+                "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
+                "\n",
+                "                                if return_output:\n",
+                "                                    if base64_decode:\n",
+                "                                        import base64\n",
+                "\n",
+                "                                        return base64.b64decode(output).decode('utf-8')\n",
+                "                                    else:\n",
+                "                                        return output\n",
+                "\n",
+                "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
+                "\n",
+                "    # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n",
+                "    # don't wait here, if success known above\n",
+                "    #\n",
+                "    if wait: \n",
+                "        if p.returncode != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n",
+                "    else:\n",
+                "        if exit_code_workaround !=0 :\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n",
+                "\n",
+                "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
+                "\n",
+                "    if return_output:\n",
+                "        if base64_decode:\n",
+                "            import base64\n",
+                "\n",
+                "            return base64.b64decode(output).decode('utf-8')\n",
+                "        else:\n",
+                "            return output\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    \"\"\"Load a json file from disk and return the contents\"\"\"\n",
+                "\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def load_rules():\n",
+                "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
+                "\n",
+                "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
+                "    #\n",
+                "    try:\n",
+                "        j = load_json(\"tsg114-port-forwarding-for-controldb.ipynb\")\n",
+                "    except:\n",
+                "        pass # If the user has renamed the book, we can't load ourself.  NOTE: Is there a way in Jupyter, to know your own filename?\n",
+                "    else:\n",
+                "        if \"metadata\" in j and \\\n",
+                "            \"azdata\" in j[\"metadata\"] and \\\n",
+                "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
+                "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "\n",
+                "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
+                "\n",
+                "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n",
+                "\n",
+                "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
+                "\n",
+                "            return rules\n",
+                "\n",
+                "def apply_expert_rules(line):\n",
+                "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n",
+                "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
+                "\n",
+                "    global rules\n",
+                "\n",
+                "    for rule in rules:\n",
+                "        notebook = rule[1]\n",
+                "        cell_type = rule[2]\n",
+                "        output_type = rule[3] # i.e. stream or error\n",
+                "        output_type_name = rule[4] # i.e. ename or name \n",
+                "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
+                "        details_name = rule[6]  # i.e. evalue or text \n",
+                "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
+                "\n",
+                "        if debug_logging:\n",
+                "            print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n",
+                "\n",
+                "        if re.match(expression, line, re.DOTALL):\n",
+                "\n",
+                "            if debug_logging:\n",
+                "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n",
+                "\n",
+                "            match_found = True\n",
+                "\n",
+                "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
+                "\n",
+                "\n",
+                "\n",
+                "print('Common functions defined successfully.')\n",
+                "\n",
+                "# Hints for binary (transient fault) retry, (known) error and install guide\n",
+                "#\n",
+                "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n",
+                "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n",
+                "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the Kubernetes namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster use the kubectl command line\n",
+                "interface .\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n",
+                "    except:\n",
+                "        from IPython.display import Markdown\n",
+                "        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'.  SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Display the controldb-0 \u2018sa\u2019 password"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "run(f'kubectl exec controldb-0 -n {namespace} -- cat /run/secrets/credentials/mssql-sa-password/password')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Set up port forwarding for control-db"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "run(f'kubectl port-forward pod/controldb-0 -n {namespace} 1433:1433')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "You can now connect to SQL Server in the ControlDB pod from this machine\n",
+                "(using 127.0.0.1) using the \u2018sa\u2019 username and password from above."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": false
+        }
+    }
+}

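With the port-forward from the notebook above left running, the final connection step can also be scripted. A minimal sketch, assuming `pyodbc` and the Microsoft "ODBC Driver 17 for SQL Server" are installed on this machine; `sa_password` is a hypothetical placeholder for the value printed by the controldb-0 secret cell:

```python
# Minimal sketch: connect to the BDC controldb SQL Server through the
# `kubectl port-forward` tunnel started in the notebook above.
import pyodbc

sa_password = "<password printed by the secret cell>"  # hypothetical placeholder

conn = pyodbc.connect(
    "DRIVER={ODBC Driver 17 for SQL Server};"  # assumes msodbcsql17 is installed
    "SERVER=127.0.0.1,1433;"                   # the local end of the port-forward
    "UID=sa;"
    f"PWD={sa_password};"
    "TrustServerCertificate=yes;"              # controldb presents a cluster-internal cert
)

# Simple smoke test: list the databases hosted on the controldb instance.
for row in conn.execute("SELECT name FROM sys.databases"):
    print(row.name)

conn.close()
```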
+ 37 - 0
Big-Data-Clusters/CU8/Public/content/install/readme.md

@@ -0,0 +1,37 @@
+# Installation notebooks
+
+- A set of notebooks used for installing and uninstalling command line tools and packages needed to manage SQL Server Big Data Clusters.
+
+[Home](../readme.md)
+
+## Notebooks in this Chapter
+- [SOP036 - Install kubectl command line interface](sop036-install-kubectl.ipynb)
+
+- [SOP037 - Uninstall kubectl command line interface](sop037-uninstall-kubectl.ipynb)
+
+- [SOP059 - Install Kubernetes Python module](sop059-install-kubernetes-module.ipynb)
+
+- [SOP060 - Uninstall kubernetes module](sop060-uninstall-kubernetes-module.ipynb)
+
+- [SOP062 - Install ipython-sql and pyodbc modules](sop062-install-ipython-sql-module.ipynb)
+
+- [SOP063 - Install azdata CLI (using package manager)](sop063-packman-install-azdata.ipynb)
+
+- [SOP064 - Uninstall azdata CLI (using package manager)](sop064-packman-uninstall-azdata.ipynb)
+
+- [SOP054 - Install azdata CLI (using pip)](sop054-install-azdata.ipynb)
+
+- [SOP055 - Uninstall azdata CLI (using pip)](sop055-uninstall-azdata.ipynb)
+
+- [SOP038 - Install azure command line interface](sop038-install-az.ipynb)
+
+- [SOP039 - Uninstall azure command line interface](sop039-uninstall-az.ipynb)
+
+- [SOP040 - Upgrade pip in ADS Python sandbox](sop040-upgrade-pip.ipynb)
+
+- [SOP069 - Install ODBC for SQL Server](sop069-install-odbc-driver-for-sql-server.ipynb)
+
+- [SOP012 - Install unixodbc for Mac](sop012-brew-install-odbc-for-sql-server.ipynb)
+
+- [SOP010 - Upgrade a big data cluster](sop010-upgrade-bdc.ipynb)
+

File diff is too large to display
+ 390 - 0
Big-Data-Clusters/CU8/Public/content/install/sop010-upgrade-bdc.ipynb


+ 401 - 0
Big-Data-Clusters/CU8/Public/content/install/sop012-brew-install-odbc-for-sql-server.ipynb

@@ -0,0 +1,401 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "SOP012 - Install unixodbc for Mac\n",
+                "=================================\n",
+                "\n",
+                "Description\n",
+                "-----------\n",
+                "\n",
+                "`azdata` may fail to install on Mac with the following error.\n",
+                "\n",
+                "> ERROR:\n",
+                "> dlopen(/Users/user/.local/lib/python3.6/site-packages/pyodbc.cpython-36m-darwin.so,\n",
+                "> 2): Library not loaded: /usr/local/opt/unixodbc/lib/libodbc.2.dylib\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Common functions\n",
+                "\n",
+                "Define helper functions used in this notebook."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n",
+                "\n",
+                "import sys\n",
+                "import os\n",
+                "import re\n",
+                "import json\n",
+                "import platform\n",
+                "import shlex\n",
+                "import shutil\n",
+                "import datetime\n",
+                "\n",
+                "from subprocess import Popen, PIPE\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
+                "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
+                "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
+                "\n",
+                "first_run = True\n",
+                "rules = None\n",
+                "debug_logging = False\n",
+                "\n",
+                "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
+                "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
+                "\n",
+                "    NOTES:\n",
+                "\n",
+                "    1.  Commands that need this kind of ' quoting on Windows e.g.:\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n",
+                "\n",
+                "        Need to actually pass in as '\"':\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n",
+                "\n",
+                "        The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n",
+                "        \n",
+                "            `iter(p.stdout.readline, b'')`\n",
+                "\n",
+                "        The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n",
+                "    \"\"\"\n",
+                "    MAX_RETRIES = 5\n",
+                "    output = \"\"\n",
+                "    retry = False\n",
+                "\n",
+                "    global first_run\n",
+                "    global rules\n",
+                "\n",
+                "    if first_run:\n",
+                "        first_run = False\n",
+                "        rules = load_rules()\n",
+                "\n",
+                "    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n",
+                "    #\n",
+                "    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
+                "    #\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n",
+                "        cmd = cmd.replace(\"\\n\", \" \")\n",
+                "\n",
+                "    # shlex.split is required on bash and for Windows paths with spaces\n",
+                "    #\n",
+                "    cmd_actual = shlex.split(cmd)\n",
+                "\n",
+                "    # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n",
+                "    #\n",
+                "    user_provided_exe_name = cmd_actual[0].lower()\n",
+                "\n",
+                "    # When running python, use the python in the ADS sandbox ({sys.executable})\n",
+                "    #\n",
+                "    if cmd.startswith(\"python \"):\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n",
+                "\n",
+                "        # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n",
+                "        # with:\n",
+                "        #\n",
+                "        #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n",
+                "        #\n",
+                "        # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n",
+                "        #\n",
+                "        if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n",
+                "            os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n",
+                "\n",
+                "    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n",
+                "    #\n",
+                "    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n",
+                "\n",
+                "    # To aid supportability, determine which binary file will actually be executed on the machine\n",
+                "    #\n",
+                "    which_binary = None\n",
+                "\n",
+                "    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n",
+                "    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n",
+                "    # of CURL exists on the machine use that one.  (Unfortunately the curl.exe in System32 is almost\n",
+                "    # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n",
+                "    # look for the 2nd installation of CURL in the path)\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n",
+                "        path = os.getenv('PATH')\n",
+                "        for p in path.split(os.path.pathsep):\n",
+                "            p = os.path.join(p, \"curl.exe\")\n",
+                "            if os.path.exists(p) and os.access(p, os.X_OK):\n",
+                "                if p.lower().find(\"system32\") == -1:\n",
+                "                    cmd_actual[0] = p\n",
+                "                    which_binary = p\n",
+                "                    break\n",
+                "\n",
+                "    # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n",
+                "    # seems to be required for .msi installs of azdata.cmd/az.cmd.  (otherwise Popen returns FileNotFound) \n",
+                "    #\n",
+                "    # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        which_binary = shutil.which(cmd_actual[0])\n",
+                "\n",
+                "    # Display an install HINT, so the user can click on a SOP to install the missing binary\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n",
+                "        print(sys.path)\n",
+                "\n",
+                "        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
+                "            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n",
+                "    else:   \n",
+                "        cmd_actual[0] = which_binary\n",
+                "\n",
+                "    start_time = datetime.datetime.now().replace(microsecond=0)\n",
+                "\n",
+                "    print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n",
+                "    print(f\"       using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n",
+                "    print(f\"       cwd: {os.getcwd()}\")\n",
+                "\n",
+                "    # Command-line tools such as CURL and AZDATA HDFS commands output\n",
+                "    # scrolling progress bars, which causes Jupyter to hang forever, to\n",
+                "    # workaround this, use no_output=True\n",
+                "    #\n",
+                "\n",
+                "\n",
+                "    # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n",
+                "    #\n",
+                "    wait = True \n",
+                "\n",
+                "    try:\n",
+                "        if no_output:\n",
+                "            p = Popen(cmd_actual)\n",
+                "        else:\n",
+                "            p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n",
+                "            with p.stdout:\n",
+                "                for line in iter(p.stdout.readline, b''):\n",
+                "                    line = line.decode()\n",
+                "                    if return_output:\n",
+                "                        output = output + line\n",
+                "                    else:\n",
+                "                        if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n",
+                "                            regex = re.compile('  \"(.*)\"\\: \"(.*)\"') \n",
+                "                            match = regex.match(line)\n",
+                "                            if match:\n",
+                "                                if match.group(1).find(\"HTML\") != -1:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n",
+                "                                else:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n",
+                "\n",
+                "                                    wait = False\n",
+                "                                    break # otherwise infinite hang, have not worked out why yet.\n",
+                "                        else:\n",
+                "                            print(line, end='')\n",
+                "                            if rules is not None:\n",
+                "                                apply_expert_rules(line)\n",
+                "\n",
+                "        if wait:\n",
+                "            p.wait()\n",
+                "    except FileNotFoundError as e:\n",
+                "        if install_hint is not None:\n",
+                "            display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n",
+                "\n",
+                "    exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n",
+                "\n",
+                "    if not no_output:\n",
+                "        for line in iter(p.stderr.readline, b''):\n",
+                "            try:\n",
+                "                line_decoded = line.decode()\n",
+                "            except UnicodeDecodeError:\n",
+                "                # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n",
+                "                #\n",
+                "                #   \\xa0\n",
+                "                #\n",
+                "                # For example see this in the response from `az group create`:\n",
+                "                #\n",
+                "                # ERROR: Get Token request returned http error: 400 and server \n",
+                "                # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n",
+                "                # The refresh token has expired due to inactivity.\\xa0The token was \n",
+                "                # issued on 2018-10-25T23:35:11.9832872Z\n",
+                "                #\n",
+                "                # which generates the exception:\n",
+                "                #\n",
+                "                # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n",
+                "                #\n",
+                "                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n",
+                "                print(line)\n",
+                "                line_decoded = \"\"\n",
+                "                pass\n",
+                "            else:\n",
+                "\n",
+                "                # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n",
+                "                # print this empty \"ERR:\" as it confuses.\n",
+                "                #\n",
+                "                if line_decoded == \"\":\n",
+                "                    continue\n",
+                "                \n",
+                "                print(f\"STDERR: {line_decoded}\", end='')\n",
+                "\n",
+                "                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n",
+                "                    exit_code_workaround = 1\n",
+                "\n",
+                "                # inject HINTs to next TSG/SOP based on output in stderr\n",
+                "                #\n",
+                "                if user_provided_exe_name in error_hints:\n",
+                "                    for error_hint in error_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(error_hint[0]) != -1:\n",
+                "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
+                "\n",
+                "                # apply expert rules (to run follow-on notebooks), based on output\n",
+                "                #\n",
+                "                if rules is not None:\n",
+                "                    apply_expert_rules(line_decoded)\n",
+                "\n",
+                "                # Verify if a transient error, if so automatically retry (recursive)\n",
+                "                #\n",
+                "                if user_provided_exe_name in retry_hints:\n",
+                "                    for retry_hint in retry_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(retry_hint) != -1:\n",
+                "                            if retry_count < MAX_RETRIES:\n",
+                "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
+                "                                retry_count = retry_count + 1\n",
+                "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
+                "\n",
+                "                                if return_output:\n",
+                "                                    if base64_decode:\n",
+                "                                        import base64\n",
+                "\n",
+                "                                        return base64.b64decode(output).decode('utf-8')\n",
+                "                                    else:\n",
+                "                                        return output\n",
+                "\n",
+                "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
+                "\n",
+                "    # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n",
+                "    # don't wait here, if success known above\n",
+                "    #\n",
+                "    if wait: \n",
+                "        if p.returncode != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n",
+                "    else:\n",
+                "        if exit_code_workaround !=0 :\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n",
+                "\n",
+                "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
+                "\n",
+                "    if return_output:\n",
+                "        if base64_decode:\n",
+                "            import base64\n",
+                "\n",
+                "            return base64.b64decode(output).decode('utf-8')\n",
+                "        else:\n",
+                "            return output\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    \"\"\"Load a json file from disk and return the contents\"\"\"\n",
+                "\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def load_rules():\n",
+                "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
+                "\n",
+                "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
+                "    #\n",
+                "    try:\n",
+                "        j = load_json(\"sop012-brew-install-odbc-for-sql-server.ipynb\")\n",
+                "    except:\n",
+                "        pass # If the user has renamed the book, we can't load ourself.  NOTE: Is there a way in Jupyter, to know your own filename?\n",
+                "    else:\n",
+                "        if \"metadata\" in j and \\\n",
+                "            \"azdata\" in j[\"metadata\"] and \\\n",
+                "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
+                "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "\n",
+                "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
+                "\n",
+                "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n",
+                "\n",
+                "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
+                "\n",
+                "            return rules\n",
+                "\n",
+                "def apply_expert_rules(line):\n",
+                "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n",
+                "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
+                "\n",
+                "    global rules\n",
+                "\n",
+                "    for rule in rules:\n",
+                "        notebook = rule[1]\n",
+                "        cell_type = rule[2]\n",
+                "        output_type = rule[3] # i.e. stream or error\n",
+                "        output_type_name = rule[4] # i.e. ename or name \n",
+                "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
+                "        details_name = rule[6]  # i.e. evalue or text \n",
+                "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
+                "\n",
+                "        if debug_logging:\n",
+                "            print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n",
+                "\n",
+                "        if re.match(expression, line, re.DOTALL):\n",
+                "\n",
+                "            if debug_logging:\n",
+                "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n",
+                "\n",
+                "            match_found = True\n",
+                "\n",
+                "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Install `unixodbc`"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "run('brew install unixodbc')"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": false,
+            "symlink": true
+        }
+    }
+}

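After the `brew install unixodbc` cell above completes, a quick check that the failure described in the notebook is resolved. A minimal sketch, assuming it runs in the same Python environment (the ADS sandbox) that originally failed to load `pyodbc`:

```python
# Sketch: confirm pyodbc's native extension can now find libodbc.2.dylib.
# An ImportError naming that dylib means unixODBC is still not resolvable.
try:
    import pyodbc
    print(f"pyodbc {pyodbc.version} loaded; ODBC drivers visible: {pyodbc.drivers()}")
except ImportError as e:
    print(f"pyodbc still fails to load: {e}")
```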
+ 463 - 0
Big-Data-Clusters/CU8/Public/content/install/sop036-install-kubectl.ipynb

@@ -0,0 +1,463 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "SOP036 - Install kubectl command line interface\n",
+                "===============================================\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# For Windows\n",
+                "#\n",
+                "version = \"v1.17.0\"\n",
+                "url = f\"https://storage.googleapis.com/kubernetes-release/release/{version}/bin/windows/amd64/kubectl.exe\""
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Common functions\n",
+                "\n",
+                "Define helper functions used in this notebook."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n",
+                "\n",
+                "import sys\n",
+                "import os\n",
+                "import re\n",
+                "import json\n",
+                "import platform\n",
+                "import shlex\n",
+                "import shutil\n",
+                "import datetime\n",
+                "\n",
+                "from subprocess import Popen, PIPE\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
+                "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
+                "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
+                "\n",
+                "first_run = True\n",
+                "rules = None\n",
+                "debug_logging = False\n",
+                "\n",
+                "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
+                "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
+                "\n",
+                "    NOTES:\n",
+                "\n",
+                "    1.  Commands that need this kind of ' quoting on Windows e.g.:\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n",
+                "\n",
+                "        Need to actually pass in as '\"':\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n",
+                "\n",
+                "        The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n",
+                "        \n",
+                "            `iter(p.stdout.readline, b'')`\n",
+                "\n",
+                "        The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n",
+                "    \"\"\"\n",
+                "    MAX_RETRIES = 5\n",
+                "    output = \"\"\n",
+                "    retry = False\n",
+                "\n",
+                "    global first_run\n",
+                "    global rules\n",
+                "\n",
+                "    if first_run:\n",
+                "        first_run = False\n",
+                "        rules = load_rules()\n",
+                "\n",
+                "    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n",
+                "    #\n",
+                "    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
+                "    #\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n",
+                "        cmd = cmd.replace(\"\\n\", \" \")\n",
+                "\n",
+                "    # shlex.split is required on bash and for Windows paths with spaces\n",
+                "    #\n",
+                "    cmd_actual = shlex.split(cmd)\n",
+                "\n",
+                "    # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n",
+                "    #\n",
+                "    user_provided_exe_name = cmd_actual[0].lower()\n",
+                "\n",
+                "    # When running python, use the python in the ADS sandbox ({sys.executable})\n",
+                "    #\n",
+                "    if cmd.startswith(\"python \"):\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n",
+                "\n",
+                "        # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n",
+                "        # with:\n",
+                "        #\n",
+                "        #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n",
+                "        #\n",
+                "        # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n",
+                "        #\n",
+                "        if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n",
+                "            os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n",
+                "\n",
+                "    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n",
+                "    #\n",
+                "    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n",
+                "\n",
+                "    # To aid supportability, determine which binary file will actually be executed on the machine\n",
+                "    #\n",
+                "    which_binary = None\n",
+                "\n",
+                "    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n",
+                "    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n",
+                "    # of CURL exists on the machine use that one.  (Unfortunately the curl.exe in System32 is almost\n",
+                "    # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n",
+                "    # look for the 2nd installation of CURL in the path)\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n",
+                "        path = os.getenv('PATH')\n",
+                "        for p in path.split(os.path.pathsep):\n",
+                "            p = os.path.join(p, \"curl.exe\")\n",
+                "            if os.path.exists(p) and os.access(p, os.X_OK):\n",
+                "                if p.lower().find(\"system32\") == -1:\n",
+                "                    cmd_actual[0] = p\n",
+                "                    which_binary = p\n",
+                "                    break\n",
+                "\n",
+                "    # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n",
+                "    # seems to be required for .msi installs of azdata.cmd/az.cmd.  (otherwise Popen returns FileNotFound) \n",
+                "    #\n",
+                "    # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        which_binary = shutil.which(cmd_actual[0])\n",
+                "\n",
+                "    # Display an install HINT, so the user can click on a SOP to install the missing binary\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n",
+                "        print(sys.path)\n",
+                "\n",
+                "        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
+                "            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n",
+                "    else:   \n",
+                "        cmd_actual[0] = which_binary\n",
+                "\n",
+                "    start_time = datetime.datetime.now().replace(microsecond=0)\n",
+                "\n",
+                "    print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n",
+                "    print(f\"       using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n",
+                "    print(f\"       cwd: {os.getcwd()}\")\n",
+                "\n",
+                "    # Command-line tools such as CURL and AZDATA HDFS commands output\n",
+                "    # scrolling progress bars, which causes Jupyter to hang forever, to\n",
+                "    # workaround this, use no_output=True\n",
+                "    #\n",
+                "\n",
+                "\n",
+                "    # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n",
+                "    #\n",
+                "    wait = True \n",
+                "\n",
+                "    try:\n",
+                "        if no_output:\n",
+                "            p = Popen(cmd_actual)\n",
+                "        else:\n",
+                "            p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n",
+                "            with p.stdout:\n",
+                "                for line in iter(p.stdout.readline, b''):\n",
+                "                    line = line.decode()\n",
+                "                    if return_output:\n",
+                "                        output = output + line\n",
+                "                    else:\n",
+                "                        if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n",
+                "                            regex = re.compile('  \"(.*)\"\\: \"(.*)\"') \n",
+                "                            match = regex.match(line)\n",
+                "                            if match:\n",
+                "                                if match.group(1).find(\"HTML\") != -1:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n",
+                "                                else:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n",
+                "\n",
+                "                                    wait = False\n",
+                "                                    break # otherwise infinite hang, have not worked out why yet.\n",
+                "                        else:\n",
+                "                            print(line, end='')\n",
+                "                            if rules is not None:\n",
+                "                                apply_expert_rules(line)\n",
+                "\n",
+                "        if wait:\n",
+                "            p.wait()\n",
+                "    except FileNotFoundError as e:\n",
+                "        if install_hint is not None:\n",
+                "            display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n",
+                "\n",
+                "    exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n",
+                "\n",
+                "    if not no_output:\n",
+                "        for line in iter(p.stderr.readline, b''):\n",
+                "            try:\n",
+                "                line_decoded = line.decode()\n",
+                "            except UnicodeDecodeError:\n",
+                "                # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n",
+                "                #\n",
+                "                #   \\xa0\n",
+                "                #\n",
+                "                # For example see this in the response from `az group create`:\n",
+                "                #\n",
+                "                # ERROR: Get Token request returned http error: 400 and server \n",
+                "                # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n",
+                "                # The refresh token has expired due to inactivity.\\xa0The token was \n",
+                "                # issued on 2018-10-25T23:35:11.9832872Z\n",
+                "                #\n",
+                "                # which generates the exception:\n",
+                "                #\n",
+                "                # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n",
+                "                #\n",
+                "                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n",
+                "                print(line)\n",
+                "                line_decoded = \"\"\n",
+                "                pass\n",
+                "            else:\n",
+                "\n",
+                "                # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n",
+                "                # print this empty \"ERR:\" as it confuses.\n",
+                "                #\n",
+                "                if line_decoded == \"\":\n",
+                "                    continue\n",
+                "                \n",
+                "                print(f\"STDERR: {line_decoded}\", end='')\n",
+                "\n",
+                "                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n",
+                "                    exit_code_workaround = 1\n",
+                "\n",
+                "                # inject HINTs to next TSG/SOP based on output in stderr\n",
+                "                #\n",
+                "                if user_provided_exe_name in error_hints:\n",
+                "                    for error_hint in error_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(error_hint[0]) != -1:\n",
+                "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
+                "\n",
+                "                # apply expert rules (to run follow-on notebooks), based on output\n",
+                "                #\n",
+                "                if rules is not None:\n",
+                "                    apply_expert_rules(line_decoded)\n",
+                "\n",
+                "                # Verify if a transient error, if so automatically retry (recursive)\n",
+                "                #\n",
+                "                if user_provided_exe_name in retry_hints:\n",
+                "                    for retry_hint in retry_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(retry_hint) != -1:\n",
+                "                            if retry_count < MAX_RETRIES:\n",
+                "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
+                "                                retry_count = retry_count + 1\n",
+                "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
+                "\n",
+                "                                if return_output:\n",
+                "                                    if base64_decode:\n",
+                "                                        import base64\n",
+                "\n",
+                "                                        return base64.b64decode(output).decode('utf-8')\n",
+                "                                    else:\n",
+                "                                        return output\n",
+                "\n",
+                "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
+                "\n",
+                "    # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n",
+                "    # don't wait here, if success known above\n",
+                "    #\n",
+                "    if wait: \n",
+                "        if p.returncode != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n",
+                "    else:\n",
+                "        if exit_code_workaround !=0 :\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n",
+                "\n",
+                "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
+                "\n",
+                "    if return_output:\n",
+                "        if base64_decode:\n",
+                "            import base64\n",
+                "\n",
+                "            return base64.b64decode(output).decode('utf-8')\n",
+                "        else:\n",
+                "            return output\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    \"\"\"Load a json file from disk and return the contents\"\"\"\n",
+                "\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def load_rules():\n",
+                "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
+                "\n",
+                "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
+                "    #\n",
+                "    try:\n",
+                "        j = load_json(\"sop036-install-kubectl.ipynb\")\n",
+                "    except:\n",
+                "        pass # If the user has renamed the book, we can't load ourself.  NOTE: Is there a way in Jupyter, to know your own filename?\n",
+                "    else:\n",
+                "        if \"metadata\" in j and \\\n",
+                "            \"azdata\" in j[\"metadata\"] and \\\n",
+                "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
+                "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "\n",
+                "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
+                "\n",
+                "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n",
+                "\n",
+                "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
+                "\n",
+                "            return rules\n",
+                "\n",
+                "def apply_expert_rules(line):\n",
+                "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n",
+                "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
+                "\n",
+                "    global rules\n",
+                "\n",
+                "    for rule in rules:\n",
+                "        notebook = rule[1]\n",
+                "        cell_type = rule[2]\n",
+                "        output_type = rule[3] # i.e. stream or error\n",
+                "        output_type_name = rule[4] # i.e. ename or name \n",
+                "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
+                "        details_name = rule[6]  # i.e. evalue or text \n",
+                "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
+                "\n",
+                "        if debug_logging:\n",
+                "            print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n",
+                "\n",
+                "        if re.match(expression, line, re.DOTALL):\n",
+                "\n",
+                "            if debug_logging:\n",
+                "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n",
+                "\n",
+                "            match_found = True\n",
+                "\n",
+                "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Install Kubernetes CLI\n",
+                "\n",
+                "To get the latest version number for `kubectl` for Windows, open this\n",
+                "file:\n",
+                "\n",
+                "-   https://storage.googleapis.com/kubernetes-release/release/stable.txt\n",
+                "\n",
+                "NOTE: For Windows, `kubectl.exe` is installed in the folder containing\n",
+                "the `python.exe` (`sys.executable`), which will be in the path for\n",
+                "notebooks run in ADS."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import os\n",
+                "import sys\n",
+                "import platform\n",
+                "import urllib.request\n",
+                "\n",
+                "from pathlib import Path\n",
+                "\n",
+                "if platform.system() == \"Darwin\":\n",
+                "    run('brew update')\n",
+                "    run('brew install kubernetes-cli')\n",
+                "elif platform.system() == \"Windows\":\n",
+                "    dest = os.path.join(Path(sys.executable).parent, \"kubectl.exe\")\n",
+                "\n",
+                "    print(f\"START: Download from '{url}' to {dest}\")\n",
+                "    urllib.request.urlretrieve(url, dest)\n",
+                "    print(\"END: Download complete\")\n",
+                "elif platform.system() == \"Linux\":\n",
+                "    run('sudo apt-get update')\n",
+                "    run('sudo apt-get install -y kubectl')\n",
+                "else:\n",
+                "    raise SystemExit(f\"Platform '{platform.system()}' is not recognized, must be 'Darwin', 'Windows' or 'Linux'\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Add `kubectl` to system path (Windows Only)\n",
+                "\n",
+                "Optionally add the `kubectl` path to the user environment"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "if platform.system() == \"Windows\":\n",
+                "    run(f'powershell.exe -Command \"[Environment]::SetEnvironmentVariable(\\\\\"Path\\\\\", $env:Path + \\\\\";{Path(sys.executable).parent}\\\\\", [EnvironmentVariableTarget]::User)\"')"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": true,
+            "symlink": true
+        }
+    }
+}

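As a follow-up to SOP036 above, a short verification sketch using only the Python standard library. `kubectl version --client` reports the client build without contacting a cluster; on Windows, a new shell may be needed before the user-scoped PATH change from the last cell is visible:

```python
# Sketch: verify that kubectl now resolves on the PATH and report its version.
import shutil
import subprocess

kubectl = shutil.which("kubectl")
if kubectl is None:
    raise SystemExit("kubectl not found on PATH - re-run SOP036 or open a new shell")

print(f"kubectl resolved to: {kubectl}")
subprocess.run([kubectl, "version", "--client"], check=True)
```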
+ 413 - 0
Big-Data-Clusters/CU8/Public/content/install/sop037-uninstall-kubectl.ipynb

@@ -0,0 +1,413 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "SOP037 - Uninstall kubectl command line interface\n",
+                "=================================================\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Common functions\n",
+                "\n",
+                "Define helper functions used in this notebook."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n",
+                "\n",
+                "import sys\n",
+                "import os\n",
+                "import re\n",
+                "import json\n",
+                "import platform\n",
+                "import shlex\n",
+                "import shutil\n",
+                "import datetime\n",
+                "\n",
+                "from subprocess import Popen, PIPE\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
+                "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
+                "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
+                "\n",
+                "first_run = True\n",
+                "rules = None\n",
+                "debug_logging = False\n",
+                "\n",
+                "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
+                "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
+                "\n",
+                "    NOTES:\n",
+                "\n",
+                "    1.  Commands that need this kind of ' quoting on Windows e.g.:\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n",
+                "\n",
+                "        Need to actually pass in as '\"':\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n",
+                "\n",
+                "        The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n",
+                "        \n",
+                "            `iter(p.stdout.readline, b'')`\n",
+                "\n",
+                "        The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n",
+                "    \"\"\"\n",
+                "    MAX_RETRIES = 5\n",
+                "    output = \"\"\n",
+                "    retry = False\n",
+                "\n",
+                "    global first_run\n",
+                "    global rules\n",
+                "\n",
+                "    if first_run:\n",
+                "        first_run = False\n",
+                "        rules = load_rules()\n",
+                "\n",
+                "    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n",
+                "    #\n",
+                "    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
+                "    #\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n",
+                "        cmd = cmd.replace(\"\\n\", \" \")\n",
+                "\n",
+                "    # shlex.split is required on bash and for Windows paths with spaces\n",
+                "    #\n",
+                "    cmd_actual = shlex.split(cmd)\n",
+                "\n",
+                "    # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n",
+                "    #\n",
+                "    user_provided_exe_name = cmd_actual[0].lower()\n",
+                "\n",
+                "    # When running python, use the python in the ADS sandbox ({sys.executable})\n",
+                "    #\n",
+                "    if cmd.startswith(\"python \"):\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n",
+                "\n",
+                "        # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n",
+                "        # with:\n",
+                "        #\n",
+                "        #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n",
+                "        #\n",
+                "        # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n",
+                "        #\n",
+                "        if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n",
+                "            os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n",
+                "\n",
+                "    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n",
+                "    #\n",
+                "    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n",
+                "\n",
+                "    # To aid supportability, determine which binary file will actually be executed on the machine\n",
+                "    #\n",
+                "    which_binary = None\n",
+                "\n",
+                "    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n",
+                "    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n",
+                "    # of CURL exists on the machine use that one.  (Unfortunately the curl.exe in System32 is almost\n",
+                "    # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n",
+                "    # look for the 2nd installation of CURL in the path)\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n",
+                "        path = os.getenv('PATH')\n",
+                "        for p in path.split(os.path.pathsep):\n",
+                "            p = os.path.join(p, \"curl.exe\")\n",
+                "            if os.path.exists(p) and os.access(p, os.X_OK):\n",
+                "                if p.lower().find(\"system32\") == -1:\n",
+                "                    cmd_actual[0] = p\n",
+                "                    which_binary = p\n",
+                "                    break\n",
+                "\n",
+                "    # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n",
+                "    # seems to be required for .msi installs of azdata.cmd/az.cmd.  (otherwise Popen returns FileNotFound) \n",
+                "    #\n",
+                "    # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        which_binary = shutil.which(cmd_actual[0])\n",
+                "\n",
+                "    # Display an install HINT, so the user can click on a SOP to install the missing binary\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n",
+                "        print(sys.path)\n",
+                "\n",
+                "        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
+                "            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n",
+                "    else:   \n",
+                "        cmd_actual[0] = which_binary\n",
+                "\n",
+                "    start_time = datetime.datetime.now().replace(microsecond=0)\n",
+                "\n",
+                "    print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n",
+                "    print(f\"       using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n",
+                "    print(f\"       cwd: {os.getcwd()}\")\n",
+                "\n",
+                "    # Command-line tools such as CURL and AZDATA HDFS commands output\n",
+                "    # scrolling progress bars, which causes Jupyter to hang forever, to\n",
+                "    # workaround this, use no_output=True\n",
+                "    #\n",
+                "\n",
+                "\n",
+                "    # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n",
+                "    #\n",
+                "    wait = True \n",
+                "\n",
+                "    try:\n",
+                "        if no_output:\n",
+                "            p = Popen(cmd_actual)\n",
+                "        else:\n",
+                "            p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n",
+                "            with p.stdout:\n",
+                "                for line in iter(p.stdout.readline, b''):\n",
+                "                    line = line.decode()\n",
+                "                    if return_output:\n",
+                "                        output = output + line\n",
+                "                    else:\n",
+                "                        if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n",
+                "                            regex = re.compile('  \"(.*)\"\\: \"(.*)\"') \n",
+                "                            match = regex.match(line)\n",
+                "                            if match:\n",
+                "                                if match.group(1).find(\"HTML\") != -1:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n",
+                "                                else:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n",
+                "\n",
+                "                                    wait = False\n",
+                "                                    break # otherwise infinite hang, have not worked out why yet.\n",
+                "                        else:\n",
+                "                            print(line, end='')\n",
+                "                            if rules is not None:\n",
+                "                                apply_expert_rules(line)\n",
+                "\n",
+                "        if wait:\n",
+                "            p.wait()\n",
+                "    except FileNotFoundError as e:\n",
+                "        if install_hint is not None:\n",
+                "            display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n",
+                "\n",
+                "    exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n",
+                "\n",
+                "    if not no_output:\n",
+                "        for line in iter(p.stderr.readline, b''):\n",
+                "            try:\n",
+                "                line_decoded = line.decode()\n",
+                "            except UnicodeDecodeError:\n",
+                "                # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n",
+                "                #\n",
+                "                #   \\xa0\n",
+                "                #\n",
+                "                # For example see this in the response from `az group create`:\n",
+                "                #\n",
+                "                # ERROR: Get Token request returned http error: 400 and server \n",
+                "                # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n",
+                "                # The refresh token has expired due to inactivity.\\xa0The token was \n",
+                "                # issued on 2018-10-25T23:35:11.9832872Z\n",
+                "                #\n",
+                "                # which generates the exception:\n",
+                "                #\n",
+                "                # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n",
+                "                #\n",
+                "                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n",
+                "                print(line)\n",
+                "                line_decoded = \"\"\n",
+                "                pass\n",
+                "            else:\n",
+                "\n",
+                "                # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n",
+                "                # print this empty \"ERR:\" as it confuses.\n",
+                "                #\n",
+                "                if line_decoded == \"\":\n",
+                "                    continue\n",
+                "                \n",
+                "                print(f\"STDERR: {line_decoded}\", end='')\n",
+                "\n",
+                "                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n",
+                "                    exit_code_workaround = 1\n",
+                "\n",
+                "                # inject HINTs to next TSG/SOP based on output in stderr\n",
+                "                #\n",
+                "                if user_provided_exe_name in error_hints:\n",
+                "                    for error_hint in error_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(error_hint[0]) != -1:\n",
+                "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
+                "\n",
+                "                # apply expert rules (to run follow-on notebooks), based on output\n",
+                "                #\n",
+                "                if rules is not None:\n",
+                "                    apply_expert_rules(line_decoded)\n",
+                "\n",
+                "                # Verify if a transient error, if so automatically retry (recursive)\n",
+                "                #\n",
+                "                if user_provided_exe_name in retry_hints:\n",
+                "                    for retry_hint in retry_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(retry_hint) != -1:\n",
+                "                            if retry_count < MAX_RETRIES:\n",
+                "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
+                "                                retry_count = retry_count + 1\n",
+                "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
+                "\n",
+                "                                if return_output:\n",
+                "                                    if base64_decode:\n",
+                "                                        import base64\n",
+                "\n",
+                "                                        return base64.b64decode(output).decode('utf-8')\n",
+                "                                    else:\n",
+                "                                        return output\n",
+                "\n",
+                "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
+                "\n",
+                "    # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n",
+                "    # don't wait here, if success known above\n",
+                "    #\n",
+                "    if wait: \n",
+                "        if p.returncode != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n",
+                "    else:\n",
+                "        if exit_code_workaround !=0 :\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n",
+                "\n",
+                "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
+                "\n",
+                "    if return_output:\n",
+                "        if base64_decode:\n",
+                "            import base64\n",
+                "\n",
+                "            return base64.b64decode(output).decode('utf-8')\n",
+                "        else:\n",
+                "            return output\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    \"\"\"Load a json file from disk and return the contents\"\"\"\n",
+                "\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def load_rules():\n",
+                "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
+                "\n",
+                "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
+                "    #\n",
+                "    try:\n",
+                "        j = load_json(\"sop037-uninstall-kubectl.ipynb\")\n",
+                "    except:\n",
+                "        pass # If the user has renamed the book, we can't load ourself.  NOTE: Is there a way in Jupyter, to know your own filename?\n",
+                "    else:\n",
+                "        if \"metadata\" in j and \\\n",
+                "            \"azdata\" in j[\"metadata\"] and \\\n",
+                "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
+                "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "\n",
+                "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
+                "\n",
+                "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n",
+                "\n",
+                "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
+                "\n",
+                "            return rules\n",
+                "\n",
+                "def apply_expert_rules(line):\n",
+                "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n",
+                "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
+                "\n",
+                "    global rules\n",
+                "\n",
+                "    for rule in rules:\n",
+                "        notebook = rule[1]\n",
+                "        cell_type = rule[2]\n",
+                "        output_type = rule[3] # i.e. stream or error\n",
+                "        output_type_name = rule[4] # i.e. ename or name \n",
+                "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
+                "        details_name = rule[6]  # i.e. evalue or text \n",
+                "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
+                "\n",
+                "        if debug_logging:\n",
+                "            print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n",
+                "\n",
+                "        if re.match(expression, line, re.DOTALL):\n",
+                "\n",
+                "            if debug_logging:\n",
+                "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n",
+                "\n",
+                "            match_found = True\n",
+                "\n",
+                "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Uninstall Kubernetes CLI\n",
+                "\n",
+                "NOTE: For Windows, `kubectl.exe` was installed in the folder containing\n",
+                "the `python.exe` (`sys.executable`), it will be removed from this\n",
+                "folder."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import os\n",
+                "import sys\n",
+                "import platform\n",
+                "\n",
+                "from pathlib import Path\n",
+                "\n",
+                "if platform.system() == \"Darwin\":\n",
+                "    run('brew uninstall kubernetes-cli')\n",
+                "elif platform.system() == \"Windows\":\n",
+                "    path = Path(sys.executable)\n",
+                "    cwd = os.getcwd()\n",
+                "    os.chdir(path.parent)\n",
+                "    run('cmd /k del kubectl.exe')\n",
+                "    os.chdir(cwd)\n",
+                "elif platform.system() == \"Linux\":\n",
+                "    run('sudo apt-get uninstall -y kubectl')\n",
+                "else:\n",
+                "    raise SystemExit(f\"Platform '{platform.system()}' is not recognized, must be 'Darwin', 'Windows' or 'Linux'\")"
+            ]
+        },
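+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "Optional sanity check (a minimal sketch, not part of the original\n",
+                "steps): report whether `kubectl` is still resolvable on the `PATH`\n",
+                "after the uninstall."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import shutil\n",
+                "\n",
+                "# shutil.which returns None once kubectl is no longer resolvable on the PATH\n",
+                "print(shutil.which('kubectl') or 'kubectl is no longer on the PATH')"
+            ]
+        },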
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": true,
+            "symlink": true
+        }
+    }
+}

+ 410 - 0
Big-Data-Clusters/CU8/Public/content/install/sop038-install-az.ipynb

@@ -0,0 +1,410 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "SOP038 - Install azure command line interface\n",
+                "=============================================\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Common functions\n",
+                "\n",
+                "Define helper functions used in this notebook."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define `run` function for transient fault handling, hyperlinked suggestions, and scrolling updates on Windows\n",
+                "import sys\n",
+                "import os\n",
+                "import re\n",
+                "import json\n",
+                "import platform\n",
+                "import shlex\n",
+                "import shutil\n",
+                "import datetime\n",
+                "\n",
+                "from subprocess import Popen, PIPE\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
+                "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
+                "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
+                "\n",
+                "first_run = True\n",
+                "rules = None\n",
+                "debug_logging = False\n",
+                "\n",
+                "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
+                "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
+                "\n",
+                "    NOTES:\n",
+                "\n",
+                "    1.  Commands that need this kind of ' quoting on Windows e.g.:\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n",
+                "\n",
+                "        Need to actually pass in as '\"':\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n",
+                "\n",
+                "        The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n",
+                "        \n",
+                "            `iter(p.stdout.readline, b'')`\n",
+                "\n",
+                "        The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n",
+                "    \"\"\"\n",
+                "    MAX_RETRIES = 5\n",
+                "    output = \"\"\n",
+                "    retry = False\n",
+                "\n",
+                "    global first_run\n",
+                "    global rules\n",
+                "\n",
+                "    if first_run:\n",
+                "        first_run = False\n",
+                "        rules = load_rules()\n",
+                "\n",
+                "    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n",
+                "    #\n",
+                "    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
+                "    #\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n",
+                "        cmd = cmd.replace(\"\\n\", \" \")\n",
+                "\n",
+                "    # shlex.split is required on bash and for Windows paths with spaces\n",
+                "    #\n",
+                "    cmd_actual = shlex.split(cmd)\n",
+                "\n",
+                "    # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n",
+                "    #\n",
+                "    user_provided_exe_name = cmd_actual[0].lower()\n",
+                "\n",
+                "    # When running python, use the python in the ADS sandbox ({sys.executable})\n",
+                "    #\n",
+                "    if cmd.startswith(\"python \"):\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n",
+                "\n",
+                "        # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n",
+                "        # with:\n",
+                "        #\n",
+                "        #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n",
+                "        #\n",
+                "        # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n",
+                "        #\n",
+                "        if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n",
+                "            os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n",
+                "\n",
+                "    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n",
+                "    #\n",
+                "    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n",
+                "\n",
+                "    # To aid supportability, determine which binary file will actually be executed on the machine\n",
+                "    #\n",
+                "    which_binary = None\n",
+                "\n",
+                "    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n",
+                "    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n",
+                "    # of CURL exists on the machine use that one.  (Unfortunately the curl.exe in System32 is almost\n",
+                "    # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n",
+                "    # look for the 2nd installation of CURL in the path)\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n",
+                "        path = os.getenv('PATH')\n",
+                "        for p in path.split(os.path.pathsep):\n",
+                "            p = os.path.join(p, \"curl.exe\")\n",
+                "            if os.path.exists(p) and os.access(p, os.X_OK):\n",
+                "                if p.lower().find(\"system32\") == -1:\n",
+                "                    cmd_actual[0] = p\n",
+                "                    which_binary = p\n",
+                "                    break\n",
+                "\n",
+                "    # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n",
+                "    # seems to be required for .msi installs of azdata.cmd/az.cmd.  (otherwise Popen returns FileNotFound) \n",
+                "    #\n",
+                "    # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        which_binary = shutil.which(cmd_actual[0])\n",
+                "\n",
+                "    # Display an install HINT, so the user can click on a SOP to install the missing binary\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n",
+                "        print(sys.path)\n",
+                "\n",
+                "        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
+                "            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n",
+                "    else:   \n",
+                "        cmd_actual[0] = which_binary\n",
+                "\n",
+                "    start_time = datetime.datetime.now().replace(microsecond=0)\n",
+                "\n",
+                "    print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n",
+                "    print(f\"       using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n",
+                "    print(f\"       cwd: {os.getcwd()}\")\n",
+                "\n",
+                "    # Command-line tools such as CURL and AZDATA HDFS commands output\n",
+                "    # scrolling progress bars, which causes Jupyter to hang forever, to\n",
+                "    # workaround this, use no_output=True\n",
+                "    #\n",
+                "\n",
+                "\n",
+                "    # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n",
+                "    #\n",
+                "    wait = True \n",
+                "\n",
+                "    try:\n",
+                "        if no_output:\n",
+                "            p = Popen(cmd_actual)\n",
+                "        else:\n",
+                "            p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n",
+                "            with p.stdout:\n",
+                "                for line in iter(p.stdout.readline, b''):\n",
+                "                    line = line.decode()\n",
+                "                    if return_output:\n",
+                "                        output = output + line\n",
+                "                    else:\n",
+                "                        if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n",
+                "                            regex = re.compile('  \"(.*)\"\\: \"(.*)\"') \n",
+                "                            match = regex.match(line)\n",
+                "                            if match:\n",
+                "                                if match.group(1).find(\"HTML\") != -1:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n",
+                "                                else:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n",
+                "\n",
+                "                                    wait = False\n",
+                "                                    break # otherwise infinite hang, have not worked out why yet.\n",
+                "                        else:\n",
+                "                            print(line, end='')\n",
+                "                            if rules is not None:\n",
+                "                                apply_expert_rules(line)\n",
+                "\n",
+                "        if wait:\n",
+                "            p.wait()\n",
+                "    except FileNotFoundError as e:\n",
+                "        if install_hint is not None:\n",
+                "            display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n",
+                "\n",
+                "    exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n",
+                "\n",
+                "    if not no_output:\n",
+                "        for line in iter(p.stderr.readline, b''):\n",
+                "            try:\n",
+                "                line_decoded = line.decode()\n",
+                "            except UnicodeDecodeError:\n",
+                "                # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n",
+                "                #\n",
+                "                #   \\xa0\n",
+                "                #\n",
+                "                # For example see this in the response from `az group create`:\n",
+                "                #\n",
+                "                # ERROR: Get Token request returned http error: 400 and server \n",
+                "                # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n",
+                "                # The refresh token has expired due to inactivity.\\xa0The token was \n",
+                "                # issued on 2018-10-25T23:35:11.9832872Z\n",
+                "                #\n",
+                "                # which generates the exception:\n",
+                "                #\n",
+                "                # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n",
+                "                #\n",
+                "                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n",
+                "                print(line)\n",
+                "                line_decoded = \"\"\n",
+                "                pass\n",
+                "            else:\n",
+                "\n",
+                "                # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n",
+                "                # print this empty \"ERR:\" as it confuses.\n",
+                "                #\n",
+                "                if line_decoded == \"\":\n",
+                "                    continue\n",
+                "                \n",
+                "                print(f\"STDERR: {line_decoded}\", end='')\n",
+                "\n",
+                "                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n",
+                "                    exit_code_workaround = 1\n",
+                "\n",
+                "                # inject HINTs to next TSG/SOP based on output in stderr\n",
+                "                #\n",
+                "                if user_provided_exe_name in error_hints:\n",
+                "                    for error_hint in error_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(error_hint[0]) != -1:\n",
+                "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
+                "\n",
+                "                # apply expert rules (to run follow-on notebooks), based on output\n",
+                "                #\n",
+                "                if rules is not None:\n",
+                "                    apply_expert_rules(line_decoded)\n",
+                "\n",
+                "                # Verify if a transient error, if so automatically retry (recursive)\n",
+                "                #\n",
+                "                if user_provided_exe_name in retry_hints:\n",
+                "                    for retry_hint in retry_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(retry_hint) != -1:\n",
+                "                            if retry_count < MAX_RETRIES:\n",
+                "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
+                "                                retry_count = retry_count + 1\n",
+                "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
+                "\n",
+                "                                if return_output:\n",
+                "                                    if base64_decode:\n",
+                "                                        import base64\n",
+                "\n",
+                "                                        return base64.b64decode(output).decode('utf-8')\n",
+                "                                    else:\n",
+                "                                        return output\n",
+                "\n",
+                "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
+                "\n",
+                "    # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n",
+                "    # don't wait here, if success known above\n",
+                "    #\n",
+                "    if wait: \n",
+                "        if p.returncode != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n",
+                "    else:\n",
+                "        if exit_code_workaround !=0 :\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n",
+                "\n",
+                "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
+                "\n",
+                "    if return_output:\n",
+                "        if base64_decode:\n",
+                "            import base64\n",
+                "\n",
+                "            return base64.b64decode(output).decode('utf-8')\n",
+                "        else:\n",
+                "            return output\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    \"\"\"Load a json file from disk and return the contents\"\"\"\n",
+                "\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def load_rules():\n",
+                "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
+                "\n",
+                "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
+                "    #\n",
+                "    try:\n",
+                "        j = load_json(\"sop038-install-az.ipynb\")\n",
+                "    except:\n",
+                "        pass # If the user has renamed the book, we can't load ourself.  NOTE: Is there a way in Jupyter, to know your own filename?\n",
+                "    else:\n",
+                "        if \"metadata\" in j and \\\n",
+                "            \"azdata\" in j[\"metadata\"] and \\\n",
+                "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
+                "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "\n",
+                "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
+                "\n",
+                "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n",
+                "\n",
+                "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
+                "\n",
+                "            return rules\n",
+                "\n",
+                "def apply_expert_rules(line):\n",
+                "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n",
+                "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
+                "\n",
+                "    global rules\n",
+                "\n",
+                "    for rule in rules:\n",
+                "        notebook = rule[1]\n",
+                "        cell_type = rule[2]\n",
+                "        output_type = rule[3] # i.e. stream or error\n",
+                "        output_type_name = rule[4] # i.e. ename or name \n",
+                "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
+                "        details_name = rule[6]  # i.e. evalue or text \n",
+                "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
+                "\n",
+                "        if debug_logging:\n",
+                "            print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n",
+                "\n",
+                "        if re.match(expression, line, re.DOTALL):\n",
+                "\n",
+                "            if debug_logging:\n",
+                "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n",
+                "\n",
+                "            match_found = True\n",
+                "\n",
+                "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
+                "\n",
+                "\n",
+                "\n",
+                "print('Common functions defined successfully.')\n",
+                "\n",
+                "# Hints for binary (transient fault) retry, (known) error and install guide\n",
+                "#\n",
+                "retry_hints = {'python': []}\n",
+                "error_hints = {'python': [['Library not loaded: /usr/local/opt/unixodbc', 'SOP012 - Install unixodbc for Mac', '../install/sop012-brew-install-odbc-for-sql-server.ipynb'], ['WARNING: You are using pip version', 'SOP040 - Upgrade pip in ADS Python sandbox', '../install/sop040-upgrade-pip.ipynb']]}\n",
+                "install_hint = {'python': []}"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Install az CLI"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "run(\"python --version\")"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "run('python -m pip install -m pip install azure-cli')"
+            ]
+        },
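+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "Optional sanity check (a minimal sketch, not part of the original\n",
+                "steps): verify the newly installed `az` CLI responds."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "run('az --version')"
+            ]
+        },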
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": true,
+            "symlink": true
+        }
+    }
+}

+ 401 - 0
Big-Data-Clusters/CU8/Public/content/install/sop039-uninstall-az.ipynb

@@ -0,0 +1,401 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "SOP039 - Uninstall azure command line interface\n",
+                "===============================================\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Common functions\n",
+                "\n",
+                "Define helper functions used in this notebook."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define `run` function for transient fault handling, hyperlinked suggestions, and scrolling updates on Windows\n",
+                "import sys\n",
+                "import os\n",
+                "import re\n",
+                "import json\n",
+                "import platform\n",
+                "import shlex\n",
+                "import shutil\n",
+                "import datetime\n",
+                "\n",
+                "from subprocess import Popen, PIPE\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
+                "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
+                "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
+                "\n",
+                "first_run = True\n",
+                "rules = None\n",
+                "debug_logging = False\n",
+                "\n",
+                "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
+                "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
+                "\n",
+                "    NOTES:\n",
+                "\n",
+                "    1.  Commands that need this kind of ' quoting on Windows e.g.:\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n",
+                "\n",
+                "        Need to actually pass in as '\"':\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n",
+                "\n",
+                "        The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n",
+                "        \n",
+                "            `iter(p.stdout.readline, b'')`\n",
+                "\n",
+                "        The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n",
+                "    \"\"\"\n",
+                "    MAX_RETRIES = 5\n",
+                "    output = \"\"\n",
+                "    retry = False\n",
+                "\n",
+                "    global first_run\n",
+                "    global rules\n",
+                "\n",
+                "    if first_run:\n",
+                "        first_run = False\n",
+                "        rules = load_rules()\n",
+                "\n",
+                "    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n",
+                "    #\n",
+                "    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
+                "    #\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n",
+                "        cmd = cmd.replace(\"\\n\", \" \")\n",
+                "\n",
+                "    # shlex.split is required on bash and for Windows paths with spaces\n",
+                "    #\n",
+                "    cmd_actual = shlex.split(cmd)\n",
+                "\n",
+                "    # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n",
+                "    #\n",
+                "    user_provided_exe_name = cmd_actual[0].lower()\n",
+                "\n",
+                "    # When running python, use the python in the ADS sandbox ({sys.executable})\n",
+                "    #\n",
+                "    if cmd.startswith(\"python \"):\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n",
+                "\n",
+                "        # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n",
+                "        # with:\n",
+                "        #\n",
+                "        #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n",
+                "        #\n",
+                "        # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n",
+                "        #\n",
+                "        if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n",
+                "            os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n",
+                "\n",
+                "    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n",
+                "    #\n",
+                "    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n",
+                "\n",
+                "    # To aid supportability, determine which binary file will actually be executed on the machine\n",
+                "    #\n",
+                "    which_binary = None\n",
+                "\n",
+                "    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n",
+                "    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n",
+                "    # of CURL exists on the machine use that one.  (Unfortunately the curl.exe in System32 is almost\n",
+                "    # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n",
+                "    # look for the 2nd installation of CURL in the path)\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n",
+                "        path = os.getenv('PATH')\n",
+                "        for p in path.split(os.path.pathsep):\n",
+                "            p = os.path.join(p, \"curl.exe\")\n",
+                "            if os.path.exists(p) and os.access(p, os.X_OK):\n",
+                "                if p.lower().find(\"system32\") == -1:\n",
+                "                    cmd_actual[0] = p\n",
+                "                    which_binary = p\n",
+                "                    break\n",
+                "\n",
+                "    # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n",
+                "    # seems to be required for .msi installs of azdata.cmd/az.cmd.  (otherwise Popen returns FileNotFound) \n",
+                "    #\n",
+                "    # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        which_binary = shutil.which(cmd_actual[0])\n",
+                "\n",
+                "    # Display an install HINT, so the user can click on a SOP to install the missing binary\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n",
+                "        print(sys.path)\n",
+                "\n",
+                "        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
+                "            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n",
+                "    else:   \n",
+                "        cmd_actual[0] = which_binary\n",
+                "\n",
+                "    start_time = datetime.datetime.now().replace(microsecond=0)\n",
+                "\n",
+                "    print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n",
+                "    print(f\"       using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n",
+                "    print(f\"       cwd: {os.getcwd()}\")\n",
+                "\n",
+                "    # Command-line tools such as CURL and AZDATA HDFS commands output\n",
+                "    # scrolling progress bars, which causes Jupyter to hang forever, to\n",
+                "    # workaround this, use no_output=True\n",
+                "    #\n",
+                "\n",
+                "\n",
+                "    # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n",
+                "    #\n",
+                "    wait = True \n",
+                "\n",
+                "    try:\n",
+                "        if no_output:\n",
+                "            p = Popen(cmd_actual)\n",
+                "        else:\n",
+                "            p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n",
+                "            with p.stdout:\n",
+                "                for line in iter(p.stdout.readline, b''):\n",
+                "                    line = line.decode()\n",
+                "                    if return_output:\n",
+                "                        output = output + line\n",
+                "                    else:\n",
+                "                        if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n",
+                "                            regex = re.compile('  \"(.*)\"\\: \"(.*)\"') \n",
+                "                            match = regex.match(line)\n",
+                "                            if match:\n",
+                "                                if match.group(1).find(\"HTML\") != -1:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n",
+                "                                else:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n",
+                "\n",
+                "                                    wait = False\n",
+                "                                    break # otherwise infinite hang, have not worked out why yet.\n",
+                "                        else:\n",
+                "                            print(line, end='')\n",
+                "                            if rules is not None:\n",
+                "                                apply_expert_rules(line)\n",
+                "\n",
+                "        if wait:\n",
+                "            p.wait()\n",
+                "    except FileNotFoundError as e:\n",
+                "        if install_hint is not None:\n",
+                "            display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n",
+                "\n",
+                "    exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n",
+                "\n",
+                "    if not no_output:\n",
+                "        for line in iter(p.stderr.readline, b''):\n",
+                "            try:\n",
+                "                line_decoded = line.decode()\n",
+                "            except UnicodeDecodeError:\n",
+                "                # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n",
+                "                #\n",
+                "                #   \\xa0\n",
+                "                #\n",
+                "                # For example see this in the response from `az group create`:\n",
+                "                #\n",
+                "                # ERROR: Get Token request returned http error: 400 and server \n",
+                "                # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n",
+                "                # The refresh token has expired due to inactivity.\\xa0The token was \n",
+                "                # issued on 2018-10-25T23:35:11.9832872Z\n",
+                "                #\n",
+                "                # which generates the exception:\n",
+                "                #\n",
+                "                # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n",
+                "                #\n",
+                "                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n",
+                "                print(line)\n",
+                "                line_decoded = \"\"\n",
+                "                pass\n",
+                "            else:\n",
+                "\n",
+                "                # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n",
+                "                # print this empty \"ERR:\" as it confuses.\n",
+                "                #\n",
+                "                if line_decoded == \"\":\n",
+                "                    continue\n",
+                "                \n",
+                "                print(f\"STDERR: {line_decoded}\", end='')\n",
+                "\n",
+                "                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n",
+                "                    exit_code_workaround = 1\n",
+                "\n",
+                "                # inject HINTs to next TSG/SOP based on output in stderr\n",
+                "                #\n",
+                "                if user_provided_exe_name in error_hints:\n",
+                "                    for error_hint in error_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(error_hint[0]) != -1:\n",
+                "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
+                "\n",
+                "                # apply expert rules (to run follow-on notebooks), based on output\n",
+                "                #\n",
+                "                if rules is not None:\n",
+                "                    apply_expert_rules(line_decoded)\n",
+                "\n",
+                "                # Verify if a transient error, if so automatically retry (recursive)\n",
+                "                #\n",
+                "                if user_provided_exe_name in retry_hints:\n",
+                "                    for retry_hint in retry_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(retry_hint) != -1:\n",
+                "                            if retry_count < MAX_RETRIES:\n",
+                "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
+                "                                retry_count = retry_count + 1\n",
+                "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
+                "\n",
+                "                                if return_output:\n",
+                "                                    if base64_decode:\n",
+                "                                        import base64\n",
+                "\n",
+                "                                        return base64.b64decode(output).decode('utf-8')\n",
+                "                                    else:\n",
+                "                                        return output\n",
+                "\n",
+                "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
+                "\n",
+                "    # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n",
+                "    # don't wait here, if success known above\n",
+                "    #\n",
+                "    if wait: \n",
+                "        if p.returncode != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n",
+                "    else:\n",
+                "        if exit_code_workaround !=0 :\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n",
+                "\n",
+                "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
+                "\n",
+                "    if return_output:\n",
+                "        if base64_decode:\n",
+                "            import base64\n",
+                "\n",
+                "            return base64.b64decode(output).decode('utf-8')\n",
+                "        else:\n",
+                "            return output\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    \"\"\"Load a json file from disk and return the contents\"\"\"\n",
+                "\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def load_rules():\n",
+                "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
+                "\n",
+                "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
+                "    #\n",
+                "    try:\n",
+                "        j = load_json(\"sop039-uninstall-az.ipynb\")\n",
+                "    except:\n",
+                "        pass # If the user has renamed the book, we can't load ourself.  NOTE: Is there a way in Jupyter, to know your own filename?\n",
+                "    else:\n",
+                "        if \"metadata\" in j and \\\n",
+                "            \"azdata\" in j[\"metadata\"] and \\\n",
+                "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
+                "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "\n",
+                "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
+                "\n",
+                "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n",
+                "\n",
+                "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
+                "\n",
+                "            return rules\n",
+                "\n",
+                "def apply_expert_rules(line):\n",
+                "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n",
+                "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
+                "\n",
+                "    global rules\n",
+                "\n",
+                "    for rule in rules:\n",
+                "        notebook = rule[1]\n",
+                "        cell_type = rule[2]\n",
+                "        output_type = rule[3] # i.e. stream or error\n",
+                "        output_type_name = rule[4] # i.e. ename or name \n",
+                "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
+                "        details_name = rule[6]  # i.e. evalue or text \n",
+                "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
+                "\n",
+                "        if debug_logging:\n",
+                "            print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n",
+                "\n",
+                "        if re.match(expression, line, re.DOTALL):\n",
+                "\n",
+                "            if debug_logging:\n",
+                "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n",
+                "\n",
+                "            match_found = True\n",
+                "\n",
+                "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
+                "\n",
+                "\n",
+                "\n",
+                "print('Common functions defined successfully.')\n",
+                "\n",
+                "# Hints for binary (transient fault) retry, (known) error and install guide\n",
+                "#\n",
+                "retry_hints = {'python': []}\n",
+                "error_hints = {'python': [['Library not loaded: /usr/local/opt/unixodbc', 'SOP012 - Install unixodbc for Mac', '../install/sop012-brew-install-odbc-for-sql-server.ipynb'], ['WARNING: You are using pip version', 'SOP040 - Upgrade pip in ADS Python sandbox', '../install/sop040-upgrade-pip.ipynb']]}\n",
+                "install_hint = {'python': []}"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Uninstall az CLI"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "run('python -m pip uninstall azure-cli --yes')"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": true,
+            "symlink": true
+        }
+    }
+}
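
The `run` helper embedded in each of these notebooks retries a command recursively when a string registered in `retry_hints` shows up on stderr, up to `MAX_RETRIES` times. A minimal sketch of that pattern, stripped of the notebook plumbing (the `RETRY_HINTS` table below is illustrative, not the notebook's, and the sketch simplifies by retrying only on a non-zero exit code):

    import shlex
    import subprocess

    MAX_RETRIES = 5

    # Illustrative hint table: stderr substrings that mark a failure as transient.
    RETRY_HINTS = {"kubectl": ["connection refused"]}

    def run_with_retry(cmd, retry_count=0):
        """Run cmd; retry recursively when stderr contains a known transient hint."""
        args = shlex.split(cmd)
        proc = subprocess.run(args, capture_output=True, text=True)
        if proc.returncode != 0:
            for hint in RETRY_HINTS.get(args[0].lower(), []):
                if hint in proc.stderr and retry_count < MAX_RETRIES:
                    print(f"RETRY: {retry_count} (due to: {hint})")
                    return run_with_retry(cmd, retry_count + 1)
            raise SystemExit(f"'{cmd}' returned non-zero exit code: {proc.returncode}")
        return proc.stdout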

+ 403 - 0
Big-Data-Clusters/CU8/Public/content/install/sop040-upgrade-pip.ipynb

@@ -0,0 +1,403 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "SOP040 - Upgrade pip in ADS Python sandbox\n",
+                "==========================================\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Common functions\n",
+                "\n",
+                "Define helper functions used in this notebook."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define `run` function for transient fault handling, hyperlinked suggestions, and scrolling updates on Windows\n",
+                "import sys\n",
+                "import os\n",
+                "import re\n",
+                "import json\n",
+                "import platform\n",
+                "import shlex\n",
+                "import shutil\n",
+                "import datetime\n",
+                "\n",
+                "from subprocess import Popen, PIPE\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
+                "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
+                "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
+                "\n",
+                "first_run = True\n",
+                "rules = None\n",
+                "debug_logging = False\n",
+                "\n",
+                "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
+                "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
+                "\n",
+                "    NOTES:\n",
+                "\n",
+                "    1.  Commands that need this kind of ' quoting on Windows e.g.:\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n",
+                "\n",
+                "        Need to actually pass in as '\"':\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n",
+                "\n",
+                "        The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n",
+                "        \n",
+                "            `iter(p.stdout.readline, b'')`\n",
+                "\n",
+                "        The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n",
+                "    \"\"\"\n",
+                "    MAX_RETRIES = 5\n",
+                "    output = \"\"\n",
+                "    retry = False\n",
+                "\n",
+                "    global first_run\n",
+                "    global rules\n",
+                "\n",
+                "    if first_run:\n",
+                "        first_run = False\n",
+                "        rules = load_rules()\n",
+                "\n",
+                "    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n",
+                "    #\n",
+                "    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
+                "    #\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n",
+                "        cmd = cmd.replace(\"\\n\", \" \")\n",
+                "\n",
+                "    # shlex.split is required on bash and for Windows paths with spaces\n",
+                "    #\n",
+                "    cmd_actual = shlex.split(cmd)\n",
+                "\n",
+                "    # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n",
+                "    #\n",
+                "    user_provided_exe_name = cmd_actual[0].lower()\n",
+                "\n",
+                "    # When running python, use the python in the ADS sandbox ({sys.executable})\n",
+                "    #\n",
+                "    if cmd.startswith(\"python \"):\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n",
+                "\n",
+                "        # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n",
+                "        # with:\n",
+                "        #\n",
+                "        #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n",
+                "        #\n",
+                "        # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n",
+                "        #\n",
+                "        if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n",
+                "            os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n",
+                "\n",
+                "    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n",
+                "    #\n",
+                "    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n",
+                "\n",
+                "    # To aid supportability, determine which binary file will actually be executed on the machine\n",
+                "    #\n",
+                "    which_binary = None\n",
+                "\n",
+                "    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n",
+                "    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n",
+                "    # of CURL exists on the machine use that one.  (Unfortunately the curl.exe in System32 is almost\n",
+                "    # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n",
+                "    # look for the 2nd installation of CURL in the path)\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n",
+                "        path = os.getenv('PATH')\n",
+                "        for p in path.split(os.path.pathsep):\n",
+                "            p = os.path.join(p, \"curl.exe\")\n",
+                "            if os.path.exists(p) and os.access(p, os.X_OK):\n",
+                "                if p.lower().find(\"system32\") == -1:\n",
+                "                    cmd_actual[0] = p\n",
+                "                    which_binary = p\n",
+                "                    break\n",
+                "\n",
+                "    # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n",
+                "    # seems to be required for .msi installs of azdata.cmd/az.cmd.  (otherwise Popen returns FileNotFound) \n",
+                "    #\n",
+                "    # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        which_binary = shutil.which(cmd_actual[0])\n",
+                "\n",
+                "    # Display an install HINT, so the user can click on a SOP to install the missing binary\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n",
+                "        print(sys.path)\n",
+                "\n",
+                "        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
+                "            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n",
+                "    else:   \n",
+                "        cmd_actual[0] = which_binary\n",
+                "\n",
+                "    start_time = datetime.datetime.now().replace(microsecond=0)\n",
+                "\n",
+                "    print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n",
+                "    print(f\"       using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n",
+                "    print(f\"       cwd: {os.getcwd()}\")\n",
+                "\n",
+                "    # Command-line tools such as CURL and AZDATA HDFS commands output\n",
+                "    # scrolling progress bars, which causes Jupyter to hang forever, to\n",
+                "    # workaround this, use no_output=True\n",
+                "    #\n",
+                "\n",
+                "\n",
+                "    # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n",
+                "    #\n",
+                "    wait = True \n",
+                "\n",
+                "    try:\n",
+                "        if no_output:\n",
+                "            p = Popen(cmd_actual)\n",
+                "        else:\n",
+                "            p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n",
+                "            with p.stdout:\n",
+                "                for line in iter(p.stdout.readline, b''):\n",
+                "                    line = line.decode()\n",
+                "                    if return_output:\n",
+                "                        output = output + line\n",
+                "                    else:\n",
+                "                        if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n",
+                "                            regex = re.compile('  \"(.*)\"\\: \"(.*)\"') \n",
+                "                            match = regex.match(line)\n",
+                "                            if match:\n",
+                "                                if match.group(1).find(\"HTML\") != -1:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n",
+                "                                else:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n",
+                "\n",
+                "                                    wait = False\n",
+                "                                    break # otherwise infinite hang, have not worked out why yet.\n",
+                "                        else:\n",
+                "                            print(line, end='')\n",
+                "                            if rules is not None:\n",
+                "                                apply_expert_rules(line)\n",
+                "\n",
+                "        if wait:\n",
+                "            p.wait()\n",
+                "    except FileNotFoundError as e:\n",
+                "        if install_hint is not None:\n",
+                "            display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n",
+                "\n",
+                "    exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n",
+                "\n",
+                "    if not no_output:\n",
+                "        for line in iter(p.stderr.readline, b''):\n",
+                "            try:\n",
+                "                line_decoded = line.decode()\n",
+                "            except UnicodeDecodeError:\n",
+                "                # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n",
+                "                #\n",
+                "                #   \\xa0\n",
+                "                #\n",
+                "                # For example see this in the response from `az group create`:\n",
+                "                #\n",
+                "                # ERROR: Get Token request returned http error: 400 and server \n",
+                "                # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n",
+                "                # The refresh token has expired due to inactivity.\\xa0The token was \n",
+                "                # issued on 2018-10-25T23:35:11.9832872Z\n",
+                "                #\n",
+                "                # which generates the exception:\n",
+                "                #\n",
+                "                # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n",
+                "                #\n",
+                "                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n",
+                "                print(line)\n",
+                "                line_decoded = \"\"\n",
+                "                pass\n",
+                "            else:\n",
+                "\n",
+                "                # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n",
+                "                # print this empty \"ERR:\" as it confuses.\n",
+                "                #\n",
+                "                if line_decoded == \"\":\n",
+                "                    continue\n",
+                "                \n",
+                "                print(f\"STDERR: {line_decoded}\", end='')\n",
+                "\n",
+                "                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n",
+                "                    exit_code_workaround = 1\n",
+                "\n",
+                "                # inject HINTs to next TSG/SOP based on output in stderr\n",
+                "                #\n",
+                "                if user_provided_exe_name in error_hints:\n",
+                "                    for error_hint in error_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(error_hint[0]) != -1:\n",
+                "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
+                "\n",
+                "                # apply expert rules (to run follow-on notebooks), based on output\n",
+                "                #\n",
+                "                if rules is not None:\n",
+                "                    apply_expert_rules(line_decoded)\n",
+                "\n",
+                "                # Verify if a transient error, if so automatically retry (recursive)\n",
+                "                #\n",
+                "                if user_provided_exe_name in retry_hints:\n",
+                "                    for retry_hint in retry_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(retry_hint) != -1:\n",
+                "                            if retry_count < MAX_RETRIES:\n",
+                "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
+                "                                retry_count = retry_count + 1\n",
+                "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
+                "\n",
+                "                                if return_output:\n",
+                "                                    if base64_decode:\n",
+                "                                        import base64\n",
+                "\n",
+                "                                        return base64.b64decode(output).decode('utf-8')\n",
+                "                                    else:\n",
+                "                                        return output\n",
+                "\n",
+                "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
+                "\n",
+                "    # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n",
+                "    # don't wait here, if success known above\n",
+                "    #\n",
+                "    if wait: \n",
+                "        if p.returncode != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n",
+                "    else:\n",
+                "        if exit_code_workaround !=0 :\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n",
+                "\n",
+                "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
+                "\n",
+                "    if return_output:\n",
+                "        if base64_decode:\n",
+                "            import base64\n",
+                "\n",
+                "            return base64.b64decode(output).decode('utf-8')\n",
+                "        else:\n",
+                "            return output\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    \"\"\"Load a json file from disk and return the contents\"\"\"\n",
+                "\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def load_rules():\n",
+                "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
+                "\n",
+                "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
+                "    #\n",
+                "    try:\n",
+                "        j = load_json(\"sop040-upgrade-pip.ipynb\")\n",
+                "    except:\n",
+                "        pass # If the user has renamed the book, we can't load ourself.  NOTE: Is there a way in Jupyter, to know your own filename?\n",
+                "    else:\n",
+                "        if \"metadata\" in j and \\\n",
+                "            \"azdata\" in j[\"metadata\"] and \\\n",
+                "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
+                "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "\n",
+                "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
+                "\n",
+                "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n",
+                "\n",
+                "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
+                "\n",
+                "            return rules\n",
+                "\n",
+                "def apply_expert_rules(line):\n",
+                "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n",
+                "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
+                "\n",
+                "    global rules\n",
+                "\n",
+                "    for rule in rules:\n",
+                "        notebook = rule[1]\n",
+                "        cell_type = rule[2]\n",
+                "        output_type = rule[3] # i.e. stream or error\n",
+                "        output_type_name = rule[4] # i.e. ename or name \n",
+                "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
+                "        details_name = rule[6]  # i.e. evalue or text \n",
+                "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
+                "\n",
+                "        if debug_logging:\n",
+                "            print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n",
+                "\n",
+                "        if re.match(expression, line, re.DOTALL):\n",
+                "\n",
+                "            if debug_logging:\n",
+                "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n",
+                "\n",
+                "            match_found = True\n",
+                "\n",
+                "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
+                "\n",
+                "\n",
+                "\n",
+                "print('Common functions defined successfully.')\n",
+                "\n",
+                "# Hints for binary (transient fault) retry, (known) error and install guide\n",
+                "#\n",
+                "retry_hints = {'python': []}\n",
+                "error_hints = {'python': [['Library not loaded: /usr/local/opt/unixodbc', 'SOP012 - Install unixodbc for Mac', '../install/sop012-brew-install-odbc-for-sql-server.ipynb'], ['WARNING: You are using pip version', 'SOP040 - Upgrade pip in ADS Python sandbox', '../install/sop040-upgrade-pip.ipynb']]}\n",
+                "install_hint = {'python': []}"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Upgrade pip"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import sys\n",
+                "\n",
+                "run(f'python -m pip install --upgrade pip')"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": true,
+            "symlink": true
+        }
+    }
+}
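
One detail in SOP040 worth calling out: when a command starts with `python `, the `run` helper substitutes `sys.executable`, so pip operates on the Python sandbox that Azure Data Studio is actually running, not the first `python` on PATH. The standalone equivalent of the notebook's upgrade step is:

    import sys
    import subprocess

    # Upgrade pip inside the interpreter executing this code (the ADS sandbox),
    # rather than whichever `python` happens to be first on PATH.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "--upgrade", "pip"],
        check=True,
    )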

File diff is too large to display
+ 359 - 0
Big-Data-Clusters/CU8/Public/content/install/sop054-install-azdata.ipynb
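
SOP054's diff is omitted above because it is too large to display. Its core step is the install counterpart of SOP055 below: azdata is distributed as a pip requirements file, so the install presumably reduces to something like this sketch (the https://aka.ms/azdata URL is taken from SOP055's uninstall step; the exact flags are an assumption):

    import sys
    import subprocess

    # Hypothetical sketch: install the azdata CLI from Microsoft's published
    # requirements file into the current interpreter's environment.
    # (URL taken from SOP055; treat the flags as an assumption.)
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "-r", "https://aka.ms/azdata"],
        check=True,
    )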


+ 428 - 0
Big-Data-Clusters/CU8/Public/content/install/sop055-uninstall-azdata.ipynb

@@ -0,0 +1,428 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "SOP055 - Uninstall azdata CLI (using pip)\n",
+                "=========================================\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Common functions\n",
+                "\n",
+                "Define helper functions used in this notebook."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define `run` function for transient fault handling, hyperlinked suggestions, and scrolling updates on Windows\n",
+                "import sys\n",
+                "import os\n",
+                "import re\n",
+                "import json\n",
+                "import platform\n",
+                "import shlex\n",
+                "import shutil\n",
+                "import datetime\n",
+                "\n",
+                "from subprocess import Popen, PIPE\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
+                "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
+                "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
+                "\n",
+                "first_run = True\n",
+                "rules = None\n",
+                "debug_logging = False\n",
+                "\n",
+                "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
+                "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
+                "\n",
+                "    NOTES:\n",
+                "\n",
+                "    1.  Commands that need this kind of ' quoting on Windows e.g.:\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n",
+                "\n",
+                "        Need to actually pass in as '\"':\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n",
+                "\n",
+                "        The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n",
+                "        \n",
+                "            `iter(p.stdout.readline, b'')`\n",
+                "\n",
+                "        The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n",
+                "    \"\"\"\n",
+                "    MAX_RETRIES = 5\n",
+                "    output = \"\"\n",
+                "    retry = False\n",
+                "\n",
+                "    global first_run\n",
+                "    global rules\n",
+                "\n",
+                "    if first_run:\n",
+                "        first_run = False\n",
+                "        rules = load_rules()\n",
+                "\n",
+                "    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n",
+                "    #\n",
+                "    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
+                "    #\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n",
+                "        cmd = cmd.replace(\"\\n\", \" \")\n",
+                "\n",
+                "    # shlex.split is required on bash and for Windows paths with spaces\n",
+                "    #\n",
+                "    cmd_actual = shlex.split(cmd)\n",
+                "\n",
+                "    # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n",
+                "    #\n",
+                "    user_provided_exe_name = cmd_actual[0].lower()\n",
+                "\n",
+                "    # When running python, use the python in the ADS sandbox ({sys.executable})\n",
+                "    #\n",
+                "    if cmd.startswith(\"python \"):\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n",
+                "\n",
+                "        # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n",
+                "        # with:\n",
+                "        #\n",
+                "        #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n",
+                "        #\n",
+                "        # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n",
+                "        #\n",
+                "        if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n",
+                "            os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n",
+                "\n",
+                "    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n",
+                "    #\n",
+                "    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n",
+                "\n",
+                "    # To aid supportability, determine which binary file will actually be executed on the machine\n",
+                "    #\n",
+                "    which_binary = None\n",
+                "\n",
+                "    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n",
+                "    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n",
+                "    # of CURL exists on the machine use that one.  (Unfortunately the curl.exe in System32 is almost\n",
+                "    # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n",
+                "    # look for the 2nd installation of CURL in the path)\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n",
+                "        path = os.getenv('PATH')\n",
+                "        for p in path.split(os.path.pathsep):\n",
+                "            p = os.path.join(p, \"curl.exe\")\n",
+                "            if os.path.exists(p) and os.access(p, os.X_OK):\n",
+                "                if p.lower().find(\"system32\") == -1:\n",
+                "                    cmd_actual[0] = p\n",
+                "                    which_binary = p\n",
+                "                    break\n",
+                "\n",
+                "    # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n",
+                "    # seems to be required for .msi installs of azdata.cmd/az.cmd.  (otherwise Popen returns FileNotFound) \n",
+                "    #\n",
+                "    # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        which_binary = shutil.which(cmd_actual[0])\n",
+                "\n",
+                "    # Display an install HINT, so the user can click on a SOP to install the missing binary\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n",
+                "        print(sys.path)\n",
+                "\n",
+                "        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
+                "            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n",
+                "    else:   \n",
+                "        cmd_actual[0] = which_binary\n",
+                "\n",
+                "    start_time = datetime.datetime.now().replace(microsecond=0)\n",
+                "\n",
+                "    print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n",
+                "    print(f\"       using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n",
+                "    print(f\"       cwd: {os.getcwd()}\")\n",
+                "\n",
+                "    # Command-line tools such as CURL and AZDATA HDFS commands output\n",
+                "    # scrolling progress bars, which causes Jupyter to hang forever, to\n",
+                "    # workaround this, use no_output=True\n",
+                "    #\n",
+                "\n",
+                "\n",
+                "    # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n",
+                "    #\n",
+                "    wait = True \n",
+                "\n",
+                "    try:\n",
+                "        if no_output:\n",
+                "            p = Popen(cmd_actual)\n",
+                "        else:\n",
+                "            p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n",
+                "            with p.stdout:\n",
+                "                for line in iter(p.stdout.readline, b''):\n",
+                "                    line = line.decode()\n",
+                "                    if return_output:\n",
+                "                        output = output + line\n",
+                "                    else:\n",
+                "                        if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n",
+                "                            regex = re.compile('  \"(.*)\"\\: \"(.*)\"') \n",
+                "                            match = regex.match(line)\n",
+                "                            if match:\n",
+                "                                if match.group(1).find(\"HTML\") != -1:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n",
+                "                                else:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n",
+                "\n",
+                "                                    wait = False\n",
+                "                                    break # otherwise infinite hang, have not worked out why yet.\n",
+                "                        else:\n",
+                "                            print(line, end='')\n",
+                "                            if rules is not None:\n",
+                "                                apply_expert_rules(line)\n",
+                "\n",
+                "        if wait:\n",
+                "            p.wait()\n",
+                "    except FileNotFoundError as e:\n",
+                "        if install_hint is not None:\n",
+                "            display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n",
+                "\n",
+                "    exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n",
+                "\n",
+                "    if not no_output:\n",
+                "        for line in iter(p.stderr.readline, b''):\n",
+                "            try:\n",
+                "                line_decoded = line.decode()\n",
+                "            except UnicodeDecodeError:\n",
+                "                # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n",
+                "                #\n",
+                "                #   \\xa0\n",
+                "                #\n",
+                "                # For example see this in the response from `az group create`:\n",
+                "                #\n",
+                "                # ERROR: Get Token request returned http error: 400 and server \n",
+                "                # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n",
+                "                # The refresh token has expired due to inactivity.\\xa0The token was \n",
+                "                # issued on 2018-10-25T23:35:11.9832872Z\n",
+                "                #\n",
+                "                # which generates the exception:\n",
+                "                #\n",
+                "                # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n",
+                "                #\n",
+                "                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n",
+                "                print(line)\n",
+                "                line_decoded = \"\"\n",
+                "                pass\n",
+                "            else:\n",
+                "\n",
+                "                # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n",
+                "                # print this empty \"ERR:\" as it confuses.\n",
+                "                #\n",
+                "                if line_decoded == \"\":\n",
+                "                    continue\n",
+                "                \n",
+                "                print(f\"STDERR: {line_decoded}\", end='')\n",
+                "\n",
+                "                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n",
+                "                    exit_code_workaround = 1\n",
+                "\n",
+                "                # inject HINTs to next TSG/SOP based on output in stderr\n",
+                "                #\n",
+                "                if user_provided_exe_name in error_hints:\n",
+                "                    for error_hint in error_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(error_hint[0]) != -1:\n",
+                "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
+                "\n",
+                "                # apply expert rules (to run follow-on notebooks), based on output\n",
+                "                #\n",
+                "                if rules is not None:\n",
+                "                    apply_expert_rules(line_decoded)\n",
+                "\n",
+                "                # Verify if a transient error, if so automatically retry (recursive)\n",
+                "                #\n",
+                "                if user_provided_exe_name in retry_hints:\n",
+                "                    for retry_hint in retry_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(retry_hint) != -1:\n",
+                "                            if retry_count < MAX_RETRIES:\n",
+                "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
+                "                                retry_count = retry_count + 1\n",
+                "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
+                "\n",
+                "                                if return_output:\n",
+                "                                    if base64_decode:\n",
+                "                                        import base64\n",
+                "\n",
+                "                                        return base64.b64decode(output).decode('utf-8')\n",
+                "                                    else:\n",
+                "                                        return output\n",
+                "\n",
+                "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
+                "\n",
+                "    # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n",
+                "    # don't wait here, if success known above\n",
+                "    #\n",
+                "    if wait: \n",
+                "        if p.returncode != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n",
+                "    else:\n",
+                "        if exit_code_workaround !=0 :\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n",
+                "\n",
+                "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
+                "\n",
+                "    if return_output:\n",
+                "        if base64_decode:\n",
+                "            import base64\n",
+                "\n",
+                "            return base64.b64decode(output).decode('utf-8')\n",
+                "        else:\n",
+                "            return output\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    \"\"\"Load a json file from disk and return the contents\"\"\"\n",
+                "\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def load_rules():\n",
+                "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
+                "\n",
+                "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
+                "    #\n",
+                "    try:\n",
+                "        j = load_json(\"sop055-uninstall-azdata.ipynb\")\n",
+                "    except:\n",
+                "        pass # If the user has renamed the book, we can't load ourself.  NOTE: Is there a way in Jupyter, to know your own filename?\n",
+                "    else:\n",
+                "        if \"metadata\" in j and \\\n",
+                "            \"azdata\" in j[\"metadata\"] and \\\n",
+                "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
+                "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "\n",
+                "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
+                "\n",
+                "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n",
+                "\n",
+                "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
+                "\n",
+                "            return rules\n",
+                "\n",
+                "def apply_expert_rules(line):\n",
+                "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n",
+                "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
+                "\n",
+                "    global rules\n",
+                "\n",
+                "    for rule in rules:\n",
+                "        notebook = rule[1]\n",
+                "        cell_type = rule[2]\n",
+                "        output_type = rule[3] # i.e. stream or error\n",
+                "        output_type_name = rule[4] # i.e. ename or name \n",
+                "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
+                "        details_name = rule[6]  # i.e. evalue or text \n",
+                "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
+                "\n",
+                "        if debug_logging:\n",
+                "            print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n",
+                "\n",
+                "        if re.match(expression, line, re.DOTALL):\n",
+                "\n",
+                "            if debug_logging:\n",
+                "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n",
+                "\n",
+                "            match_found = True\n",
+                "\n",
+                "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
+                "\n",
+                "\n",
+                "\n",
+                "print('Common functions defined successfully.')\n",
+                "\n",
+                "# Hints for binary (transient fault) retry, (known) error and install guide\n",
+                "#\n",
+                "retry_hints = {'python': []}\n",
+                "error_hints = {'python': [['Library not loaded: /usr/local/opt/unixodbc', 'SOP012 - Install unixodbc for Mac', '../install/sop012-brew-install-odbc-for-sql-server.ipynb'], ['WARNING: You are using pip version', 'SOP040 - Upgrade pip in ADS Python sandbox', '../install/sop040-upgrade-pip.ipynb']]}\n",
+                "install_hint = {'python': []}"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Uninstall azdata CLI"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import sys\n",
+                "\n",
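+                "# https://aka.ms/azdata resolves to a pip requirements file listing the azdata CLI packages, so '-r'\n",
+                "# uninstalls each of them; '-y' skips the per-package confirmation prompts\n",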
+                "run(f'python -m pip uninstall -r https://aka.ms/azdata -y')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Pip list\n",
+                "\n",
+                "Verify there are no azdata modules in the list"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
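+                "# After the uninstall above, no azdata packages should remain in the listing below\n",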
+                "run(f'python -m pip list')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Related (SOP055, SOP064)"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": true,
+            "symlink": true
+        }
+    }
+}

+ 419 - 0
Big-Data-Clusters/CU8/Public/content/install/sop059-install-kubernetes-module.ipynb

@@ -0,0 +1,419 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "SOP059 - Install Kubernetes Python module\n",
+                "=========================================\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Common functions\n",
+                "\n",
+                "Define helper functions used in this notebook."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define `run` function for transient fault handling, hyperlinked suggestions, and scrolling updates on Windows\n",
+                "import sys\n",
+                "import os\n",
+                "import re\n",
+                "import json\n",
+                "import platform\n",
+                "import shlex\n",
+                "import shutil\n",
+                "import datetime\n",
+                "\n",
+                "from subprocess import Popen, PIPE\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
+                "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
+                "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
+                "\n",
+                "first_run = True\n",
+                "rules = None\n",
+                "debug_logging = False\n",
+                "\n",
+                "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
+                "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
+                "\n",
+                "    NOTES:\n",
+                "\n",
+                "    1.  Commands that need this kind of ' quoting on Windows e.g.:\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n",
+                "\n",
+                "        Need to actually pass in as '\"':\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n",
+                "\n",
+                "        The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n",
+                "        \n",
+                "            `iter(p.stdout.readline, b'')`\n",
+                "\n",
+                "        The shlex.split call does the right thing for each platform; just use the '\"' pattern for a '\n",
+                "    \"\"\"\n",
+                "    MAX_RETRIES = 5\n",
+                "    output = \"\"\n",
+                "    retry = False\n",
+                "\n",
+                "    global first_run\n",
+                "    global rules\n",
+                "\n",
+                "    if first_run:\n",
+                "        first_run = False\n",
+                "        rules = load_rules()\n",
+                "\n",
+                "    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n",
+                "    #\n",
+                "    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
+                "    #\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n",
+                "        cmd = cmd.replace(\"\\n\", \" \")\n",
+                "\n",
+                "    # shlex.split is required on bash and for Windows paths with spaces\n",
+                "    #\n",
+                "    cmd_actual = shlex.split(cmd)\n",
+                "\n",
+                "    # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n",
+                "    #\n",
+                "    user_provided_exe_name = cmd_actual[0].lower()\n",
+                "\n",
+                "    # When running python, use the python in the ADS sandbox ({sys.executable})\n",
+                "    #\n",
+                "    if cmd.startswith(\"python \"):\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n",
+                "\n",
+                "        # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n",
+                "        # with:\n",
+                "        #\n",
+                "        #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n",
+                "        #\n",
+                "        # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n",
+                "        #\n",
+                "        if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n",
+                "            os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n",
+                "\n",
+                "    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n",
+                "    #\n",
+                "    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n",
+                "\n",
+                "    # To aid supportability, determine which binary file will actually be executed on the machine\n",
+                "    #\n",
+                "    which_binary = None\n",
+                "\n",
+                "    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n",
+                "    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n",
+                "    # of CURL exists on the machine use that one.  (Unfortunately the curl.exe in System32 is almost\n",
+                "    # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n",
+                "    # look for the 2nd installation of CURL in the path)\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n",
+                "        path = os.getenv('PATH')\n",
+                "        for p in path.split(os.path.pathsep):\n",
+                "            p = os.path.join(p, \"curl.exe\")\n",
+                "            if os.path.exists(p) and os.access(p, os.X_OK):\n",
+                "                if p.lower().find(\"system32\") == -1:\n",
+                "                    cmd_actual[0] = p\n",
+                "                    which_binary = p\n",
+                "                    break\n",
+                "\n",
+                "    # Find the path-based location (shutil.which) of the executable that will be run (and display it to aid supportability); this\n",
+                "    # seems to be required for .msi installs of azdata.cmd/az.cmd.  (otherwise Popen returns FileNotFound)\n",
+                "    #\n",
+                "    # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n",
+                "    #\n",
+                "    if which_binary is None:\n",
+                "        which_binary = shutil.which(cmd_actual[0])\n",
+                "\n",
+                "    # Display an install HINT, so the user can click on a SOP to install the missing binary\n",
+                "    #\n",
+                "    if which_binary is None:\n",
+                "        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n",
+                "        print(os.getenv('PATH'))\n",
+                "\n",
+                "        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
+                "            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n",
+                "    else:   \n",
+                "        cmd_actual[0] = which_binary\n",
+                "\n",
+                "    start_time = datetime.datetime.now().replace(microsecond=0)\n",
+                "\n",
+                "    print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n",
+                "    print(f\"       using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n",
+                "    print(f\"       cwd: {os.getcwd()}\")\n",
+                "\n",
+                "    # Command-line tools such as CURL and AZDATA HDFS commands output\n",
+                "    # scrolling progress bars, which cause Jupyter to hang forever; to\n",
+                "    # work around this, use no_output=True\n",
+                "    #\n",
+                "\n",
+                "\n",
+                "    # Work around an infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n",
+                "    #\n",
+                "    wait = True \n",
+                "\n",
+                "    try:\n",
+                "        if no_output:\n",
+                "            p = Popen(cmd_actual)\n",
+                "        else:\n",
+                "            p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n",
+                "            with p.stdout:\n",
+                "                for line in iter(p.stdout.readline, b''):\n",
+                "                    line = line.decode()\n",
+                "                    if return_output:\n",
+                "                        output = output + line\n",
+                "                    else:\n",
+                "                        if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n",
+                "                            regex = re.compile('  \"(.*)\": \"(.*)\"')\n",
+                "                            match = regex.match(line)\n",
+                "                            if match:\n",
+                "                                if match.group(1).find(\"HTML\") != -1:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n",
+                "                                else:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n",
+                "\n",
+                "                                    wait = False\n",
+                "                                    break # otherwise infinite hang, have not worked out why yet.\n",
+                "                        else:\n",
+                "                            print(line, end='')\n",
+                "                            if rules is not None:\n",
+                "                                apply_expert_rules(line)\n",
+                "\n",
+                "        if wait:\n",
+                "            p.wait()\n",
+                "    except FileNotFoundError as e:\n",
+                "        if install_hint is not None:\n",
+                "            display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n",
+                "\n",
+                "    exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n",
+                "\n",
+                "    if not no_output:\n",
+                "        for line in iter(p.stderr.readline, b''):\n",
+                "            try:\n",
+                "                line_decoded = line.decode()\n",
+                "            except UnicodeDecodeError:\n",
+                "                # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n",
+                "                #\n",
+                "                #   \\xa0\n",
+                "                #\n",
+                "                # For example see this in the response from `az group create`:\n",
+                "                #\n",
+                "                # ERROR: Get Token request returned http error: 400 and server \n",
+                "                # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n",
+                "                # The refresh token has expired due to inactivity.\\xa0The token was \n",
+                "                # issued on 2018-10-25T23:35:11.9832872Z\n",
+                "                #\n",
+                "                # which generates the exception:\n",
+                "                #\n",
+                "                # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n",
+                "                #\n",
+                "                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n",
+                "                print(line)\n",
+                "                line_decoded = \"\"\n",
+                "                pass\n",
+                "            else:\n",
+                "\n",
+                "                # azdata emits a single empty line to stderr when doing an hdfs cp; don't\n",
+                "                # print it as an empty \"STDERR:\" line, as that is confusing.\n",
+                "                #\n",
+                "                if line_decoded == \"\":\n",
+                "                    continue\n",
+                "                \n",
+                "                print(f\"STDERR: {line_decoded}\", end='')\n",
+                "\n",
+                "                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n",
+                "                    exit_code_workaround = 1\n",
+                "\n",
+                "                # inject HINTs to next TSG/SOP based on output in stderr\n",
+                "                #\n",
+                "                if user_provided_exe_name in error_hints:\n",
+                "                    for error_hint in error_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(error_hint[0]) != -1:\n",
+                "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
+                "\n",
+                "                # apply expert rules (to run follow-on notebooks), based on output\n",
+                "                #\n",
+                "                if rules is not None:\n",
+                "                    apply_expert_rules(line_decoded)\n",
+                "\n",
+                "                # Verify if a transient error, if so automatically retry (recursive)\n",
+                "                #\n",
+                "                if user_provided_exe_name in retry_hints:\n",
+                "                    for retry_hint in retry_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(retry_hint) != -1:\n",
+                "                            if retry_count < MAX_RETRIES:\n",
+                "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
+                "                                retry_count = retry_count + 1\n",
+                "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
+                "\n",
+                "                                if return_output:\n",
+                "                                    if base64_decode:\n",
+                "                                        import base64\n",
+                "\n",
+                "                                        return base64.b64decode(output).decode('utf-8')\n",
+                "                                    else:\n",
+                "                                        return output\n",
+                "\n",
+                "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
+                "\n",
+                "    # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n",
+                "    # don't wait here, if success known above\n",
+                "    #\n",
+                "    if wait: \n",
+                "        if p.returncode != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n",
+                "    else:\n",
+                "        if exit_code_workaround != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n",
+                "\n",
+                "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
+                "\n",
+                "    if return_output:\n",
+                "        if base64_decode:\n",
+                "            import base64\n",
+                "\n",
+                "            return base64.b64decode(output).decode('utf-8')\n",
+                "        else:\n",
+                "            return output\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    \"\"\"Load a json file from disk and return the contents\"\"\"\n",
+                "\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def load_rules():\n",
+                "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
+                "\n",
+                "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
+                "    #\n",
+                "    try:\n",
+                "        j = load_json(\"sop059-install-kubernetes-module.ipynb\")\n",
+                "    except:\n",
+                "        pass # If the user has renamed the notebook, we can't load ourselves.  NOTE: Is there a way in Jupyter to know your own filename?\n",
+                "    else:\n",
+                "        if \"metadata\" in j and \\\n",
+                "            \"azdata\" in j[\"metadata\"] and \\\n",
+                "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
+                "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "\n",
+                "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
+                "\n",
+                "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n",
+                "\n",
+                "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
+                "\n",
+                "            return rules\n",
+                "\n",
+                "def apply_expert_rules(line):\n",
+                "    \"\"\"Determine if the stderr line passed in matches the regular expression of any of the 'expert rules'; if so,\n",
+                "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
+                "\n",
+                "    global rules\n",
+                "\n",
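+                "    # Each rule is a list; the indices used below are: [0]=priority (sort key), [1]=notebook to HINT,\n",
+                "    # [2]=cell_type, [3]=output_type, [4]=output_type_name, [5]=output_type_value, [6]=details_name,\n",
+                "    # [7]=regular expression to match against the line\n",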
+                "    for rule in rules:\n",
+                "        notebook = rule[1]\n",
+                "        cell_type = rule[2]\n",
+                "        output_type = rule[3] # i.e. stream or error\n",
+                "        output_type_name = rule[4] # i.e. ename or name \n",
+                "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
+                "        details_name = rule[6]  # i.e. evalue or text \n",
+                "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
+                "\n",
+                "        if debug_logging:\n",
+                "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n",
+                "\n",
+                "        if re.match(expression, line, re.DOTALL):\n",
+                "\n",
+                "            if debug_logging:\n",
+                "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n",
+                "\n",
+                "            match_found = True\n",
+                "\n",
+                "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
+                "\n",
+                "\n",
+                "\n",
+                "print('Common functions defined successfully.')\n",
+                "\n",
+                "# Hints for binary (transient fault) retry, (known) error and install guide\n",
+                "#\n",
+                "retry_hints = {'python': []}\n",
+                "error_hints = {'python': [['Library not loaded: /usr/local/opt/unixodbc', 'SOP012 - Install unixodbc for Mac', '../install/sop012-brew-install-odbc-for-sql-server.ipynb'], ['WARNING: You are using pip version', 'SOP040 - Upgrade pip in ADS Python sandbox', '../install/sop040-upgrade-pip.ipynb']]}\n",
+                "install_hint = {'python': []}"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Pip install the kubernetes module"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import sys\n",
+                "\n",
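+                "# `run` uses shlex.split and Popen without a shell, so 'kubernetes>=10.0.0' is passed through as a\n",
+                "# single argument and '>=' is not treated as shell redirection; quote it if running this in a shell directly\n",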
+                "run(f'python -m pip install kubernetes>=10.0.0')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Pip list installed modules"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
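+                "# The kubernetes module should now appear in the listing below\n",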
+                "run(f'python -m pip list')"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": true,
+            "symlink": true
+        }
+    }
+}

+ 419 - 0
Big-Data-Clusters/CU8/Public/content/install/sop060-uninstall-kubernetes-module.ipynb

@@ -0,0 +1,419 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "SOP060 - Uninstall kubernetes module\n",
+                "====================================\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Common functions\n",
+                "\n",
+                "Define helper functions used in this notebook."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define `run` function for transient fault handling, hyperlinked suggestions, and scrolling updates on Windows\n",
+                "import sys\n",
+                "import os\n",
+                "import re\n",
+                "import json\n",
+                "import platform\n",
+                "import shlex\n",
+                "import shutil\n",
+                "import datetime\n",
+                "\n",
+                "from subprocess import Popen, PIPE\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
+                "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
+                "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
+                "\n",
+                "first_run = True\n",
+                "rules = None\n",
+                "debug_logging = False\n",
+                "\n",
+                "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
+                "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
+                "\n",
+                "    NOTES:\n",
+                "\n",
+                "    1.  Commands that need this kind of ' quoting on Windows e.g.:\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n",
+                "\n",
+                "        Need to actually pass in as '\"':\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n",
+                "\n",
+                "        The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n",
+                "        \n",
+                "            `iter(p.stdout.readline, b'')`\n",
+                "\n",
+                "        The shlex.split call does the right thing for each platform; just use the '\"' pattern for a '\n",
+                "    \"\"\"\n",
+                "    MAX_RETRIES = 5\n",
+                "    output = \"\"\n",
+                "    retry = False\n",
+                "\n",
+                "    global first_run\n",
+                "    global rules\n",
+                "\n",
+                "    if first_run:\n",
+                "        first_run = False\n",
+                "        rules = load_rules()\n",
+                "\n",
+                "    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n",
+                "    #\n",
+                "    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
+                "    #\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n",
+                "        cmd = cmd.replace(\"\\n\", \" \")\n",
+                "\n",
+                "    # shlex.split is required on bash and for Windows paths with spaces\n",
+                "    #\n",
+                "    cmd_actual = shlex.split(cmd)\n",
+                "\n",
+                "    # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n",
+                "    #\n",
+                "    user_provided_exe_name = cmd_actual[0].lower()\n",
+                "\n",
+                "    # When running python, use the python in the ADS sandbox ({sys.executable})\n",
+                "    #\n",
+                "    if cmd.startswith(\"python \"):\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n",
+                "\n",
+                "        # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n",
+                "        # with:\n",
+                "        #\n",
+                "        #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n",
+                "        #\n",
+                "        # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n",
+                "        #\n",
+                "        if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n",
+                "            os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n",
+                "\n",
+                "    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n",
+                "    #\n",
+                "    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n",
+                "\n",
+                "    # To aid supportability, determine which binary file will actually be executed on the machine\n",
+                "    #\n",
+                "    which_binary = None\n",
+                "\n",
+                "    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n",
+                "    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n",
+                "    # of CURL exists on the machine use that one.  (Unfortunately the curl.exe in System32 is almost\n",
+                "    # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n",
+                "    # look for the 2nd installation of CURL in the path)\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n",
+                "        path = os.getenv('PATH')\n",
+                "        for p in path.split(os.path.pathsep):\n",
+                "            p = os.path.join(p, \"curl.exe\")\n",
+                "            if os.path.exists(p) and os.access(p, os.X_OK):\n",
+                "                if p.lower().find(\"system32\") == -1:\n",
+                "                    cmd_actual[0] = p\n",
+                "                    which_binary = p\n",
+                "                    break\n",
+                "\n",
+                "    # Find the path-based location (shutil.which) of the executable that will be run (and display it to aid supportability); this\n",
+                "    # seems to be required for .msi installs of azdata.cmd/az.cmd.  (otherwise Popen returns FileNotFound)\n",
+                "    #\n",
+                "    # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n",
+                "    #\n",
+                "    if which_binary is None:\n",
+                "        which_binary = shutil.which(cmd_actual[0])\n",
+                "\n",
+                "    # Display an install HINT, so the user can click on a SOP to install the missing binary\n",
+                "    #\n",
+                "    if which_binary is None:\n",
+                "        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n",
+                "        print(os.getenv('PATH'))\n",
+                "\n",
+                "        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
+                "            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n",
+                "    else:   \n",
+                "        cmd_actual[0] = which_binary\n",
+                "\n",
+                "    start_time = datetime.datetime.now().replace(microsecond=0)\n",
+                "\n",
+                "    print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n",
+                "    print(f\"       using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n",
+                "    print(f\"       cwd: {os.getcwd()}\")\n",
+                "\n",
+                "    # Command-line tools such as CURL and AZDATA HDFS commands output\n",
+                "    # scrolling progress bars, which cause Jupyter to hang forever; to\n",
+                "    # work around this, use no_output=True\n",
+                "    #\n",
+                "\n",
+                "\n",
+                "    # Work around an infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n",
+                "    #\n",
+                "    wait = True \n",
+                "\n",
+                "    try:\n",
+                "        if no_output:\n",
+                "            p = Popen(cmd_actual)\n",
+                "        else:\n",
+                "            p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n",
+                "            with p.stdout:\n",
+                "                for line in iter(p.stdout.readline, b''):\n",
+                "                    line = line.decode()\n",
+                "                    if return_output:\n",
+                "                        output = output + line\n",
+                "                    else:\n",
+                "                        if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n",
+                "                            regex = re.compile('  \"(.*)\": \"(.*)\"')\n",
+                "                            match = regex.match(line)\n",
+                "                            if match:\n",
+                "                                if match.group(1).find(\"HTML\") != -1:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n",
+                "                                else:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n",
+                "\n",
+                "                                    wait = False\n",
+                "                                    break # otherwise infinite hang, have not worked out why yet.\n",
+                "                        else:\n",
+                "                            print(line, end='')\n",
+                "                            if rules is not None:\n",
+                "                                apply_expert_rules(line)\n",
+                "\n",
+                "        if wait:\n",
+                "            p.wait()\n",
+                "    except FileNotFoundError as e:\n",
+                "        if install_hint is not None:\n",
+                "            display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n",
+                "\n",
+                "    exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n",
+                "\n",
+                "    if not no_output:\n",
+                "        for line in iter(p.stderr.readline, b''):\n",
+                "            try:\n",
+                "                line_decoded = line.decode()\n",
+                "            except UnicodeDecodeError:\n",
+                "                # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n",
+                "                #\n",
+                "                #   \\xa0\n",
+                "                #\n",
+                "                # For example see this in the response from `az group create`:\n",
+                "                #\n",
+                "                # ERROR: Get Token request returned http error: 400 and server \n",
+                "                # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n",
+                "                # The refresh token has expired due to inactivity.\\xa0The token was \n",
+                "                # issued on 2018-10-25T23:35:11.9832872Z\n",
+                "                #\n",
+                "                # which generates the exception:\n",
+                "                #\n",
+                "                # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n",
+                "                #\n",
+                "                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n",
+                "                print(line)\n",
+                "                line_decoded = \"\"\n",
+                "                pass\n",
+                "            else:\n",
+                "\n",
+                "                # azdata emits a single empty line to stderr when doing an hdfs cp; don't\n",
+                "                # print it as an empty \"STDERR:\" line, as that is confusing.\n",
+                "                #\n",
+                "                if line_decoded == \"\":\n",
+                "                    continue\n",
+                "                \n",
+                "                print(f\"STDERR: {line_decoded}\", end='')\n",
+                "\n",
+                "                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n",
+                "                    exit_code_workaround = 1\n",
+                "\n",
+                "                # inject HINTs to next TSG/SOP based on output in stderr\n",
+                "                #\n",
+                "                if user_provided_exe_name in error_hints:\n",
+                "                    for error_hint in error_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(error_hint[0]) != -1:\n",
+                "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
+                "\n",
+                "                # apply expert rules (to run follow-on notebooks), based on output\n",
+                "                #\n",
+                "                if rules is not None:\n",
+                "                    apply_expert_rules(line_decoded)\n",
+                "\n",
+                "                # Verify if a transient error, if so automatically retry (recursive)\n",
+                "                #\n",
+                "                if user_provided_exe_name in retry_hints:\n",
+                "                    for retry_hint in retry_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(retry_hint) != -1:\n",
+                "                            if retry_count < MAX_RETRIES:\n",
+                "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
+                "                                retry_count = retry_count + 1\n",
+                "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
+                "\n",
+                "                                if return_output:\n",
+                "                                    if base64_decode:\n",
+                "                                        import base64\n",
+                "\n",
+                "                                        return base64.b64decode(output).decode('utf-8')\n",
+                "                                    else:\n",
+                "                                        return output\n",
+                "\n",
+                "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
+                "\n",
+                "    # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n",
+                "    # don't wait here, if success known above\n",
+                "    #\n",
+                "    if wait: \n",
+                "        if p.returncode != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n",
+                "    else:\n",
+                "        if exit_code_workaround != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n",
+                "\n",
+                "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
+                "\n",
+                "    if return_output:\n",
+                "        if base64_decode:\n",
+                "            import base64\n",
+                "\n",
+                "            return base64.b64decode(output).decode('utf-8')\n",
+                "        else:\n",
+                "            return output\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    \"\"\"Load a json file from disk and return the contents\"\"\"\n",
+                "\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def load_rules():\n",
+                "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
+                "\n",
+                "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
+                "    #\n",
+                "    try:\n",
+                "        j = load_json(\"sop060-uninstall-kubernetes-module.ipynb\")\n",
+                "    except:\n",
+                "        pass # If the user has renamed the notebook, we can't load ourselves.  NOTE: Is there a way in Jupyter to know your own filename?\n",
+                "    else:\n",
+                "        if \"metadata\" in j and \\\n",
+                "            \"azdata\" in j[\"metadata\"] and \\\n",
+                "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
+                "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "\n",
+                "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
+                "\n",
+                "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n",
+                "\n",
+                "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
+                "\n",
+                "            return rules\n",
+                "\n",
+                "def apply_expert_rules(line):\n",
+                "    \"\"\"Determine if the stderr line passed in matches the regular expression of any of the 'expert rules'; if so,\n",
+                "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
+                "\n",
+                "    global rules\n",
+                "\n",
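+                "    # Each rule is a list; the indices used below are: [0]=priority (sort key), [1]=notebook to HINT,\n",
+                "    # [2]=cell_type, [3]=output_type, [4]=output_type_name, [5]=output_type_value, [6]=details_name,\n",
+                "    # [7]=regular expression to match against the line\n",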
+                "    for rule in rules:\n",
+                "        notebook = rule[1]\n",
+                "        cell_type = rule[2]\n",
+                "        output_type = rule[3] # i.e. stream or error\n",
+                "        output_type_name = rule[4] # i.e. ename or name \n",
+                "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
+                "        details_name = rule[6]  # i.e. evalue or text \n",
+                "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
+                "\n",
+                "        if debug_logging:\n",
+                "            print(f\"EXPERT: If rule '{expression}' is satisfied, run '{notebook}'.\")\n",
+                "\n",
+                "        if re.match(expression, line, re.DOTALL):\n",
+                "\n",
+                "            if debug_logging:\n",
+                "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{3}'\".format(output_type_name, output_type_value, expression, notebook))\n",
+                "\n",
+                "            match_found = True\n",
+                "\n",
+                "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
+                "\n",
+                "\n",
+                "\n",
+                "print('Common functions defined successfully.')\n",
+                "\n",
+                "# Hints for binary (transient fault) retry, (known) error and install guide\n",
+                "#\n",
+                "retry_hints = {'python': []}\n",
+                "error_hints = {'python': [['Library not loaded: /usr/local/opt/unixodbc', 'SOP012 - Install unixodbc for Mac', '../install/sop012-brew-install-odbc-for-sql-server.ipynb'], ['WARNING: You are using pip version', 'SOP040 - Upgrade pip in ADS Python sandbox', '../install/sop040-upgrade-pip.ipynb']]}\n",
+                "install_hint = {'python': []}"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Pip uninstall the kubernetes module"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import sys\n",
+                "\n",
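+                "# '-y' answers the uninstall confirmation automatically so the notebook does not block waiting for input\n",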
+                "run(f'python -m pip uninstall kubernetes -y')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Pip list installed modules"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
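+                "# After the uninstall above, the kubernetes module should no longer appear in the listing below\n",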
+                "run(f'python -m pip list')"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": true,
+            "symlink": true
+        }
+    }
+}

+ 433 - 0
Big-Data-Clusters/CU8/Public/content/install/sop062-install-ipython-sql-module.ipynb

@@ -0,0 +1,433 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "SOP062 - Install ipython-sql and pyodbc modules\n",
+                "===============================================\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Common functions\n",
+                "\n",
+                "Define helper functions used in this notebook."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define `run` function for transient fault handling, hyperlinked suggestions, and scrolling updates on Windows\n",
+                "import sys\n",
+                "import os\n",
+                "import re\n",
+                "import json\n",
+                "import platform\n",
+                "import shlex\n",
+                "import shutil\n",
+                "import datetime\n",
+                "\n",
+                "from subprocess import Popen, PIPE\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
+                "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
+                "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
+                "\n",
+                "first_run = True\n",
+                "rules = None\n",
+                "debug_logging = False\n",
+                "\n",
+                "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
+                "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
+                "\n",
+                "    NOTES:\n",
+                "\n",
+                "    1.  Commands that need this kind of ' quoting on Windows e.g.:\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n",
+                "\n",
+                "        Need to actually pass in as '\"':\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n",
+                "\n",
+                "        The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n",
+                "        \n",
+                "            `iter(p.stdout.readline, b'')`\n",
+                "\n",
+                "        The shlex.split call does the right thing for each platform; just use the '\"' pattern for a '\n",
+                "    \"\"\"\n",
+                "    MAX_RETRIES = 5\n",
+                "    output = \"\"\n",
+                "    retry = False\n",
+                "\n",
+                "    global first_run\n",
+                "    global rules\n",
+                "\n",
+                "    if first_run:\n",
+                "        first_run = False\n",
+                "        rules = load_rules()\n",
+                "\n",
+                "    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n",
+                "    #\n",
+                "    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
+                "    #\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n",
+                "        cmd = cmd.replace(\"\\n\", \" \")\n",
+                "\n",
+                "    # shlex.split is required on bash and for Windows paths with spaces\n",
+                "    #\n",
+                "    cmd_actual = shlex.split(cmd)\n",
+                "\n",
+                "    # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n",
+                "    #\n",
+                "    user_provided_exe_name = cmd_actual[0].lower()\n",
+                "\n",
+                "    # When running python, use the python in the ADS sandbox ({sys.executable})\n",
+                "    #\n",
+                "    if cmd.startswith(\"python \"):\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n",
+                "\n",
+                "        # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n",
+                "        # with:\n",
+                "        #\n",
+                "        #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n",
+                "        #\n",
+                "        # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n",
+                "        #\n",
+                "        if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n",
+                "            os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n",
+                "\n",
+                "    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n",
+                "    #\n",
+                "    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n",
+                "\n",
+                "    # To aid supportability, determine which binary file will actually be executed on the machine\n",
+                "    #\n",
+                "    which_binary = None\n",
+                "\n",
+                "    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n",
+                "    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n",
+                "    # of CURL exists on the machine use that one.  (Unfortunately the curl.exe in System32 is almost\n",
+                "    # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n",
+                "    # look for the 2nd installation of CURL in the path)\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n",
+                "        path = os.getenv('PATH')\n",
+                "        for p in path.split(os.path.pathsep):\n",
+                "            p = os.path.join(p, \"curl.exe\")\n",
+                "            if os.path.exists(p) and os.access(p, os.X_OK):\n",
+                "                if p.lower().find(\"system32\") == -1:\n",
+                "                    cmd_actual[0] = p\n",
+                "                    which_binary = p\n",
+                "                    break\n",
+                "\n",
+                "    # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n",
+                "    # seems to be required for .msi installs of azdata.cmd/az.cmd.  (otherwise Popen returns FileNotFound) \n",
+                "    #\n",
+                "    # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        which_binary = shutil.which(cmd_actual[0])\n",
+                "\n",
+                "    # Display an install HINT, so the user can click on a SOP to install the missing binary\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n",
+                "        print(sys.path)\n",
+                "\n",
+                "        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
+                "            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n",
+                "    else:   \n",
+                "        cmd_actual[0] = which_binary\n",
+                "\n",
+                "    start_time = datetime.datetime.now().replace(microsecond=0)\n",
+                "\n",
+                "    print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n",
+                "    print(f\"       using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n",
+                "    print(f\"       cwd: {os.getcwd()}\")\n",
+                "\n",
+                "    # Command-line tools such as CURL and AZDATA HDFS commands output\n",
+                "    # scrolling progress bars, which causes Jupyter to hang forever, to\n",
+                "    # workaround this, use no_output=True\n",
+                "    #\n",
+                "\n",
+                "\n",
+                "    # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n",
+                "    #\n",
+                "    wait = True \n",
+                "\n",
+                "    try:\n",
+                "        if no_output:\n",
+                "            p = Popen(cmd_actual)\n",
+                "        else:\n",
+                "            p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n",
+                "            with p.stdout:\n",
+                "                for line in iter(p.stdout.readline, b''):\n",
+                "                    line = line.decode()\n",
+                "                    if return_output:\n",
+                "                        output = output + line\n",
+                "                    else:\n",
+                "                        if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n",
+                "                            regex = re.compile('  \"(.*)\"\\: \"(.*)\"') \n",
+                "                            match = regex.match(line)\n",
+                "                            if match:\n",
+                "                                if match.group(1).find(\"HTML\") != -1:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n",
+                "                                else:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n",
+                "\n",
+                "                                    wait = False\n",
+                "                                    break # otherwise infinite hang, have not worked out why yet.\n",
+                "                        else:\n",
+                "                            print(line, end='')\n",
+                "                            if rules is not None:\n",
+                "                                apply_expert_rules(line)\n",
+                "\n",
+                "        if wait:\n",
+                "            p.wait()\n",
+                "    except FileNotFoundError as e:\n",
+                "        if install_hint is not None:\n",
+                "            display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n",
+                "\n",
+                "    exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n",
+                "\n",
+                "    if not no_output:\n",
+                "        for line in iter(p.stderr.readline, b''):\n",
+                "            try:\n",
+                "                line_decoded = line.decode()\n",
+                "            except UnicodeDecodeError:\n",
+                "                # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n",
+                "                #\n",
+                "                #   \\xa0\n",
+                "                #\n",
+                "                # For example see this in the response from `az group create`:\n",
+                "                #\n",
+                "                # ERROR: Get Token request returned http error: 400 and server \n",
+                "                # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n",
+                "                # The refresh token has expired due to inactivity.\\xa0The token was \n",
+                "                # issued on 2018-10-25T23:35:11.9832872Z\n",
+                "                #\n",
+                "                # which generates the exception:\n",
+                "                #\n",
+                "                # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n",
+                "                #\n",
+                "                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n",
+                "                print(line)\n",
+                "                line_decoded = \"\"\n",
+                "                pass\n",
+                "            else:\n",
+                "\n",
+                "                # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n",
+                "                # print this empty \"ERR:\" as it confuses.\n",
+                "                #\n",
+                "                if line_decoded == \"\":\n",
+                "                    continue\n",
+                "                \n",
+                "                print(f\"STDERR: {line_decoded}\", end='')\n",
+                "\n",
+                "                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n",
+                "                    exit_code_workaround = 1\n",
+                "\n",
+                "                # inject HINTs to next TSG/SOP based on output in stderr\n",
+                "                #\n",
+                "                if user_provided_exe_name in error_hints:\n",
+                "                    for error_hint in error_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(error_hint[0]) != -1:\n",
+                "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
+                "\n",
+                "                # apply expert rules (to run follow-on notebooks), based on output\n",
+                "                #\n",
+                "                if rules is not None:\n",
+                "                    apply_expert_rules(line_decoded)\n",
+                "\n",
+                "                # Verify if a transient error, if so automatically retry (recursive)\n",
+                "                #\n",
+                "                if user_provided_exe_name in retry_hints:\n",
+                "                    for retry_hint in retry_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(retry_hint) != -1:\n",
+                "                            if retry_count < MAX_RETRIES:\n",
+                "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
+                "                                retry_count = retry_count + 1\n",
+                "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
+                "\n",
+                "                                if return_output:\n",
+                "                                    if base64_decode:\n",
+                "                                        import base64\n",
+                "\n",
+                "                                        return base64.b64decode(output).decode('utf-8')\n",
+                "                                    else:\n",
+                "                                        return output\n",
+                "\n",
+                "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
+                "\n",
+                "    # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n",
+                "    # don't wait here, if success known above\n",
+                "    #\n",
+                "    if wait: \n",
+                "        if p.returncode != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n",
+                "    else:\n",
+                "        if exit_code_workaround !=0 :\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n",
+                "\n",
+                "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
+                "\n",
+                "    if return_output:\n",
+                "        if base64_decode:\n",
+                "            import base64\n",
+                "\n",
+                "            return base64.b64decode(output).decode('utf-8')\n",
+                "        else:\n",
+                "            return output\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    \"\"\"Load a json file from disk and return the contents\"\"\"\n",
+                "\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def load_rules():\n",
+                "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
+                "\n",
+                "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
+                "    #\n",
+                "    try:\n",
+                "        j = load_json(\"sop062-install-ipython-sql-module.ipynb\")\n",
+                "    except:\n",
+                "        pass # If the user has renamed the book, we can't load ourself.  NOTE: Is there a way in Jupyter, to know your own filename?\n",
+                "    else:\n",
+                "        if \"metadata\" in j and \\\n",
+                "            \"azdata\" in j[\"metadata\"] and \\\n",
+                "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
+                "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "\n",
+                "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
+                "\n",
+                "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n",
+                "\n",
+                "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
+                "\n",
+                "            return rules\n",
+                "\n",
+                "def apply_expert_rules(line):\n",
+                "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n",
+                "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
+                "\n",
+                "    global rules\n",
+                "\n",
+                "    for rule in rules:\n",
+                "        notebook = rule[1]\n",
+                "        cell_type = rule[2]\n",
+                "        output_type = rule[3] # i.e. stream or error\n",
+                "        output_type_name = rule[4] # i.e. ename or name \n",
+                "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
+                "        details_name = rule[6]  # i.e. evalue or text \n",
+                "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
+                "\n",
+                "        if debug_logging:\n",
+                "            print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n",
+                "\n",
+                "        if re.match(expression, line, re.DOTALL):\n",
+                "\n",
+                "            if debug_logging:\n",
+                "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n",
+                "\n",
+                "            match_found = True\n",
+                "\n",
+                "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
+                "\n",
+                "\n",
+                "\n",
+                "print('Common functions defined successfully.')\n",
+                "\n",
+                "# Hints for binary (transient fault) retry, (known) error and install guide\n",
+                "#\n",
+                "retry_hints = {'python': []}\n",
+                "error_hints = {'python': [['Library not loaded: /usr/local/opt/unixodbc', 'SOP012 - Install unixodbc for Mac', '../install/sop012-brew-install-odbc-for-sql-server.ipynb'], ['WARNING: You are using pip version', 'SOP040 - Upgrade pip in ADS Python sandbox', '../install/sop040-upgrade-pip.ipynb']]}\n",
+                "install_hint = {'python': []}"
+            ]
+        },
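+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Example: capture command output with `run()`\n",
+                "\n",
+                "A minimal sketch of the `return_output` flag defined above; the command\n",
+                "used here (`python -m pip --version`) is only an illustrative choice.\n",
+                "By default `run()` streams stdout as the command executes; with\n",
+                "`return_output=True` it returns the accumulated stdout as a string instead."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "# Illustrative only: capture the pip version string instead of streaming it\n",
+                "pip_version = run('python -m pip --version', return_output=True)\n",
+                "\n",
+                "print(f'Captured: {pip_version}')"
+            ]
+        },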
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Pip install the ipython-sql module"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "run(f'python -m pip install ipython-sql')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Pip install the pyodbc module"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "run(f'python -m pip install pyodbc')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Pip list installed modules"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "run(f'python -m pip list')"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": true,
+            "symlink": true
+        }
+    }
+}

File diff too large to display
+ 359 - 0
Big-Data-Clusters/CU8/Public/content/install/sop063-packman-install-azdata.ipynb


File diff too large to display
+ 358 - 0
Big-Data-Clusters/CU8/Public/content/install/sop064-packman-uninstall-azdata.ipynb


+ 63 - 0
Big-Data-Clusters/CU8/Public/content/install/sop069-install-odbc-driver-for-sql-server.ipynb

@@ -0,0 +1,63 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "SOP069 - Install ODBC for SQL Server\n",
+                "====================================\n",
+                "\n",
+                "Description\n",
+                "-----------\n",
+                "\n",
+                "Some subcommands in `azdata` require the SQL Server ODBC driver. If the\n",
+                "driver is not installed, the following error is given:\n",
+                "\n",
+                "> ERROR: Error processing command: \u201cInterfaceError\u201d (\u2018IM002\u2019, \u2018\\[IM002\\]\n",
+                "> \\[Microsoft\\]\\[ODBC Driver Manager\\] Data source name not found and no\n",
+                "> default driver specified (0) (SQLDriverConnect)\u2019)\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Install ODBC Driver 17 for SQL Server"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import platform\n",
+                "import webbrowser\n",
+                "\n",
+                "if platform.system() == \"Windows\":\n",
+                "    webbrowser.open('https://www.microsoft.com/en-us/download/details.aspx?id=56567')\n",
+                "else:\n",
+                "    webbrowser.open('https://docs.microsoft.com/en-us/sql/connect/odbc/linux-mac/installing-the-microsoft-odbc-driver-for-sql-server')"
+            ]
+        },
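+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Verify the driver is visible (optional)\n",
+                "\n",
+                "A minimal check, assuming the `pyodbc` module is installed (see\n",
+                "[SOP062](sop062-install-ipython-sql-module.ipynb)): after a successful\n",
+                "install, `ODBC Driver 17 for SQL Server` should appear in the list of\n",
+                "drivers reported by the ODBC Driver Manager."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "# List the ODBC drivers the Driver Manager can see (requires pyodbc)\n",
+                "import pyodbc\n",
+                "\n",
+                "print(pyodbc.drivers())"
+            ]
+        },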
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": false,
+            "symlink": true
+        }
+    }
+}

+ 35 - 0
Big-Data-Clusters/CU8/Public/content/install/toc.yml

@@ -0,0 +1,35 @@
+- title: Install 
+  url: /install/readme
+  not_numbered: true
+  expand_sections: false
+  sections:
+  - title: SOP036 - Install kubectl command line interface
+    url: install/sop036-install-kubectl
+  - title: SOP037 - Uninstall kubectl command line interface
+    url: install/sop037-uninstall-kubectl
+  - title: SOP059 - Install Kubernetes Python module
+    url: install/sop059-install-kubernetes-module
+  - title: SOP060 - Uninstall kubernetes module
+    url: install/sop060-uninstall-kubernetes-module
+  - title: SOP062 - Install ipython-sql and pyodbc modules
+    url: install/sop062-install-ipython-sql-module
+  - title: SOP063 - Install azdata CLI (using package manager)
+    url: install/sop063-packman-install-azdata
+  - title: SOP064 - Uninstall azdata CLI (using package manager)
+    url: install/sop064-packman-uninstall-azdata
+  - title: SOP054 - Install azdata CLI (using pip)
+    url: install/sop054-install-azdata
+  - title: SOP055 - Uninstall azdata CLI (using pip)
+    url: install/sop055-uninstall-azdata
+  - title: SOP038 - Install azure command line interface
+    url: install/sop038-install-az
+  - title: SOP039 - Uninstall azure command line interface
+    url: install/sop039-uninstall-az
+  - title: SOP040 - Upgrade pip in ADS Python sandbox
+    url: install/sop040-upgrade-pip
+  - title: SOP069 - Install ODBC for SQL Server
+    url: install/sop069-install-odbc-driver-for-sql-server
+  - title: SOP012 - Install unixodbc for Mac
+    url: install/sop012-brew-install-odbc-for-sql-server
+  - title: SOP010 - Upgrade a big data cluster
+    url: install/sop010-upgrade-bdc

+ 49 - 0
Big-Data-Clusters/CU8/Public/content/log-analyzers/readme.md

@@ -0,0 +1,49 @@
+# Logs notebooks
+
+- A set of notebooks to gather and analyze logs from a SQL Server Big Data Cluster.  The analysis process will SUGGEST follow-on TSGs to run for known issues found in the logs.
+
+[Home](../readme.md)
+
+## Notebooks in this Chapter
+- [TSG046 - Knox gateway logs](tsg046-get-knox-logs.ipynb)
+
+- [TSG036 - Controller logs](tsg036-get-controller-logs.ipynb)
+
+- [TSG034 - Livy logs](tsg034-get-livy-logs.ipynb)
+
+- [TSG035 - Spark History logs](tsg035-get-sparkhistory-logs.ipynb)
+
+- [TSG030 - SQL Server errorlog files](tsg030-get-errorlog-from-all-pods.ipynb)
+
+- [TSG031 - SQL Server PolyBase logs](tsg031-get-polybase-logs-for-all-pods.ipynb)
+
+- [TSG095 - Hadoop namenode logs](tsg095-get-namenode-logs.ipynb)
+
+- [TSG090 - Yarn nodemanager logs](tsg090-get-nodemanager-logs.ipynb)
+
+- [TSG088 - Hadoop datanode logs](tsg088-get-datanode-logs.ipynb)
+
+- [TSG096 - Zookeeper logs](tsg096-get-zookeeper-logs.ipynb)
+
+- [TSG073 - InfluxDB logs](tsg073-get-influxdb-logs.ipynb)
+
+- [TSG076 - Elastic Search logs](tsg076-get-elastic-search-logs.ipynb)
+
+- [TSG077 - Kibana logs](tsg077-get-kibana-logs.ipynb)
+
+- [TSG092 - Supervisord log tail for all containers in BDC](tsg092-get-all-supervisord-log-tails.ipynb)
+
+- [TSG093 - Agent log tail for all containers in BDC](tsg093-get-all-agent-log-tails.ipynb)
+
+- [TSG094 - Grafana logs](tsg094-get-grafana-logs.ipynb)
+
+- [TSG117 - App-Deploy Proxy Nginx Logs](tsg117-get-approxy-nginx-logs.ipynb)
+
+- [TSG120 - Provisioner log tail for all containers in BDC](tsg120-get-all-provisioner-log-tails.ipynb)
+
+- [TSG121 - Supervisor mssql-server logs](tsg121-get-all-supervisor-mssql-logs.ipynb)
+
+- [TSG122 - Hive Metastore logs](tsg122-get-hive-metastore-logs.ipynb)
+
+- [TSG123 - Hive logs](tsg123-get-hive-logs.ipynb)
+

+ 47 - 0
Big-Data-Clusters/CU8/Public/content/log-analyzers/toc.yml

@@ -0,0 +1,47 @@
+- title: Log Analyzers
+  url: /log-analyzers/readme
+  not_numbered: true
+  expand_sections: true
+  sections:
+  - title: TSG046 - Knox gateway logs
+    url: log-analyzers/tsg046-get-knox-logs
+  - title: TSG036 - Controller logs
+    url: log-analyzers/tsg036-get-controller-logs
+  - title: TSG034 - Livy logs
+    url: log-analyzers/tsg034-get-livy-logs
+  - title: TSG035 - Spark History logs
+    url: log-analyzers/tsg035-get-sparkhistory-logs
+  - title: TSG030 - SQL Server errorlog files
+    url: log-analyzers/tsg030-get-errorlog-from-all-pods
+  - title: TSG031 - SQL Server PolyBase logs
+    url: log-analyzers/tsg031-get-polybase-logs-for-all-pods
+  - title: TSG095 - Hadoop namenode logs
+    url: log-analyzers/tsg095-get-namenode-logs
+  - title: TSG090 - Yarn nodemanager logs
+    url: log-analyzers/tsg090-get-nodemanager-logs
+  - title: TSG088 - Hadoop datanode logs
+    url: log-analyzers/tsg088-get-datanode-logs
+  - title: TSG096 - Zookeeper logs
+    url: log-analyzers/tsg096-get-zookeeper-logs
+  - title: TSG073 - InfluxDB logs
+    url: log-analyzers/tsg073-get-influxdb-logs
+  - title: TSG076 - Elastic Search logs
+    url: log-analyzers/tsg076-get-elastic-search-logs
+  - title: TSG077 - Kibana logs
+    url: log-analyzers/tsg077-get-kibana-logs
+  - title: TSG092 - Supervisord log tail for all containers in BDC
+    url: log-analyzers/tsg092-get-all-supervisord-log-tails
+  - title: TSG093 - Agent log tail for all containers in BDC
+    url: log-analyzers/tsg093-get-all-agent-log-tails
+  - title: TSG094 - Grafana logs
+    url: log-analyzers/tsg094-get-grafana-logs
+  - title: TSG117 - App-Deploy Proxy Nginx Logs
+    url: log-analyzers/tsg117-get-approxy-nginx-logs
+  - title: TSG120 - Provisioner log tail for all containers in BDC
+    url: log-analyzers/tsg120-get-all-provisioner-log-tails
+  - title: TSG121 - Supervisor mssql-server logs
+    url: log-analyzers/tsg121-get-all-supervisor-mssql-logs
+  - title: TSG122 - Hive Metastore logs
+    url: log-analyzers/tsg122-get-hive-metastore-logs
+  - title: TSG123 - Hive logs
+    url: log-analyzers/tsg123-get-hive-logs

+ 289 - 0
Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg030-get-errorlog-from-all-pods.ipynb

@@ -0,0 +1,289 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "TSG030 - SQL Server errorlog files\n",
+                "==================================\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "import re\n",
+                "\n",
+                "tail_lines = 500\n",
+                "\n",
+                "pod = None # All\n",
+                "container = \"mssql-server\"\n",
+                "log_files = [ \"/var/opt/mssql/log/errorlog\" ]\n",
+                "\n",
+                "expressions_to_analyze = [\n",
+                "    re.compile(\".{35}Error:\"),\n",
+                "    re.compile(\".{35}Login failed for user '##\"),\n",
+                "    re.compile(\".{35}SqlDumpExceptionHandler\")\n",
+                "]"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Instantiate Kubernetes client"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Instantiate the Python Kubernetes client into 'api' variable\n",
+                "\n",
+                "import os\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "try:\n",
+                "    from kubernetes import client, config\n",
+                "    from kubernetes.stream import stream\n",
+                "\n",
+                "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n",
+                "        config.load_incluster_config()\n",
+                "    else:\n",
+                "        try:\n",
+                "            config.load_kube_config()\n",
+                "        except:\n",
+                "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n",
+                "            raise\n",
+                "    api = client.CoreV1Api()\n",
+                "\n",
+                "    print('Kubernetes client instantiated')\n",
+                "except ImportError:\n",
+                "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n",
+                "    raise"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n",
+                "    except IndexError:\n",
+                "        from IPython.display import Markdown\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print('The kubernetes namespace for your big data cluster is: ' + namespace)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get tail for log"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Display the last 'tail_lines' of files in 'log_files' list\n",
+                "\n",
+                "pods = api.list_namespaced_pod(namespace)\n",
+                "\n",
+                "entries_for_analysis = []\n",
+                "\n",
+                "for p in pods.items:\n",
+                "    if pod is None or p.metadata.name == pod:\n",
+                "        for c in p.spec.containers:\n",
+                "            if container is None or c.name == container:\n",
+                "                for log_file in log_files:\n",
+                "                    print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n",
+                "                    try:\n",
+                "                        output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n",
+                "                    except Exception:\n",
+                "                        print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n",
+                "                    else:\n",
+                "                        for line in output.split('\\n'):\n",
+                "                            for expression in expressions_to_analyze:\n",
+                "                                if expression.match(line):\n",
+                "                                    entries_for_analysis.append(line)\n",
+                "                            print(line)\n",
+                "print(\"\")\n",
+                "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Analyze log entries and suggest relevant Troubleshooting Guides"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Analyze log entries and suggest further relevant troubleshooting guides\n",
+                "from IPython.display import Markdown\n",
+                "import os\n",
+                "import json\n",
+                "import requests\n",
+                "import ipykernel\n",
+                "import datetime\n",
+                "\n",
+                "from urllib.parse import urljoin\n",
+                "from notebook import notebookapp\n",
+                "\n",
+                "def get_notebook_name():\n",
+                "    \"\"\"Return the full path of the jupyter notebook.   Some runtimes (e.g. ADS) \n",
+                "    have the kernel_id in the filename of the connection file.  If so, the \n",
+                "    notebook name at runtime can be determined using `list_running_servers`.\n",
+                "    Other runtimes (e.g. azdata) do not have the kernel_id in the filename of\n",
+                "    the connection file, therefore we are unable to establish the filename\n",
+                "    \"\"\"\n",
+                "    connection_file = os.path.basename(ipykernel.get_connection_file())\n",
+                "    \n",
+                "    # If the runtime has the kernel_id in the connection filename, use it to\n",
+                "    # get the real notebook name at runtime, otherwise, use the notebook \n",
+                "    # filename from build time.\n",
+                "    try: \n",
+                "        kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n",
+                "    except:\n",
+                "        pass\n",
+                "    else:\n",
+                "        for servers in list(notebookapp.list_running_servers()):\n",
+                "            try:\n",
+                "                response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n",
+                "            except:\n",
+                "                pass\n",
+                "            else:\n",
+                "                for nn in json.loads(response.text):\n",
+                "                    if nn['kernel']['id'] == kernel_id:\n",
+                "                        return nn['path']\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def get_notebook_rules():\n",
+                "    \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n",
+                "    file_name = get_notebook_name()\n",
+                "\n",
+                "    if file_name == None:\n",
+                "        return None\n",
+                "    else:\n",
+                "        j = load_json(file_name)\n",
+                "\n",
+                "        if \"azdata\" not in j[\"metadata\"] or \\\n",
+                "            \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n",
+                "            \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "            return []\n",
+                "        else:\n",
+                "            return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n",
+                "\n",
+                "rules = get_notebook_rules()\n",
+                "\n",
+                "if rules == None:\n",
+                "    print(\"\")\n",
+                "    print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n",
+                "else:\n",
+                "    print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n",
+                "    print(rules)\n",
+                "    hints = 0\n",
+                "    if len(rules) > 0:\n",
+                "        for entry in entries_for_analysis:\n",
+                "            for rule in rules:\n",
+                "                if entry.find(rule[0]) != -1:\n",
+                "                    print (entry)\n",
+                "\n",
+                "                    display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n",
+                "                    hints = hints + 1\n",
+                "\n",
+                "    print(\"\")\n",
+                "    print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). {hints} further troubleshooting hints made inline.\")"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": false
+        }
+    }
+}

+ 287 - 0
Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg031-get-polybase-logs-for-all-pods.ipynb

@@ -0,0 +1,287 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "TSG031 - SQL Server PolyBase logs\n",
+                "=================================\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "import re\n",
+                "\n",
+                "tail_lines = 500\n",
+                "\n",
+                "pod = None # All\n",
+                "container = \"mssql-server\"\n",
+                "log_files = [ \"/var/opt/mssql/log/polybase/MSSQLSERVER_*_errors.log\" ]\n",
+                "\n",
+                "expressions_to_analyze = [\n",
+                "    re.compile(\"(.*)MppSqlException\")\n",
+                "]"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Instantiate Kubernetes client"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Instantiate the Python Kubernetes client into 'api' variable\n",
+                "\n",
+                "import os\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "try:\n",
+                "    from kubernetes import client, config\n",
+                "    from kubernetes.stream import stream\n",
+                "\n",
+                "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n",
+                "        config.load_incluster_config()\n",
+                "    else:\n",
+                "        try:\n",
+                "            config.load_kube_config()\n",
+                "        except:\n",
+                "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n",
+                "            raise\n",
+                "    api = client.CoreV1Api()\n",
+                "\n",
+                "    print('Kubernetes client instantiated')\n",
+                "except ImportError:\n",
+                "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n",
+                "    raise"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n",
+                "    except IndexError:\n",
+                "        from IPython.display import Markdown\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print('The kubernetes namespace for your big data cluster is: ' + namespace)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get tail for log"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Display the last 'tail_lines' of files in 'log_files' list\n",
+                "\n",
+                "pods = api.list_namespaced_pod(namespace)\n",
+                "\n",
+                "entries_for_analysis = []\n",
+                "\n",
+                "for p in pods.items:\n",
+                "    if pod is None or p.metadata.name == pod:\n",
+                "        for c in p.spec.containers:\n",
+                "            if container is None or c.name == container:\n",
+                "                for log_file in log_files:\n",
+                "                    print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n",
+                "                    try:\n",
+                "                        output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n",
+                "                    except Exception:\n",
+                "                        print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n",
+                "                    else:\n",
+                "                        for line in output.split('\\n'):\n",
+                "                            for expression in expressions_to_analyze:\n",
+                "                                if expression.match(line):\n",
+                "                                    entries_for_analysis.append(line)\n",
+                "                            print(line)\n",
+                "print(\"\")\n",
+                "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Analyze log entries and suggest relevant Troubleshooting Guides"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Analyze log entries and suggest further relevant troubleshooting guides\n",
+                "from IPython.display import Markdown\n",
+                "import os\n",
+                "import json\n",
+                "import requests\n",
+                "import ipykernel\n",
+                "import datetime\n",
+                "\n",
+                "from urllib.parse import urljoin\n",
+                "from notebook import notebookapp\n",
+                "\n",
+                "def get_notebook_name():\n",
+                "    \"\"\"Return the full path of the jupyter notebook.   Some runtimes (e.g. ADS) \n",
+                "    have the kernel_id in the filename of the connection file.  If so, the \n",
+                "    notebook name at runtime can be determined using `list_running_servers`.\n",
+                "    Other runtimes (e.g. azdata) do not have the kernel_id in the filename of\n",
+                "    the connection file, therefore we are unable to establish the filename\n",
+                "    \"\"\"\n",
+                "    connection_file = os.path.basename(ipykernel.get_connection_file())\n",
+                "    \n",
+                "    # If the runtime has the kernel_id in the connection filename, use it to\n",
+                "    # get the real notebook name at runtime, otherwise, use the notebook \n",
+                "    # filename from build time.\n",
+                "    try: \n",
+                "        kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n",
+                "    except:\n",
+                "        pass\n",
+                "    else:\n",
+                "        for servers in list(notebookapp.list_running_servers()):\n",
+                "            try:\n",
+                "                response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n",
+                "            except:\n",
+                "                pass\n",
+                "            else:\n",
+                "                for nn in json.loads(response.text):\n",
+                "                    if nn['kernel']['id'] == kernel_id:\n",
+                "                        return nn['path']\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def get_notebook_rules():\n",
+                "    \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n",
+                "    file_name = get_notebook_name()\n",
+                "\n",
+                "    if file_name == None:\n",
+                "        return None\n",
+                "    else:\n",
+                "        j = load_json(file_name)\n",
+                "\n",
+                "        if \"azdata\" not in j[\"metadata\"] or \\\n",
+                "            \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n",
+                "            \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "            return []\n",
+                "        else:\n",
+                "            return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n",
+                "\n",
+                "rules = get_notebook_rules()\n",
+                "\n",
+                "if rules == None:\n",
+                "    print(\"\")\n",
+                "    print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n",
+                "else:\n",
+                "    print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n",
+                "    print(rules)\n",
+                "    hints = 0\n",
+                "    if len(rules) > 0:\n",
+                "        for entry in entries_for_analysis:\n",
+                "            for rule in rules:\n",
+                "                if entry.find(rule[0]) != -1:\n",
+                "                    print (entry)\n",
+                "\n",
+                "                    display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n",
+                "                    hints = hints + 1\n",
+                "\n",
+                "    print(\"\")\n",
+                "    print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). {hints} further troubleshooting hints made inline.\")"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": false
+        }
+    }
+}

+ 291 - 0
Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg034-get-livy-logs.ipynb

@@ -0,0 +1,291 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "TSG034 - Livy logs\n",
+                "==================\n",
+                "\n",
+                "Description\n",
+                "-----------\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "import re\n",
+                "\n",
+                "tail_lines = 500\n",
+                "\n",
+                "pod = None # All\n",
+                "container = 'hadoop-livy-sparkhistory'\n",
+                "log_files = [ '/var/log/supervisor/log/livy*' ]\n",
+                "\n",
+                "expressions_to_analyze = [\n",
+                "    re.compile(\".{17} WARN \"),\n",
+                "    re.compile(\".{17} ERROR \")\n",
+                "]"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Instantiate Kubernetes client"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Instantiate the Python Kubernetes client into 'api' variable\n",
+                "\n",
+                "import os\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "try:\n",
+                "    from kubernetes import client, config\n",
+                "    from kubernetes.stream import stream\n",
+                "\n",
+                "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n",
+                "        config.load_incluster_config()\n",
+                "    else:\n",
+                "        try:\n",
+                "            config.load_kube_config()\n",
+                "        except:\n",
+                "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n",
+                "            raise\n",
+                "    api = client.CoreV1Api()\n",
+                "\n",
+                "    print('Kubernetes client instantiated')\n",
+                "except ImportError:\n",
+                "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n",
+                "    raise"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n",
+                "    except IndexError:\n",
+                "        from IPython.display import Markdown\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print('The kubernetes namespace for your big data cluster is: ' + namespace)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get tail for log"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Display the last 'tail_lines' of files in 'log_files' list\n",
+                "\n",
+                "pods = api.list_namespaced_pod(namespace)\n",
+                "\n",
+                "entries_for_analysis = []\n",
+                "\n",
+                "for p in pods.items:\n",
+                "    if pod is None or p.metadata.name == pod:\n",
+                "        for c in p.spec.containers:\n",
+                "            if container is None or c.name == container:\n",
+                "                for log_file in log_files:\n",
+                "                    print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n",
+                "                    try:\n",
+                "                        output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n",
+                "                    except Exception:\n",
+                "                        print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n",
+                "                    else:\n",
+                "                        for line in output.split('\\n'):\n",
+                "                            for expression in expressions_to_analyze:\n",
+                "                                if expression.match(line):\n",
+                "                                    entries_for_analysis.append(line)\n",
+                "                            print(line)\n",
+                "print(\"\")\n",
+                "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Analyze log entries and suggest relevant Troubleshooting Guides"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Analyze log entries and suggest further relevant troubleshooting guides\n",
+                "from IPython.display import Markdown\n",
+                "import os\n",
+                "import json\n",
+                "import requests\n",
+                "import ipykernel\n",
+                "import datetime\n",
+                "\n",
+                "from urllib.parse import urljoin\n",
+                "from notebook import notebookapp\n",
+                "\n",
+                "def get_notebook_name():\n",
+                "    \"\"\"Return the full path of the jupyter notebook.   Some runtimes (e.g. ADS) \n",
+                "    have the kernel_id in the filename of the connection file.  If so, the \n",
+                "    notebook name at runtime can be determined using `list_running_servers`.\n",
+                "    Other runtimes (e.g. azdata) do not have the kernel_id in the filename of\n",
+                "    the connection file, therefore we are unable to establish the filename\n",
+                "    \"\"\"\n",
+                "    connection_file = os.path.basename(ipykernel.get_connection_file())\n",
+                "    \n",
+                "    # If the runtime has the kernel_id in the connection filename, use it to\n",
+                "    # get the real notebook name at runtime, otherwise, use the notebook \n",
+                "    # filename from build time.\n",
+                "    try: \n",
+                "        kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n",
+                "    except:\n",
+                "        pass\n",
+                "    else:\n",
+                "        for servers in list(notebookapp.list_running_servers()):\n",
+                "            try:\n",
+                "                response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n",
+                "            except:\n",
+                "                pass\n",
+                "            else:\n",
+                "                for nn in json.loads(response.text):\n",
+                "                    if nn['kernel']['id'] == kernel_id:\n",
+                "                        return nn['path']\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def get_notebook_rules():\n",
+                "    \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n",
+                "    file_name = get_notebook_name()\n",
+                "\n",
+                "    if file_name == None:\n",
+                "        return None\n",
+                "    else:\n",
+                "        j = load_json(file_name)\n",
+                "\n",
+                "        if \"azdata\" not in j[\"metadata\"] or \\\n",
+                "            \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n",
+                "            \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "            return []\n",
+                "        else:\n",
+                "            return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n",
+                "\n",
+                "rules = get_notebook_rules()\n",
+                "\n",
+                "if rules == None:\n",
+                "    print(\"\")\n",
+                "    print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n",
+                "else:\n",
+                "    print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n",
+                "    print(rules)\n",
+                "    hints = 0\n",
+                "    if len(rules) > 0:\n",
+                "        for entry in entries_for_analysis:\n",
+                "            for rule in rules:\n",
+                "                if entry.find(rule[0]) != -1:\n",
+                "                    print (entry)\n",
+                "\n",
+                "                    display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n",
+                "                    hints = hints + 1\n",
+                "\n",
+                "    print(\"\")\n",
+                "    print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). {hints} further troubleshooting hints made inline.\")"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": false
+        }
+    }
+}

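The "Get tail for log" cell in tsg034 (and in each notebook below) fetches log
tails by exec'ing `tail` inside the target container over the Kubernetes API. A
minimal sketch of that pattern; the namespace and pod name here ('mssql-cluster',
'sparkhead-0') are placeholders, not values from the source:

    from kubernetes import client, config
    from kubernetes.stream import stream

    config.load_kube_config()
    api = client.CoreV1Api()

    # Run 'tail' inside the container and capture its combined output as a string.
    output = stream(api.connect_get_namespaced_pod_exec,
                    "sparkhead-0", "mssql-cluster",
                    command=['/bin/sh', '-c', 'tail -n 500 /var/log/supervisor/log/livy*'],
                    container='hadoop-livy-sparkhistory',
                    stderr=True, stdout=True)

    for line in output.split('\n'):
        print(line)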
+ 291 - 0
Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg035-get-sparkhistory-logs.ipynb

@@ -0,0 +1,291 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "TSG035 - Spark History logs\n",
+                "===========================\n",
+                "\n",
+                "Description\n",
+                "-----------\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "import re\n",
+                "\n",
+                "tail_lines = 500\n",
+                "\n",
+                "pod = None # All\n",
+                "container='hadoop-livy-sparkhistory'\n",
+                "log_files = [ \"/var/log/supervisor/log/sparkhistory*\" ]\n",
+                "\n",
+                "expressions_to_analyze = [\n",
+                "    re.compile(\".{23} WARN \"),\n",
+                "    re.compile(\".{23} ERROR \")\n",
+                "]"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Instantiate Kubernetes client"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Instantiate the Python Kubernetes client into 'api' variable\n",
+                "\n",
+                "import os\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "try:\n",
+                "    from kubernetes import client, config\n",
+                "    from kubernetes.stream import stream\n",
+                "\n",
+                "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n",
+                "        config.load_incluster_config()\n",
+                "    else:\n",
+                "        try:\n",
+                "            config.load_kube_config()\n",
+                "        except:\n",
+                "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n",
+                "            raise\n",
+                "    api = client.CoreV1Api()\n",
+                "\n",
+                "    print('Kubernetes client instantiated')\n",
+                "except ImportError:\n",
+                "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n",
+                "    raise"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n",
+                "    except IndexError:\n",
+                "        from IPython.display import Markdown\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print('The kubernetes namespace for your big data cluster is: ' + namespace)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get tail for log"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Display the last 'tail_lines' of files in 'log_files' list\n",
+                "\n",
+                "pods = api.list_namespaced_pod(namespace)\n",
+                "\n",
+                "entries_for_analysis = []\n",
+                "\n",
+                "for p in pods.items:\n",
+                "    if pod is None or p.metadata.name == pod:\n",
+                "        for c in p.spec.containers:\n",
+                "            if container is None or c.name == container:\n",
+                "                for log_file in log_files:\n",
+                "                    print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n",
+                "                    try:\n",
+                "                        output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n",
+                "                    except Exception:\n",
+                "                        print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n",
+                "                    else:\n",
+                "                        for line in output.split('\\n'):\n",
+                "                            for expression in expressions_to_analyze:\n",
+                "                                if expression.match(line):\n",
+                "                                    entries_for_analysis.append(line)\n",
+                "                            print(line)\n",
+                "print(\"\")\n",
+                "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Analyze log entries and suggest relevant Troubleshooting Guides"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Analyze log entries and suggest further relevant troubleshooting guides\n",
+                "from IPython.display import Markdown\n",
+                "import os\n",
+                "import json\n",
+                "import requests\n",
+                "import ipykernel\n",
+                "import datetime\n",
+                "\n",
+                "from urllib.parse import urljoin\n",
+                "from notebook import notebookapp\n",
+                "\n",
+                "def get_notebook_name():\n",
+                "    \"\"\"Return the full path of the jupyter notebook.   Some runtimes (e.g. ADS) \n",
+                "    have the kernel_id in the filename of the connection file.  If so, the \n",
+                "    notebook name at runtime can be determined using `list_running_servers`.\n",
+                "    Other runtimes (e.g. azdata) do not have the kernel_id in the filename of\n",
+                "    the connection file, therefore we are unable to establish the filename\n",
+                "    \"\"\"\n",
+                "    connection_file = os.path.basename(ipykernel.get_connection_file())\n",
+                "    \n",
+                "    # If the runtime has the kernel_id in the connection filename, use it to\n",
+                "    # get the real notebook name at runtime, otherwise, use the notebook \n",
+                "    # filename from build time.\n",
+                "    try: \n",
+                "        kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n",
+                "    except:\n",
+                "        pass\n",
+                "    else:\n",
+                "        for servers in list(notebookapp.list_running_servers()):\n",
+                "            try:\n",
+                "                response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n",
+                "            except:\n",
+                "                pass\n",
+                "            else:\n",
+                "                for nn in json.loads(response.text):\n",
+                "                    if nn['kernel']['id'] == kernel_id:\n",
+                "                        return nn['path']\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def get_notebook_rules():\n",
+                "    \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n",
+                "    file_name = get_notebook_name()\n",
+                "\n",
+                "    if file_name == None:\n",
+                "        return None\n",
+                "    else:\n",
+                "        j = load_json(file_name)\n",
+                "\n",
+                "        if \"azdata\" not in j[\"metadata\"] or \\\n",
+                "            \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n",
+                "            \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "            return []\n",
+                "        else:\n",
+                "            return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n",
+                "\n",
+                "rules = get_notebook_rules()\n",
+                "\n",
+                "if rules == None:\n",
+                "    print(\"\")\n",
+                "    print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n",
+                "else:\n",
+                "    print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n",
+                "    print(rules)\n",
+                "    hints = 0\n",
+                "    if len(rules) > 0:\n",
+                "        for entry in entries_for_analysis:\n",
+                "            for rule in rules:\n",
+                "                if entry.find(rule[0]) != -1:\n",
+                "                    print (entry)\n",
+                "\n",
+                "                    display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n",
+                "                    hints = hints + 1\n",
+                "\n",
+                "    print(\"\")\n",
+                "    print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). {hints} further troubleshooting hints made inline.\")"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": false
+        }
+    }
+}

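A note on the expressions_to_analyze parameter used throughout these notebooks:
the fixed-width prefix (.{17} for Livy, .{23} for Spark History and Knox, .{26}
for the controller) skips each component's timestamp column so that only the
log-level token is matched, and re.match anchors the pattern at the start of the
line. A quick illustrative check (the sample line is invented):

    import re

    expressions_to_analyze = [
        re.compile(".{23} WARN "),
        re.compile(".{23} ERROR ")
    ]

    # 23 characters of timestamp, then the level column.
    line = "2020-01-01 00:00:00,000 ERROR SparkHistory: something failed"
    print(any(e.match(line) for e in expressions_to_analyze))  # True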
+ 327 - 0
Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg036-get-controller-logs.ipynb

@@ -0,0 +1,327 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "TSG036 - Controller logs\n",
+                "========================\n",
+                "\n",
+                "Get the last \u2018n\u2019 hours of controller logs.\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "import re\n",
+                "from datetime import datetime\n",
+                "\n",
+                "tail_lines = 500\n",
+                "\n",
+                "# The controller log files are kept in a yyyy-mm-dd folder structure\n",
+                "#\n",
+                "d = datetime.utcnow()\n",
+                "date = \"{0}-{1:02d}-{2:02d}\".format(d.year, d.month, d.day)\n",
+                "folder = f\"/var/log/controller/{date}\"\n",
+                "\n",
+                "pod = None # All\n",
+                "container = 'controller'\n",
+                "log_files = [ f'{folder}/controller.log', f'{folder}/kube.log', f'{folder}/controller.out', f'{folder}/access.log' ]\n",
+                "\n",
+                "expressions_to_analyze = [\n",
+                "    re.compile(\".{26} WARN \"),\n",
+                "    re.compile(\".{26} ERROR \")\n",
+                "]\n",
+                "\n",
+                "print(\"Log files to get:\")\n",
+                "print(log_files)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Instantiate Kubernetes client"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Instantiate the Python Kubernetes client into 'api' variable\n",
+                "\n",
+                "import os\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "try:\n",
+                "    from kubernetes import client, config\n",
+                "    from kubernetes.stream import stream\n",
+                "\n",
+                "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n",
+                "        config.load_incluster_config()\n",
+                "    else:\n",
+                "        try:\n",
+                "            config.load_kube_config()\n",
+                "        except:\n",
+                "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n",
+                "            raise\n",
+                "    api = client.CoreV1Api()\n",
+                "\n",
+                "    print('Kubernetes client instantiated')\n",
+                "except ImportError:\n",
+                "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n",
+                "    raise"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n",
+                "    except IndexError:\n",
+                "        from IPython.display import Markdown\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print('The kubernetes namespace for your big data cluster is: ' + namespace)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get tail for log"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Display the last 'tail_lines' of files in 'log_files' list\n",
+                "\n",
+                "pods = api.list_namespaced_pod(namespace)\n",
+                "\n",
+                "entries_for_analysis = []\n",
+                "\n",
+                "for p in pods.items:\n",
+                "    if pod is None or p.metadata.name == pod:\n",
+                "        for c in p.spec.containers:\n",
+                "            if container is None or c.name == container:\n",
+                "                for log_file in log_files:\n",
+                "                    print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n",
+                "                    try:\n",
+                "                        output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n",
+                "                    except Exception:\n",
+                "                        print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n",
+                "                    else:\n",
+                "                        for line in output.split('\\n'):\n",
+                "                            for expression in expressions_to_analyze:\n",
+                "                                if expression.match(line):\n",
+                "                                    entries_for_analysis.append(line)\n",
+                "                            print(line)\n",
+                "print(\"\")\n",
+                "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Analyze log entries and suggest relevant Troubleshooting Guides"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Analyze log entries and suggest further relevant troubleshooting guides\n",
+                "from IPython.display import Markdown\n",
+                "import os\n",
+                "import json\n",
+                "import requests\n",
+                "import ipykernel\n",
+                "import datetime\n",
+                "\n",
+                "from urllib.parse import urljoin\n",
+                "from notebook import notebookapp\n",
+                "\n",
+                "def get_notebook_name():\n",
+                "    \"\"\"Return the full path of the jupyter notebook.   Some runtimes (e.g. ADS) \n",
+                "    have the kernel_id in the filename of the connection file.  If so, the \n",
+                "    notebook name at runtime can be determined using `list_running_servers`.\n",
+                "    Other runtimes (e.g. azdata) do not have the kernel_id in the filename of\n",
+                "    the connection file, therefore we are unable to establish the filename\n",
+                "    \"\"\"\n",
+                "    connection_file = os.path.basename(ipykernel.get_connection_file())\n",
+                "    \n",
+                "    # If the runtime has the kernel_id in the connection filename, use it to\n",
+                "    # get the real notebook name at runtime, otherwise, use the notebook \n",
+                "    # filename from build time.\n",
+                "    try: \n",
+                "        kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n",
+                "    except:\n",
+                "        pass\n",
+                "    else:\n",
+                "        for servers in list(notebookapp.list_running_servers()):\n",
+                "            try:\n",
+                "                response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n",
+                "            except:\n",
+                "                pass\n",
+                "            else:\n",
+                "                for nn in json.loads(response.text):\n",
+                "                    if nn['kernel']['id'] == kernel_id:\n",
+                "                        return nn['path']\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def get_notebook_rules():\n",
+                "    \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n",
+                "    file_name = get_notebook_name()\n",
+                "\n",
+                "    if file_name == None:\n",
+                "        return None\n",
+                "    else:\n",
+                "        j = load_json(file_name)\n",
+                "\n",
+                "        if \"azdata\" not in j[\"metadata\"] or \\\n",
+                "            \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n",
+                "            \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "            return []\n",
+                "        else:\n",
+                "            return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n",
+                "\n",
+                "rules = get_notebook_rules()\n",
+                "\n",
+                "if rules == None:\n",
+                "    print(\"\")\n",
+                "    print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n",
+                "else:\n",
+                "    print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n",
+                "    print(rules)\n",
+                "    hints = 0\n",
+                "    if len(rules) > 0:\n",
+                "        for entry in entries_for_analysis:\n",
+                "            for rule in rules:\n",
+                "                if entry.find(rule[0]) != -1:\n",
+                "                    print (entry)\n",
+                "\n",
+                "                    display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n",
+                "                    hints = hints + 1\n",
+                "\n",
+                "    print(\"\")\n",
+                "    print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). {hints} further troubleshooting hints made inline.\")"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "Related\n",
+                "-------\n",
+                "\n",
+                "-   [TSG027 - Observe cluster\n",
+                "    deployment](../diagnose/tsg027-observe-bdc-create.ipynb)"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": false,
+            "expert": {
+                "log_analyzer_rules": [
+                    [
+                        "doc is missing key: /data",
+                        "TSG038",
+                        "TSG038 - BDC create failures due to - doc is missing key",
+                        "../repair/tsg038-doc-is-missing-key-error.ipynb"
+                    ],
+                    [
+                        "Failed when starting controller service. System.TimeoutException:\nOperation timed out after 10 minutes",
+                        "TSG057",
+                        "TSG057 - Failed when starting controller service. System.TimeoutException",
+                        "../repair/tsg057-failed-when-starting-controller.ipynb"
+                    ]
+                ]
+            }
+        }
+    }
+}

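tsg036 only ever looks at the current UTC day's log folder. To pull an earlier
day's controller logs, rebuild the 'folder' parameter for that date before the
tail cell runs; a small sketch of one way to do it (the one-day offset is
illustrative):

    from datetime import datetime, timedelta

    # The controller keeps its logs in /var/log/controller/yyyy-mm-dd folders,
    # so targeting another day is just a matter of formatting that date.
    d = datetime.utcnow() - timedelta(days=1)  # e.g. yesterday
    date = "{0}-{1:02d}-{2:02d}".format(d.year, d.month, d.day)
    folder = f"/var/log/controller/{date}"

    log_files = [f'{folder}/controller.log', f'{folder}/kube.log',
                 f'{folder}/controller.out', f'{folder}/access.log']
    print(log_files)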
+ 328 - 0
Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg046-get-knox-logs.ipynb

@@ -0,0 +1,328 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "TSG046 - Knox gateway logs\n",
+                "==========================\n",
+                "\n",
+                "Description\n",
+                "-----------\n",
+                "\n",
+                "Knox gives a 500 error to the client, and removes details (the stack)\n",
+                "pointing to the cause of the underlying issue. Therefore use this TSG to\n",
+                "get the Knox logs from the cluster.\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "import re\n",
+                "\n",
+                "tail_lines = 500\n",
+                "\n",
+                "pod = None # All\n",
+                "container='knox'\n",
+                "log_files = [ \"/var/log/knox/gateway.log\" ]\n",
+                "\n",
+                "expressions_to_analyze = [\n",
+                "    re.compile(\".{23} WARN \"),\n",
+                "    re.compile(\".{23} ERROR \")\n",
+                "]"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Instantiate Kubernetes client"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Instantiate the Python Kubernetes client into 'api' variable\n",
+                "\n",
+                "import os\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "try:\n",
+                "    from kubernetes import client, config\n",
+                "    from kubernetes.stream import stream\n",
+                "\n",
+                "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n",
+                "        config.load_incluster_config()\n",
+                "    else:\n",
+                "        try:\n",
+                "            config.load_kube_config()\n",
+                "        except:\n",
+                "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n",
+                "            raise\n",
+                "    api = client.CoreV1Api()\n",
+                "\n",
+                "    print('Kubernetes client instantiated')\n",
+                "except ImportError:\n",
+                "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n",
+                "    raise"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n",
+                "    except IndexError:\n",
+                "        from IPython.display import Markdown\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print('The kubernetes namespace for your big data cluster is: ' + namespace)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get tail for log"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Display the last 'tail_lines' of files in 'log_files' list\n",
+                "\n",
+                "pods = api.list_namespaced_pod(namespace)\n",
+                "\n",
+                "entries_for_analysis = []\n",
+                "\n",
+                "for p in pods.items:\n",
+                "    if pod is None or p.metadata.name == pod:\n",
+                "        for c in p.spec.containers:\n",
+                "            if container is None or c.name == container:\n",
+                "                for log_file in log_files:\n",
+                "                    print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n",
+                "                    try:\n",
+                "                        output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n",
+                "                    except Exception:\n",
+                "                        print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n",
+                "                    else:\n",
+                "                        for line in output.split('\\n'):\n",
+                "                            for expression in expressions_to_analyze:\n",
+                "                                if expression.match(line):\n",
+                "                                    entries_for_analysis.append(line)\n",
+                "                            print(line)\n",
+                "print(\"\")\n",
+                "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Analyze log entries and suggest relevant Troubleshooting Guides"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Analyze log entries and suggest further relevant troubleshooting guides\n",
+                "from IPython.display import Markdown\n",
+                "import os\n",
+                "import json\n",
+                "import requests\n",
+                "import ipykernel\n",
+                "import datetime\n",
+                "\n",
+                "from urllib.parse import urljoin\n",
+                "from notebook import notebookapp\n",
+                "\n",
+                "def get_notebook_name():\n",
+                "    \"\"\"Return the full path of the jupyter notebook.   Some runtimes (e.g. ADS) \n",
+                "    have the kernel_id in the filename of the connection file.  If so, the \n",
+                "    notebook name at runtime can be determined using `list_running_servers`.\n",
+                "    Other runtimes (e.g. azdata) do not have the kernel_id in the filename of\n",
+                "    the connection file, therefore we are unable to establish the filename\n",
+                "    \"\"\"\n",
+                "    connection_file = os.path.basename(ipykernel.get_connection_file())\n",
+                "    \n",
+                "    # If the runtime has the kernel_id in the connection filename, use it to\n",
+                "    # get the real notebook name at runtime, otherwise, use the notebook \n",
+                "    # filename from build time.\n",
+                "    try: \n",
+                "        kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n",
+                "    except:\n",
+                "        pass\n",
+                "    else:\n",
+                "        for servers in list(notebookapp.list_running_servers()):\n",
+                "            try:\n",
+                "                response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n",
+                "            except:\n",
+                "                pass\n",
+                "            else:\n",
+                "                for nn in json.loads(response.text):\n",
+                "                    if nn['kernel']['id'] == kernel_id:\n",
+                "                        return nn['path']\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def get_notebook_rules():\n",
+                "    \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n",
+                "    file_name = get_notebook_name()\n",
+                "\n",
+                "    if file_name == None:\n",
+                "        return None\n",
+                "    else:\n",
+                "        j = load_json(file_name)\n",
+                "\n",
+                "        if \"azdata\" not in j[\"metadata\"] or \\\n",
+                "            \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n",
+                "            \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "            return []\n",
+                "        else:\n",
+                "            return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n",
+                "\n",
+                "rules = get_notebook_rules()\n",
+                "\n",
+                "if rules == None:\n",
+                "    print(\"\")\n",
+                "    print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n",
+                "else:\n",
+                "    print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n",
+                "    print(rules)\n",
+                "    hints = 0\n",
+                "    if len(rules) > 0:\n",
+                "        for entry in entries_for_analysis:\n",
+                "            for rule in rules:\n",
+                "                if entry.find(rule[0]) != -1:\n",
+                "                    print (entry)\n",
+                "\n",
+                "                    display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n",
+                "                    hints = hints + 1\n",
+                "\n",
+                "    print(\"\")\n",
+                "    print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). {hints} further troubleshooting hints made inline.\")"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": false,
+            "expert": {
+                "rules": [
+                    [
+                        "SAM008",
+                        "code",
+                        "stream",
+                        "name",
+                        "stdout",
+                        "text",
+                        ".\\*ERROR: 500"
+                    ]
+                ],
+                "log_analyzer_rules": [
+                    [
+                        "Invalid object name \u2018roles\\_permissions\u2019",
+                        "TSG039",
+                        "TSG039 - Invalid object name 'role_permissions'",
+                        "../repair/tsg039-invalid-object-name-role-permissions.ipynb"
+                    ],
+                    [
+                        "Name node is in safe mode",
+                        "TSG024",
+                        "TSG024 - Namenode is in safe mode",
+                        "../repair/tsg024-name-node-is-in-safe-mode.ipynb"
+                    ],
+                    [
+                        "Connection exception dispatching request",
+                        "TSG034",
+                        "TSG034 - Livy logs",
+                        "../log-analyzers/tsg034-get-livy-logs.ipynb"
+                    ]
+                ]
+            }
+        }
+    }
+}

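The get_notebook_rules helper in these notebooks works because the HINT rules are
stored in the notebook's own JSON, under metadata.azdata.expert.log_analyzer_rules
(visible in the tsg036 and tsg046 hunks above). A minimal sketch of reading them
back out of a notebook file on disk; the filename assumes the .ipynb sits in the
current directory:

    import json

    with open("tsg046-get-knox-logs.ipynb", encoding="utf8") as f:
        nb = json.load(f)

    # Missing keys fall back to an empty rule list, mirroring the notebook's checks.
    rules = (nb.get("metadata", {})
               .get("azdata", {})
               .get("expert", {})
               .get("log_analyzer_rules", []))

    for match_string, tsg_id, title, path in rules:
        print(f"{tsg_id}: match '{match_string}' -> {path}")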
+ 285 - 0
Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg073-get-influxdb-logs.ipynb

@@ -0,0 +1,285 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "TSG073 - InfluxDB logs\n",
+                "======================\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "import re\n",
+                "\n",
+                "tail_lines = 500\n",
+                "\n",
+                "pod = None # All\n",
+                "container = \"influxdb\"\n",
+                "log_files = [ \"/var/log/supervisor/log/influxdb*.log\" ]\n",
+                "\n",
+                "expressions_to_analyze = []"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Instantiate Kubernetes client"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Instantiate the Python Kubernetes client into 'api' variable\n",
+                "\n",
+                "import os\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "try:\n",
+                "    from kubernetes import client, config\n",
+                "    from kubernetes.stream import stream\n",
+                "\n",
+                "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n",
+                "        config.load_incluster_config()\n",
+                "    else:\n",
+                "        try:\n",
+                "            config.load_kube_config()\n",
+                "        except:\n",
+                "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n",
+                "            raise\n",
+                "    api = client.CoreV1Api()\n",
+                "\n",
+                "    print('Kubernetes client instantiated')\n",
+                "except ImportError:\n",
+                "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n",
+                "    raise"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n",
+                "    except IndexError:\n",
+                "        from IPython.display import Markdown\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print('The kubernetes namespace for your big data cluster is: ' + namespace)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get tail for log"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Display the last 'tail_lines' of files in 'log_files' list\n",
+                "\n",
+                "pods = api.list_namespaced_pod(namespace)\n",
+                "\n",
+                "entries_for_analysis = []\n",
+                "\n",
+                "for p in pods.items:\n",
+                "    if pod is None or p.metadata.name == pod:\n",
+                "        for c in p.spec.containers:\n",
+                "            if container is None or c.name == container:\n",
+                "                for log_file in log_files:\n",
+                "                    print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n",
+                "                    try:\n",
+                "                        output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n",
+                "                    except Exception:\n",
+                "                        print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n",
+                "                    else:\n",
+                "                        for line in output.split('\\n'):\n",
+                "                            for expression in expressions_to_analyze:\n",
+                "                                if expression.match(line):\n",
+                "                                    entries_for_analysis.append(line)\n",
+                "                            print(line)\n",
+                "print(\"\")\n",
+                "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Analyze log entries and suggest relevant Troubleshooting Guides"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Analyze log entries and suggest further relevant troubleshooting guides\n",
+                "from IPython.display import Markdown\n",
+                "import os\n",
+                "import json\n",
+                "import requests\n",
+                "import ipykernel\n",
+                "import datetime\n",
+                "\n",
+                "from urllib.parse import urljoin\n",
+                "from notebook import notebookapp\n",
+                "\n",
+                "def get_notebook_name():\n",
+                "    \"\"\"Return the full path of the jupyter notebook.   Some runtimes (e.g. ADS) \n",
+                "    have the kernel_id in the filename of the connection file.  If so, the \n",
+                "    notebook name at runtime can be determined using `list_running_servers`.\n",
+                "    Other runtimes (e.g. azdata) do not have the kernel_id in the filename of\n",
+                "    the connection file, therefore we are unable to establish the filename\n",
+                "    \"\"\"\n",
+                "    connection_file = os.path.basename(ipykernel.get_connection_file())\n",
+                "    \n",
+                "    # If the runtime has the kernel_id in the connection filename, use it to\n",
+                "    # get the real notebook name at runtime, otherwise, use the notebook \n",
+                "    # filename from build time.\n",
+                "    try: \n",
+                "        kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n",
+                "    except:\n",
+                "        pass\n",
+                "    else:\n",
+                "        for servers in list(notebookapp.list_running_servers()):\n",
+                "            try:\n",
+                "                response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n",
+                "            except:\n",
+                "                pass\n",
+                "            else:\n",
+                "                for nn in json.loads(response.text):\n",
+                "                    if nn['kernel']['id'] == kernel_id:\n",
+                "                        return nn['path']\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def get_notebook_rules():\n",
+                "    \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n",
+                "    file_name = get_notebook_name()\n",
+                "\n",
+                "    if file_name == None:\n",
+                "        return None\n",
+                "    else:\n",
+                "        j = load_json(file_name)\n",
+                "\n",
+                "        if \"azdata\" not in j[\"metadata\"] or \\\n",
+                "            \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n",
+                "            \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "            return []\n",
+                "        else:\n",
+                "            return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n",
+                "\n",
+                "rules = get_notebook_rules()\n",
+                "\n",
+                "if rules == None:\n",
+                "    print(\"\")\n",
+                "    print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n",
+                "else:\n",
+                "    print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n",
+                "    print(rules)\n",
+                "    hints = 0\n",
+                "    if len(rules) > 0:\n",
+                "        for entry in entries_for_analysis:\n",
+                "            for rule in rules:\n",
+                "                if entry.find(rule[0]) != -1:\n",
+                "                    print (entry)\n",
+                "\n",
+                "                    display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n",
+                "                    hints = hints + 1\n",
+                "\n",
+                "    print(\"\")\n",
+                "    print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). {hints} further troubleshooting hints made inline.\")"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": false
+        }
+    }
+}
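
The namespace-discovery cell above is shared by all of these log analyzers. Stripped of the notebook HINT plumbing, the logic reduces to the following standalone sketch (it assumes a reachable cluster and the `kubernetes` Python module):

```python
# Minimal standalone sketch of the namespace lookup: prefer AZDATA_NAMESPACE,
# otherwise take the first namespace carrying the MSSQL_CLUSTER label.
import os
from kubernetes import client, config

config.load_kube_config()   # use load_incluster_config() inside a pod
api = client.CoreV1Api()

if "AZDATA_NAMESPACE" in os.environ:
    namespace = os.environ["AZDATA_NAMESPACE"]
else:
    items = api.list_namespace(label_selector="MSSQL_CLUSTER").items
    if not items:
        raise RuntimeError("No namespace labeled MSSQL_CLUSTER found")
    namespace = items[0].metadata.name   # [0]: first big data cluster found

print(f"The Kubernetes namespace for your big data cluster is: {namespace}")
```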

+ 288 - 0
Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg076-get-elastic-search-logs.ipynb

@@ -0,0 +1,288 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "TSG076 - Elastic Search logs\n",
+                "============================\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "import re\n",
+                "\n",
+                "tail_lines = 500\n",
+                "\n",
+                "pod = None # All\n",
+                "container = \"elasticsearch\"\n",
+                "log_files = [ \"/var/log/supervisor/log/elasticsearch*.log\" ]\n",
+                "\n",
+                "expressions_to_analyze = [\n",
+                "    re.compile(\".{26}[WARN ]\"),\n",
+                "    re.compile(\".{26}[ERROR]\")\n",
+                "]"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Instantiate Kubernetes client"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Instantiate the Python Kubernetes client into 'api' variable\n",
+                "\n",
+                "import os\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "try:\n",
+                "    from kubernetes import client, config\n",
+                "    from kubernetes.stream import stream\n",
+                "\n",
+                "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n",
+                "        config.load_incluster_config()\n",
+                "    else:\n",
+                "        try:\n",
+                "            config.load_kube_config()\n",
+                "        except:\n",
+                "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n",
+                "            raise\n",
+                "    api = client.CoreV1Api()\n",
+                "\n",
+                "    print('Kubernetes client instantiated')\n",
+                "except ImportError:\n",
+                "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n",
+                "    raise"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n",
+                "    except IndexError:\n",
+                "        from IPython.display import Markdown\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print('The kubernetes namespace for your big data cluster is: ' + namespace)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get tail for log"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Display the last 'tail_lines' of files in 'log_files' list\n",
+                "\n",
+                "pods = api.list_namespaced_pod(namespace)\n",
+                "\n",
+                "entries_for_analysis = []\n",
+                "\n",
+                "for p in pods.items:\n",
+                "    if pod is None or p.metadata.name == pod:\n",
+                "        for c in p.spec.containers:\n",
+                "            if container is None or c.name == container:\n",
+                "                for log_file in log_files:\n",
+                "                    print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n",
+                "                    try:\n",
+                "                        output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n",
+                "                    except Exception:\n",
+                "                        print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n",
+                "                    else:\n",
+                "                        for line in output.split('\\n'):\n",
+                "                            for expression in expressions_to_analyze:\n",
+                "                                if expression.match(line):\n",
+                "                                    entries_for_analysis.append(line)\n",
+                "                            print(line)\n",
+                "print(\"\")\n",
+                "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Analyze log entries and suggest relevant Troubleshooting Guides"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Analyze log entries and suggest further relevant troubleshooting guides\n",
+                "from IPython.display import Markdown\n",
+                "import os\n",
+                "import json\n",
+                "import requests\n",
+                "import ipykernel\n",
+                "import datetime\n",
+                "\n",
+                "from urllib.parse import urljoin\n",
+                "from notebook import notebookapp\n",
+                "\n",
+                "def get_notebook_name():\n",
+                "    \"\"\"Return the full path of the jupyter notebook.   Some runtimes (e.g. ADS) \n",
+                "    have the kernel_id in the filename of the connection file.  If so, the \n",
+                "    notebook name at runtime can be determined using `list_running_servers`.\n",
+                "    Other runtimes (e.g. azdata) do not have the kernel_id in the filename of\n",
+                "    the connection file, therefore we are unable to establish the filename\n",
+                "    \"\"\"\n",
+                "    connection_file = os.path.basename(ipykernel.get_connection_file())\n",
+                "    \n",
+                "    # If the runtime has the kernel_id in the connection filename, use it to\n",
+                "    # get the real notebook name at runtime, otherwise, use the notebook \n",
+                "    # filename from build time.\n",
+                "    try: \n",
+                "        kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n",
+                "    except:\n",
+                "        pass\n",
+                "    else:\n",
+                "        for servers in list(notebookapp.list_running_servers()):\n",
+                "            try:\n",
+                "                response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n",
+                "            except:\n",
+                "                pass\n",
+                "            else:\n",
+                "                for nn in json.loads(response.text):\n",
+                "                    if nn['kernel']['id'] == kernel_id:\n",
+                "                        return nn['path']\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def get_notebook_rules():\n",
+                "    \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n",
+                "    file_name = get_notebook_name()\n",
+                "\n",
+                "    if file_name == None:\n",
+                "        return None\n",
+                "    else:\n",
+                "        j = load_json(file_name)\n",
+                "\n",
+                "        if \"azdata\" not in j[\"metadata\"] or \\\n",
+                "            \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n",
+                "            \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "            return []\n",
+                "        else:\n",
+                "            return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n",
+                "\n",
+                "rules = get_notebook_rules()\n",
+                "\n",
+                "if rules == None:\n",
+                "    print(\"\")\n",
+                "    print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n",
+                "else:\n",
+                "    print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n",
+                "    print(rules)\n",
+                "    hints = 0\n",
+                "    if len(rules) > 0:\n",
+                "        for entry in entries_for_analysis:\n",
+                "            for rule in rules:\n",
+                "                if entry.find(rule[0]) != -1:\n",
+                "                    print (entry)\n",
+                "\n",
+                "                    display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n",
+                "                    hints = hints + 1\n",
+                "\n",
+                "    print(\"\")\n",
+                "    print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). {hints} further troubleshooting hints made inline.\")"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": false
+        }
+    }
+}
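
The "Get tail for log" cell drives everything through the Kubernetes exec API rather than `kubectl logs`, because the files of interest live under `/var/log/supervisor/log/` inside the container. A reduced sketch of that call follows; the pod name, namespace, and log path are placeholders:

```python
# Minimal sketch of tailing a file inside a container over the exec API.
# 'my-pod-0', 'mssql-cluster' and the log path are placeholder values.
from kubernetes import client, config
from kubernetes.stream import stream

config.load_kube_config()
api = client.CoreV1Api()

output = stream(
    api.connect_get_namespaced_pod_exec,
    "my-pod-0",                 # pod name (placeholder)
    "mssql-cluster",            # namespace (placeholder)
    command=["/bin/sh", "-c", "tail -n 500 /var/log/supervisor/log/elasticsearch*.log"],
    container="elasticsearch",
    stderr=True,
    stdout=True,
)

for line in output.split("\n"):
    print(line)
```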

+ 285 - 0
Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg077-get-kibana-logs.ipynb

@@ -0,0 +1,285 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "TSG077 - Kibana logs\n",
+                "====================\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "import re\n",
+                "\n",
+                "tail_lines = 500\n",
+                "\n",
+                "pod = None # All\n",
+                "container = \"kibana\"\n",
+                "log_files = [ \"/var/log/supervisor/log/kibana*.log\" ]\n",
+                "\n",
+                "expressions_to_analyze = [ ]"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Instantiate Kubernetes client"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Instantiate the Python Kubernetes client into 'api' variable\n",
+                "\n",
+                "import os\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "try:\n",
+                "    from kubernetes import client, config\n",
+                "    from kubernetes.stream import stream\n",
+                "\n",
+                "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n",
+                "        config.load_incluster_config()\n",
+                "    else:\n",
+                "        try:\n",
+                "            config.load_kube_config()\n",
+                "        except:\n",
+                "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n",
+                "            raise\n",
+                "    api = client.CoreV1Api()\n",
+                "\n",
+                "    print('Kubernetes client instantiated')\n",
+                "except ImportError:\n",
+                "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n",
+                "    raise"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n",
+                "    except IndexError:\n",
+                "        from IPython.display import Markdown\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print('The kubernetes namespace for your big data cluster is: ' + namespace)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get tail for log"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Display the last 'tail_lines' of files in 'log_files' list\n",
+                "\n",
+                "pods = api.list_namespaced_pod(namespace)\n",
+                "\n",
+                "entries_for_analysis = []\n",
+                "\n",
+                "for p in pods.items:\n",
+                "    if pod is None or p.metadata.name == pod:\n",
+                "        for c in p.spec.containers:\n",
+                "            if container is None or c.name == container:\n",
+                "                for log_file in log_files:\n",
+                "                    print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n",
+                "                    try:\n",
+                "                        output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n",
+                "                    except Exception:\n",
+                "                        print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n",
+                "                    else:\n",
+                "                        for line in output.split('\\n'):\n",
+                "                            for expression in expressions_to_analyze:\n",
+                "                                if expression.match(line):\n",
+                "                                    entries_for_analysis.append(line)\n",
+                "                            print(line)\n",
+                "print(\"\")\n",
+                "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Analyze log entries and suggest relevant Troubleshooting Guides"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Analyze log entries and suggest further relevant troubleshooting guides\n",
+                "from IPython.display import Markdown\n",
+                "import os\n",
+                "import json\n",
+                "import requests\n",
+                "import ipykernel\n",
+                "import datetime\n",
+                "\n",
+                "from urllib.parse import urljoin\n",
+                "from notebook import notebookapp\n",
+                "\n",
+                "def get_notebook_name():\n",
+                "    \"\"\"Return the full path of the jupyter notebook.   Some runtimes (e.g. ADS) \n",
+                "    have the kernel_id in the filename of the connection file.  If so, the \n",
+                "    notebook name at runtime can be determined using `list_running_servers`.\n",
+                "    Other runtimes (e.g. azdata) do not have the kernel_id in the filename of\n",
+                "    the connection file, therefore we are unable to establish the filename\n",
+                "    \"\"\"\n",
+                "    connection_file = os.path.basename(ipykernel.get_connection_file())\n",
+                "    \n",
+                "    # If the runtime has the kernel_id in the connection filename, use it to\n",
+                "    # get the real notebook name at runtime, otherwise, use the notebook \n",
+                "    # filename from build time.\n",
+                "    try: \n",
+                "        kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n",
+                "    except:\n",
+                "        pass\n",
+                "    else:\n",
+                "        for servers in list(notebookapp.list_running_servers()):\n",
+                "            try:\n",
+                "                response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n",
+                "            except:\n",
+                "                pass\n",
+                "            else:\n",
+                "                for nn in json.loads(response.text):\n",
+                "                    if nn['kernel']['id'] == kernel_id:\n",
+                "                        return nn['path']\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def get_notebook_rules():\n",
+                "    \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n",
+                "    file_name = get_notebook_name()\n",
+                "\n",
+                "    if file_name == None:\n",
+                "        return None\n",
+                "    else:\n",
+                "        j = load_json(file_name)\n",
+                "\n",
+                "        if \"azdata\" not in j[\"metadata\"] or \\\n",
+                "            \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n",
+                "            \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "            return []\n",
+                "        else:\n",
+                "            return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n",
+                "\n",
+                "rules = get_notebook_rules()\n",
+                "\n",
+                "if rules == None:\n",
+                "    print(\"\")\n",
+                "    print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n",
+                "else:\n",
+                "    print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n",
+                "    print(rules)\n",
+                "    hints = 0\n",
+                "    if len(rules) > 0:\n",
+                "        for entry in entries_for_analysis:\n",
+                "            for rule in rules:\n",
+                "                if entry.find(rule[0]) != -1:\n",
+                "                    print (entry)\n",
+                "\n",
+                "                    display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n",
+                "                    hints = hints + 1\n",
+                "\n",
+                "    print(\"\")\n",
+                "    print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). {hints} further troubleshooting hints made inline.\")"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": false
+        }
+    }
+}
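
`get_notebook_rules()` above works because the HINT table is stored in the notebook's own `.ipynb` JSON, under `metadata.azdata.expert.log_analyzer_rules`. Reading that table outside a running kernel reduces to plain JSON access, as in this sketch (the filename is a placeholder; a notebook without that metadata simply yields an empty list):

```python
# Minimal sketch: read the log_analyzer_rules table straight out of a
# notebook file. The filename is a placeholder.
import json

def load_rules(notebook_path):
    with open(notebook_path, encoding="utf8") as f:
        nb = json.load(f)
    return (nb.get("metadata", {})
              .get("azdata", {})
              .get("expert", {})
              .get("log_analyzer_rules", []))

for match_string, tsg_id, title, path in load_rules("tsg077-get-kibana-logs.ipynb"):
    print(f"{tsg_id}: match '{match_string}' -> {path}")
```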

+ 290 - 0
Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg088-get-datanode-logs.ipynb

@@ -0,0 +1,290 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "TSG088 - Hadoop datanode logs\n",
+                "=============================\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "import re\n",
+                "\n",
+                "tail_lines = 500\n",
+                "\n",
+                "pod = None # All\n",
+                "container = \"hadoop\"\n",
+                "log_files = [ \"/var/log/supervisor/log/datanode*.log\" ]\n",
+                "\n",
+                "expressions_to_analyze = [\n",
+                "    re.compile(\".{23} WARN \"),\n",
+                "    re.compile(\".{23} ERROR \")\n",
+                "]"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Instantiate Kubernetes client"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Instantiate the Python Kubernetes client into 'api' variable\n",
+                "\n",
+                "import os\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "try:\n",
+                "    from kubernetes import client, config\n",
+                "    from kubernetes.stream import stream\n",
+                "\n",
+                "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n",
+                "        config.load_incluster_config()\n",
+                "    else:\n",
+                "        try:\n",
+                "            config.load_kube_config()\n",
+                "        except:\n",
+                "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n",
+                "            raise\n",
+                "    api = client.CoreV1Api()\n",
+                "\n",
+                "    print('Kubernetes client instantiated')\n",
+                "except ImportError:\n",
+                "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n",
+                "    raise"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n",
+                "    except IndexError:\n",
+                "        from IPython.display import Markdown\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print('The kubernetes namespace for your big data cluster is: ' + namespace)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the Hadoop datanode logs from the hadoop container\n",
+                "\n",
+                "### Get tail for log"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Display the last 'tail_lines' of files in 'log_files' list\n",
+                "\n",
+                "pods = api.list_namespaced_pod(namespace)\n",
+                "\n",
+                "entries_for_analysis = []\n",
+                "\n",
+                "for p in pods.items:\n",
+                "    if pod is None or p.metadata.name == pod:\n",
+                "        for c in p.spec.containers:\n",
+                "            if container is None or c.name == container:\n",
+                "                for log_file in log_files:\n",
+                "                    print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n",
+                "                    try:\n",
+                "                        output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n",
+                "                    except Exception:\n",
+                "                        print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n",
+                "                    else:\n",
+                "                        for line in output.split('\\n'):\n",
+                "                            for expression in expressions_to_analyze:\n",
+                "                                if expression.match(line):\n",
+                "                                    entries_for_analysis.append(line)\n",
+                "                            print(line)\n",
+                "print(\"\")\n",
+                "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Analyze log entries and suggest relevant Troubleshooting Guides"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Analyze log entries and suggest further relevant troubleshooting guides\n",
+                "from IPython.display import Markdown\n",
+                "import os\n",
+                "import json\n",
+                "import requests\n",
+                "import ipykernel\n",
+                "import datetime\n",
+                "\n",
+                "from urllib.parse import urljoin\n",
+                "from notebook import notebookapp\n",
+                "\n",
+                "def get_notebook_name():\n",
+                "    \"\"\"Return the full path of the jupyter notebook.   Some runtimes (e.g. ADS) \n",
+                "    have the kernel_id in the filename of the connection file.  If so, the \n",
+                "    notebook name at runtime can be determined using `list_running_servers`.\n",
+                "    Other runtimes (e.g. azdata) do not have the kernel_id in the filename of\n",
+                "    the connection file, therefore we are unable to establish the filename\n",
+                "    \"\"\"\n",
+                "    connection_file = os.path.basename(ipykernel.get_connection_file())\n",
+                "    \n",
+                "    # If the runtime has the kernel_id in the connection filename, use it to\n",
+                "    # get the real notebook name at runtime, otherwise, use the notebook \n",
+                "    # filename from build time.\n",
+                "    try: \n",
+                "        kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n",
+                "    except:\n",
+                "        pass\n",
+                "    else:\n",
+                "        for servers in list(notebookapp.list_running_servers()):\n",
+                "            try:\n",
+                "                response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n",
+                "            except:\n",
+                "                pass\n",
+                "            else:\n",
+                "                for nn in json.loads(response.text):\n",
+                "                    if nn['kernel']['id'] == kernel_id:\n",
+                "                        return nn['path']\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def get_notebook_rules():\n",
+                "    \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n",
+                "    file_name = get_notebook_name()\n",
+                "\n",
+                "    if file_name == None:\n",
+                "        return None\n",
+                "    else:\n",
+                "        j = load_json(file_name)\n",
+                "\n",
+                "        if \"azdata\" not in j[\"metadata\"] or \\\n",
+                "            \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n",
+                "            \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "            return []\n",
+                "        else:\n",
+                "            return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n",
+                "\n",
+                "rules = get_notebook_rules()\n",
+                "\n",
+                "if rules == None:\n",
+                "    print(\"\")\n",
+                "    print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n",
+                "else:\n",
+                "    print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n",
+                "    print(rules)\n",
+                "    hints = 0\n",
+                "    if len(rules) > 0:\n",
+                "        for entry in entries_for_analysis:\n",
+                "            for rule in rules:\n",
+                "                if entry.find(rule[0]) != -1:\n",
+                "                    print (entry)\n",
+                "\n",
+                "                    display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n",
+                "                    hints = hints + 1\n",
+                "\n",
+                "    print(\"\")\n",
+                "    print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). {hints} further troubleshooting hints made inline.\")"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": false
+        }
+    }
+}
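
tsg088 narrows the captured lines with `expressions_to_analyze` before any rule matching: Hadoop log lines start with a fixed 23-character timestamp ("2020-05-25 10:06:13,462"), so `.{23} WARN ` anchors directly on the level field. A self-contained illustration of that pre-filter (the sample lines are invented):

```python
# Minimal sketch of the expressions_to_analyze pre-filter used above.
# Hadoop's timestamp prefix is exactly 23 characters wide, so the level
# field always begins at the same offset.
import re

expressions_to_analyze = [
    re.compile(".{23} WARN "),
    re.compile(".{23} ERROR "),
]

lines = [
    "2020-05-25 10:06:13,462 WARN  datanode.DataNode: Slow BlockReceiver",
    "2020-05-25 10:06:14,001 INFO  datanode.DataNode: heartbeat OK",
]

entries_for_analysis = [
    l for l in lines if any(e.match(l) for e in expressions_to_analyze)
]
print(entries_for_analysis)   # only the WARN line is kept
```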

+ 288 - 0
Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg090-get-nodemanager-logs.ipynb

@@ -0,0 +1,288 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "TSG090 - Yarn nodemanager logs\n",
+                "==============================\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "import re\n",
+                "\n",
+                "tail_lines = 500\n",
+                "\n",
+                "pod = None # All\n",
+                "container = \"hadoop\"\n",
+                "log_files = [ \"/var/log/supervisor/log/nodemanager*.log\" ]\n",
+                "\n",
+                "expressions_to_analyze = [\n",
+                "    re.compile(\".{23} WARN \"),\n",
+                "    re.compile(\".{23} ERROR \")\n",
+                "]"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Instantiate Kubernetes client"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Instantiate the Python Kubernetes client into 'api' variable\n",
+                "\n",
+                "import os\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "try:\n",
+                "    from kubernetes import client, config\n",
+                "    from kubernetes.stream import stream\n",
+                "\n",
+                "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n",
+                "        config.load_incluster_config()\n",
+                "    else:\n",
+                "        try:\n",
+                "            config.load_kube_config()\n",
+                "        except:\n",
+                "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n",
+                "            raise\n",
+                "    api = client.CoreV1Api()\n",
+                "\n",
+                "    print('Kubernetes client instantiated')\n",
+                "except ImportError:\n",
+                "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n",
+                "    raise"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n",
+                "    except IndexError:\n",
+                "        from IPython.display import Markdown\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print('The kubernetes namespace for your big data cluster is: ' + namespace)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get tail for log"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Display the last 'tail_lines' of files in 'log_files' list\n",
+                "\n",
+                "pods = api.list_namespaced_pod(namespace)\n",
+                "\n",
+                "entries_for_analysis = []\n",
+                "\n",
+                "for p in pods.items:\n",
+                "    if pod is None or p.metadata.name == pod:\n",
+                "        for c in p.spec.containers:\n",
+                "            if container is None or c.name == container:\n",
+                "                for log_file in log_files:\n",
+                "                    print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n",
+                "                    try:\n",
+                "                        output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n",
+                "                    except Exception:\n",
+                "                        print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n",
+                "                    else:\n",
+                "                        for line in output.split('\\n'):\n",
+                "                            for expression in expressions_to_analyze:\n",
+                "                                if expression.match(line):\n",
+                "                                    entries_for_analysis.append(line)\n",
+                "                            print(line)\n",
+                "print(\"\")\n",
+                "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Analyze log entries and suggest relevant Troubleshooting Guides"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Analyze log entries and suggest further relevant troubleshooting guides\n",
+                "from IPython.display import Markdown\n",
+                "import os\n",
+                "import json\n",
+                "import requests\n",
+                "import ipykernel\n",
+                "import datetime\n",
+                "\n",
+                "from urllib.parse import urljoin\n",
+                "from notebook import notebookapp\n",
+                "\n",
+                "def get_notebook_name():\n",
+                "    \"\"\"Return the full path of the jupyter notebook.   Some runtimes (e.g. ADS) \n",
+                "    have the kernel_id in the filename of the connection file.  If so, the \n",
+                "    notebook name at runtime can be determined using `list_running_servers`.\n",
+                "    Other runtimes (e.g. azdata) do not have the kernel_id in the filename of\n",
+                "    the connection file, therefore we are unable to establish the filename\n",
+                "    \"\"\"\n",
+                "    connection_file = os.path.basename(ipykernel.get_connection_file())\n",
+                "    \n",
+                "    # If the runtime has the kernel_id in the connection filename, use it to\n",
+                "    # get the real notebook name at runtime, otherwise, use the notebook \n",
+                "    # filename from build time.\n",
+                "    try: \n",
+                "        kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n",
+                "    except:\n",
+                "        pass\n",
+                "    else:\n",
+                "        for servers in list(notebookapp.list_running_servers()):\n",
+                "            try:\n",
+                "                response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n",
+                "            except:\n",
+                "                pass\n",
+                "            else:\n",
+                "                for nn in json.loads(response.text):\n",
+                "                    if nn['kernel']['id'] == kernel_id:\n",
+                "                        return nn['path']\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def get_notebook_rules():\n",
+                "    \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n",
+                "    file_name = get_notebook_name()\n",
+                "\n",
+                "    if file_name == None:\n",
+                "        return None\n",
+                "    else:\n",
+                "        j = load_json(file_name)\n",
+                "\n",
+                "        if \"azdata\" not in j[\"metadata\"] or \\\n",
+                "            \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n",
+                "            \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "            return []\n",
+                "        else:\n",
+                "            return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n",
+                "\n",
+                "rules = get_notebook_rules()\n",
+                "\n",
+                "if rules == None:\n",
+                "    print(\"\")\n",
+                "    print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n",
+                "else:\n",
+                "    print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n",
+                "    print(rules)\n",
+                "    hints = 0\n",
+                "    if len(rules) > 0:\n",
+                "        for entry in entries_for_analysis:\n",
+                "            for rule in rules:\n",
+                "                if entry.find(rule[0]) != -1:\n",
+                "                    print (entry)\n",
+                "\n",
+                "                    display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n",
+                "                    hints = hints + 1\n",
+                "\n",
+                "    print(\"\")\n",
+                "    print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). {hints} further troubleshooting hints made inline.\")"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": false
+        }
+    }
+}

+ 288 - 0
Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg092-get-all-supervisord-log-tails.ipynb

@@ -0,0 +1,288 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "TSG092 - Supervisord log tail for all containers in BDC\n",
+                "=======================================================\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "import re\n",
+                "\n",
+                "tail_lines = 500\n",
+                "\n",
+                "pod = None # All\n",
+                "container = None # All containers\n",
+                "log_files = [ \"/var/log/supervisor/supervisord.log\" ]\n",
+                "\n",
+                "expressions_to_analyze = [\n",
+                "    re.compile(\".{23} WARN \"),\n",
+                "    re.compile(\".{23} ERROR \")\n",
+                "]"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Instantiate Kubernetes client"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Instantiate the Python Kubernetes client into 'api' variable\n",
+                "\n",
+                "import os\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "try:\n",
+                "    from kubernetes import client, config\n",
+                "    from kubernetes.stream import stream\n",
+                "\n",
+                "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n",
+                "        config.load_incluster_config()\n",
+                "    else:\n",
+                "        try:\n",
+                "            config.load_kube_config()\n",
+                "        except:\n",
+                "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n",
+                "            raise\n",
+                "    api = client.CoreV1Api()\n",
+                "\n",
+                "    print('Kubernetes client instantiated')\n",
+                "except ImportError:\n",
+                "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n",
+                "    raise"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n",
+                "    except IndexError:\n",
+                "        from IPython.display import Markdown\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print('The kubernetes namespace for your big data cluster is: ' + namespace)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get tail for log"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Display the last 'tail_lines' of files in 'log_files' list\n",
+                "\n",
+                "pods = api.list_namespaced_pod(namespace)\n",
+                "\n",
+                "entries_for_analysis = []\n",
+                "\n",
+                "for p in pods.items:\n",
+                "    if pod is None or p.metadata.name == pod:\n",
+                "        for c in p.spec.containers:\n",
+                "            if container is None or c.name == container:\n",
+                "                for log_file in log_files:\n",
+                "                    print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n",
+                "                    try:\n",
+                "                        output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n",
+                "                    except Exception:\n",
+                "                        print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n",
+                "                    else:\n",
+                "                        for line in output.split('\\n'):\n",
+                "                            for expression in expressions_to_analyze:\n",
+                "                                if expression.match(line):\n",
+                "                                    entries_for_analysis.append(line)\n",
+                "                            print(line)\n",
+                "print(\"\")\n",
+                "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Analyze log entries and suggest relevant Troubleshooting Guides"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Analyze log entries and suggest further relevant troubleshooting guides\n",
+                "from IPython.display import Markdown\n",
+                "import os\n",
+                "import json\n",
+                "import requests\n",
+                "import ipykernel\n",
+                "import datetime\n",
+                "\n",
+                "from urllib.parse import urljoin\n",
+                "from notebook import notebookapp\n",
+                "\n",
+                "def get_notebook_name():\n",
+                "    \"\"\"Return the full path of the jupyter notebook.   Some runtimes (e.g. ADS) \n",
+                "    have the kernel_id in the filename of the connection file.  If so, the \n",
+                "    notebook name at runtime can be determined using `list_running_servers`.\n",
+                "    Other runtimes (e.g. azdata) do not have the kernel_id in the filename of\n",
+                "    the connection file, therefore we are unable to establish the filename\n",
+                "    \"\"\"\n",
+                "    connection_file = os.path.basename(ipykernel.get_connection_file())\n",
+                "    \n",
+                "    # If the runtime has the kernel_id in the connection filename, use it to\n",
+                "    # get the real notebook name at runtime, otherwise, use the notebook \n",
+                "    # filename from build time.\n",
+                "    try: \n",
+                "        kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n",
+                "    except:\n",
+                "        pass\n",
+                "    else:\n",
+                "        for servers in list(notebookapp.list_running_servers()):\n",
+                "            try:\n",
+                "                response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n",
+                "            except:\n",
+                "                pass\n",
+                "            else:\n",
+                "                for nn in json.loads(response.text):\n",
+                "                    if nn['kernel']['id'] == kernel_id:\n",
+                "                        return nn['path']\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def get_notebook_rules():\n",
+                "    \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n",
+                "    file_name = get_notebook_name()\n",
+                "\n",
+                "    if file_name == None:\n",
+                "        return None\n",
+                "    else:\n",
+                "        j = load_json(file_name)\n",
+                "\n",
+                "        if \"azdata\" not in j[\"metadata\"] or \\\n",
+                "            \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n",
+                "            \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "            return []\n",
+                "        else:\n",
+                "            return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n",
+                "\n",
+                "rules = get_notebook_rules()\n",
+                "\n",
+                "if rules == None:\n",
+                "    print(\"\")\n",
+                "    print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n",
+                "else:\n",
+                "    print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n",
+                "    print(rules)\n",
+                "    hints = 0\n",
+                "    if len(rules) > 0:\n",
+                "        for entry in entries_for_analysis:\n",
+                "            for rule in rules:\n",
+                "                if entry.find(rule[0]) != -1:\n",
+                "                    print (entry)\n",
+                "\n",
+                "                    display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n",
+                "                    hints = hints + 1\n",
+                "\n",
+                "    print(\"\")\n",
+                "    print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). {hints} further troubleshooting hints made inline.\")"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": false
+        }
+    }
+}

+ 262 - 0
Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg093-get-all-agent-log-tails.ipynb

@@ -0,0 +1,262 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "TSG093 - Agent log tail for all containers in BDC\n",
+                "=================================================\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "tail_lines = 100\n",
+                "line_offset = 27 # Skip the date/time at start of line\n",
+                "\n",
+                "cmd = f'tail -n {tail_lines} /var/log/agent/agent.log'\n",
+                "\n",
+                "coalesce_duplicates = True"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Analyze log in all pod containers\n",
+                "\n",
+                "### Instantiate Kubernetes client"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Instantiate the Python Kubernetes client into 'api' variable\n",
+                "\n",
+                "import os\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "try:\n",
+                "    from kubernetes import client, config\n",
+                "    from kubernetes.stream import stream\n",
+                "\n",
+                "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n",
+                "        config.load_incluster_config()\n",
+                "    else:\n",
+                "        try:\n",
+                "            config.load_kube_config()\n",
+                "        except:\n",
+                "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n",
+                "            raise\n",
+                "    api = client.CoreV1Api()\n",
+                "\n",
+                "    print('Kubernetes client instantiated')\n",
+                "except ImportError:\n",
+                "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n",
+                "    raise"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n",
+                "    except IndexError:\n",
+                "        from IPython.display import Markdown\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print('The kubernetes namespace for your big data cluster is: ' + namespace)"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "from IPython.display import Markdown\n",
+                "\n",
+                "import os\n",
+                "import json\n",
+                "import requests\n",
+                "import ipykernel\n",
+                "import datetime\n",
+                "\n",
+                "from urllib.parse import urljoin\n",
+                "from notebook import notebookapp\n",
+                "\n",
+                "def get_notebook_name():\n",
+                "    \"\"\"Return the full path of the jupyter notebook.   Some runtimes (e.g. ADS) \n",
+                "    have the kernel_id in the filename of the connection file.  If so, the \n",
+                "    notebook name at runtime can be determined using `list_running_servers`.\n",
+                "    Other runtimes (e.g. azdata) do not have the kernel_id in the filename of\n",
+                "    the connection file, therefore we are unable to establish the filename\n",
+                "    \"\"\"\n",
+                "    connection_file = os.path.basename(ipykernel.get_connection_file())\n",
+                "    \n",
+                "    # If the runtime has the kernel_id in the connection filename, use it to\n",
+                "    # get the real notebook name at runtime, otherwise, use the notebook \n",
+                "    # filename from build time.\n",
+                "    try: \n",
+                "        kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n",
+                "    except:\n",
+                "        pass\n",
+                "    else:\n",
+                "        for servers in list(notebookapp.list_running_servers()):\n",
+                "            try:\n",
+                "                response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n",
+                "            except:\n",
+                "                pass\n",
+                "            else:\n",
+                "                for nn in json.loads(response.text):\n",
+                "                    if nn['kernel']['id'] == kernel_id:\n",
+                "                        return nn['path']\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def get_notebook_rules():\n",
+                "    \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n",
+                "    file_name = get_notebook_name()\n",
+                "\n",
+                "    if file_name == None:\n",
+                "        return None\n",
+                "    else:\n",
+                "        j = load_json(file_name)\n",
+                "\n",
+                "        if \"azdata\" not in j[\"metadata\"] or \\\n",
+                "            \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n",
+                "            \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "            return []\n",
+                "        else:\n",
+                "            return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n",
+                "\n",
+                "rules = get_notebook_rules()\n",
+                "\n",
+                "pod_list = api.list_namespaced_pod(namespace)\n",
+                "pod_names = [pod.metadata.name for pod in pod_list.items]\n",
+                "\n",
+                "for pod in pod_list.items:\n",
+                "    container_names = [container.name for container in pod.spec.containers]\n",
+                "    for container in container_names:\n",
+                "            print (f\"*** LOGS for CONTAINER: {container} in POD: {pod.metadata.name}\")\n",
+                "            try:\n",
+                "                logs=stream(api.connect_get_namespaced_pod_exec, pod.metadata.name, namespace, command=['/bin/sh', '-c', cmd], container=container, stderr=True, stdout=True)\n",
+                "\n",
+                "                if coalesce_duplicates:\n",
+                "                    previous_line = \"\"\n",
+                "                    duplicates = 1\n",
+                "                    for line in logs.split('\\n'):\n",
+                "                        if line[line_offset:] != previous_line[line_offset:]:\n",
+                "                            if duplicates != 1:\n",
+                "                                print(f\"\\t{previous_line} (x{duplicates})\")\n",
+                "                            print(f\"\\t{line}\")\n",
+                "\n",
+                "                            for rule in rules:\n",
+                "                                if line[line_offset:].find(rule[0]) != -1:\n",
+                "                                    display(Markdown(f'HINT: Use [{rule[2]}](rule[3]) to resolve this issue.'))\n",
+                "\n",
+                "                            duplicates = 1\n",
+                "                        else:\n",
+                "                            duplicates = duplicates + 1\n",
+                "                            continue\n",
+                "\n",
+                "                        previous_line = line\n",
+                "                else:\n",
+                "                    print(logs)\n",
+                "\n",
+                "            except Exception:\n",
+                "                print (f\"Failed to get LOGS for CONTAINER: {container} in POD: {pod.metadata.name}\")"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": false,
+            "expert": {
+                "log_analyzer_rules": [
+                    [
+                        "Failed to get file names from controller with Error",
+                        "TSG040",
+                        "TSG040 - Failed to get file names from controller with Error",
+                        "../repair/tsg040-failed-get-file-names-controller.ipynb"
+                    ],
+                    [
+                        "Please increase sysctl fs.aio-max-nr",
+                        "TSG041",
+                        "TSG041 - Unable to create a new asynchronous I/O context (increase sysctl fs.aio-max-nr)",
+                        "../repair/tsg041-increase-fs-aio-max-nr.ipynb"
+                    ]
+                ]
+            }
+        }
+    }
+}

+ 285 - 0
Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg094-get-grafana-logs.ipynb

@@ -0,0 +1,285 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "TSG094 - Grafana logs\n",
+                "=====================\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "import re\n",
+                "\n",
+                "tail_lines = 500\n",
+                "\n",
+                "pod = None # All\n",
+                "container = \"grafana\"\n",
+                "log_files = [ \"/var/log/supervisor/log/grafana*.log\" ]\n",
+                "\n",
+                "expressions_to_analyze = []"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Instantiate Kubernetes client"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Instantiate the Python Kubernetes client into 'api' variable\n",
+                "\n",
+                "import os\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "try:\n",
+                "    from kubernetes import client, config\n",
+                "    from kubernetes.stream import stream\n",
+                "\n",
+                "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n",
+                "        config.load_incluster_config()\n",
+                "    else:\n",
+                "        try:\n",
+                "            config.load_kube_config()\n",
+                "        except:\n",
+                "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n",
+                "            raise\n",
+                "    api = client.CoreV1Api()\n",
+                "\n",
+                "    print('Kubernetes client instantiated')\n",
+                "except ImportError:\n",
+                "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n",
+                "    raise"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n",
+                "    except IndexError:\n",
+                "        from IPython.display import Markdown\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print('The kubernetes namespace for your big data cluster is: ' + namespace)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get tail for log"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Display the last 'tail_lines' of files in 'log_files' list\n",
+                "\n",
+                "pods = api.list_namespaced_pod(namespace)\n",
+                "\n",
+                "entries_for_analysis = []\n",
+                "\n",
+                "for p in pods.items:\n",
+                "    if pod is None or p.metadata.name == pod:\n",
+                "        for c in p.spec.containers:\n",
+                "            if container is None or c.name == container:\n",
+                "                for log_file in log_files:\n",
+                "                    print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n",
+                "                    try:\n",
+                "                        output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n",
+                "                    except Exception:\n",
+                "                        print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n",
+                "                    else:\n",
+                "                        for line in output.split('\\n'):\n",
+                "                            for expression in expressions_to_analyze:\n",
+                "                                if expression.match(line):\n",
+                "                                    entries_for_analysis.append(line)\n",
+                "                            print(line)\n",
+                "print(\"\")\n",
+                "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Analyze log entries and suggest relevant Troubleshooting Guides"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Analyze log entries and suggest further relevant troubleshooting guides\n",
+                "from IPython.display import Markdown\n",
+                "import os\n",
+                "import json\n",
+                "import requests\n",
+                "import ipykernel\n",
+                "import datetime\n",
+                "\n",
+                "from urllib.parse import urljoin\n",
+                "from notebook import notebookapp\n",
+                "\n",
+                "def get_notebook_name():\n",
+                "    \"\"\"Return the full path of the jupyter notebook.   Some runtimes (e.g. ADS) \n",
+                "    have the kernel_id in the filename of the connection file.  If so, the \n",
+                "    notebook name at runtime can be determined using `list_running_servers`.\n",
+                "    Other runtimes (e.g. azdata) do not have the kernel_id in the filename of\n",
+                "    the connection file, therefore we are unable to establish the filename\n",
+                "    \"\"\"\n",
+                "    connection_file = os.path.basename(ipykernel.get_connection_file())\n",
+                "    \n",
+                "    # If the runtime has the kernel_id in the connection filename, use it to\n",
+                "    # get the real notebook name at runtime, otherwise, use the notebook \n",
+                "    # filename from build time.\n",
+                "    try: \n",
+                "        kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n",
+                "    except:\n",
+                "        pass\n",
+                "    else:\n",
+                "        for servers in list(notebookapp.list_running_servers()):\n",
+                "            try:\n",
+                "                response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n",
+                "            except:\n",
+                "                pass\n",
+                "            else:\n",
+                "                for nn in json.loads(response.text):\n",
+                "                    if nn['kernel']['id'] == kernel_id:\n",
+                "                        return nn['path']\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def get_notebook_rules():\n",
+                "    \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n",
+                "    file_name = get_notebook_name()\n",
+                "\n",
+                "    if file_name == None:\n",
+                "        return None\n",
+                "    else:\n",
+                "        j = load_json(file_name)\n",
+                "\n",
+                "        if \"azdata\" not in j[\"metadata\"] or \\\n",
+                "            \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n",
+                "            \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "            return []\n",
+                "        else:\n",
+                "            return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n",
+                "\n",
+                "rules = get_notebook_rules()\n",
+                "\n",
+                "if rules == None:\n",
+                "    print(\"\")\n",
+                "    print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n",
+                "else:\n",
+                "    print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n",
+                "    print(rules)\n",
+                "    hints = 0\n",
+                "    if len(rules) > 0:\n",
+                "        for entry in entries_for_analysis:\n",
+                "            for rule in rules:\n",
+                "                if entry.find(rule[0]) != -1:\n",
+                "                    print (entry)\n",
+                "\n",
+                "                    display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n",
+                "                    hints = hints + 1\n",
+                "\n",
+                "    print(\"\")\n",
+                "    print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). {hints} further troubleshooting hints made inline.\")"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": false
+        }
+    }
+}

+ 288 - 0
Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg095-get-namenode-logs.ipynb

@@ -0,0 +1,288 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "TSG095 - Hadoop namenode logs\n",
+                "=============================\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "import re\n",
+                "\n",
+                "tail_lines = 500\n",
+                "\n",
+                "pod = None # All\n",
+                "container = \"hadoop\"\n",
+                "log_files = [ \"/var/log/supervisor/log/namenode*.log\" ]\n",
+                "\n",
+                "expressions_to_analyze = [\n",
+                "    re.compile(\".{23} WARN \"),\n",
+                "    re.compile(\".{23} ERROR \")\n",
+                "]"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Instantiate Kubernetes client"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Instantiate the Python Kubernetes client into 'api' variable\n",
+                "\n",
+                "import os\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "try:\n",
+                "    from kubernetes import client, config\n",
+                "    from kubernetes.stream import stream\n",
+                "\n",
+                "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n",
+                "        config.load_incluster_config()\n",
+                "    else:\n",
+                "        try:\n",
+                "            config.load_kube_config()\n",
+                "        except:\n",
+                "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n",
+                "            raise\n",
+                "    api = client.CoreV1Api()\n",
+                "\n",
+                "    print('Kubernetes client instantiated')\n",
+                "except ImportError:\n",
+                "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n",
+                "    raise"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n",
+                "    except IndexError:\n",
+                "        from IPython.display import Markdown\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print('The kubernetes namespace for your big data cluster is: ' + namespace)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get tail for log"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Display the last 'tail_lines' of files in 'log_files' list\n",
+                "\n",
+                "pods = api.list_namespaced_pod(namespace)\n",
+                "\n",
+                "entries_for_analysis = []\n",
+                "\n",
+                "for p in pods.items:\n",
+                "    if pod is None or p.metadata.name == pod:\n",
+                "        for c in p.spec.containers:\n",
+                "            if container is None or c.name == container:\n",
+                "                for log_file in log_files:\n",
+                "                    print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n",
+                "                    try:\n",
+                "                        output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n",
+                "                    except Exception:\n",
+                "                        print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n",
+                "                    else:\n",
+                "                        for line in output.split('\\n'):\n",
+                "                            for expression in expressions_to_analyze:\n",
+                "                                if expression.match(line):\n",
+                "                                    entries_for_analysis.append(line)\n",
+                "                            print(line)\n",
+                "print(\"\")\n",
+                "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Analyze log entries and suggest relevant Troubleshooting Guides"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Analyze log entries and suggest further relevant troubleshooting guides\n",
+                "from IPython.display import Markdown\n",
+                "import os\n",
+                "import json\n",
+                "import requests\n",
+                "import ipykernel\n",
+                "import datetime\n",
+                "\n",
+                "from urllib.parse import urljoin\n",
+                "from notebook import notebookapp\n",
+                "\n",
+                "def get_notebook_name():\n",
+                "    \"\"\"Return the full path of the jupyter notebook.   Some runtimes (e.g. ADS) \n",
+                "    have the kernel_id in the filename of the connection file.  If so, the \n",
+                "    notebook name at runtime can be determined using `list_running_servers`.\n",
+                "    Other runtimes (e.g. azdata) do not have the kernel_id in the filename of\n",
+                "    the connection file, therefore we are unable to establish the filename\n",
+                "    \"\"\"\n",
+                "    connection_file = os.path.basename(ipykernel.get_connection_file())\n",
+                "    \n",
+                "    # If the runtime has the kernel_id in the connection filename, use it to\n",
+                "    # get the real notebook name at runtime, otherwise, use the notebook \n",
+                "    # filename from build time.\n",
+                "    try: \n",
+                "        kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n",
+                "    except:\n",
+                "        pass\n",
+                "    else:\n",
+                "        for servers in list(notebookapp.list_running_servers()):\n",
+                "            try:\n",
+                "                response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n",
+                "            except:\n",
+                "                pass\n",
+                "            else:\n",
+                "                for nn in json.loads(response.text):\n",
+                "                    if nn['kernel']['id'] == kernel_id:\n",
+                "                        return nn['path']\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def get_notebook_rules():\n",
+                "    \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n",
+                "    file_name = get_notebook_name()\n",
+                "\n",
+                "    if file_name == None:\n",
+                "        return None\n",
+                "    else:\n",
+                "        j = load_json(file_name)\n",
+                "\n",
+                "        if \"azdata\" not in j[\"metadata\"] or \\\n",
+                "            \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n",
+                "            \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "            return []\n",
+                "        else:\n",
+                "            return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n",
+                "\n",
+                "rules = get_notebook_rules()\n",
+                "\n",
+                "if rules == None:\n",
+                "    print(\"\")\n",
+                "    print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n",
+                "else:\n",
+                "    print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n",
+                "    print(rules)\n",
+                "    hints = 0\n",
+                "    if len(rules) > 0:\n",
+                "        for entry in entries_for_analysis:\n",
+                "            for rule in rules:\n",
+                "                if entry.find(rule[0]) != -1:\n",
+                "                    print (entry)\n",
+                "\n",
+                "                    display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n",
+                "                    hints = hints + 1\n",
+                "\n",
+                "    print(\"\")\n",
+                "    print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). {hints} further troubleshooting hints made inline.\")"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": false
+        }
+    }
+}
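The analyzer cell above reads its rules from the notebook's own azdata.expert.log_analyzer_rules metadata. As a minimal sketch of that contract, runnable outside any notebook: each rule is a four-element list of [match text, TSG id, link text, link target], and a hint is emitted whenever the match text occurs in a captured log entry. The rule shown is the one embedded in TSG117 below; the sample log entry is hypothetical.

# Minimal sketch of the log_analyzer_rules contract used by these notebooks.
# The rule is taken from TSG117's metadata; the log entry is a hypothetical
# example of a line that would trigger it.
rules = [
    [
        "(111: Connection refused) while connecting to upstream",  # substring to look for
        "TSG113",                                                  # TSG identifier
        "TSG113 - Run App-Deploy",                                 # link text for the hint
        "../monitor-bdc/tsg113-run-app.ipynb",                     # link target for the hint
    ]
]

entries_for_analysis = [
    "2020/01/01 12:00:00 [error] connect() failed (111: Connection refused) while connecting to upstream"
]

for entry in entries_for_analysis:
    for rule in rules:
        if rule[0] in entry:  # equivalent to entry.find(rule[0]) != -1 above
            print(f"HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.")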

+ 288 - 0
Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg096-get-zookeeper-logs.ipynb

@@ -0,0 +1,288 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "TSG096 - Zookeeper logs\n",
+                "=======================\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "import re\n",
+                "\n",
+                "tail_lines = 500\n",
+                "\n",
+                "pod = None # All\n",
+                "container = \"hadoop\"\n",
+                "log_files = [ \"/var/log/supervisor/log/zkfc*.log\" ]\n",
+                "\n",
+                "expressions_to_analyze = [\n",
+                "    re.compile(\".{23} WARN \"),\n",
+                "    re.compile(\".{23} ERROR \")\n",
+                "]"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Instantiate Kubernetes client"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Instantiate the Python Kubernetes client into 'api' variable\n",
+                "\n",
+                "import os\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "try:\n",
+                "    from kubernetes import client, config\n",
+                "    from kubernetes.stream import stream\n",
+                "\n",
+                "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n",
+                "        config.load_incluster_config()\n",
+                "    else:\n",
+                "        try:\n",
+                "            config.load_kube_config()\n",
+                "        except:\n",
+                "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n",
+                "            raise\n",
+                "    api = client.CoreV1Api()\n",
+                "\n",
+                "    print('Kubernetes client instantiated')\n",
+                "except ImportError:\n",
+                "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n",
+                "    raise"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n",
+                "    except IndexError:\n",
+                "        from IPython.display import Markdown\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print('The kubernetes namespace for your big data cluster is: ' + namespace)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get tail for log"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Display the last 'tail_lines' of files in 'log_files' list\n",
+                "\n",
+                "pods = api.list_namespaced_pod(namespace)\n",
+                "\n",
+                "entries_for_analysis = []\n",
+                "\n",
+                "for p in pods.items:\n",
+                "    if pod is None or p.metadata.name == pod:\n",
+                "        for c in p.spec.containers:\n",
+                "            if container is None or c.name == container:\n",
+                "                for log_file in log_files:\n",
+                "                    print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n",
+                "                    try:\n",
+                "                        output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n",
+                "                    except Exception:\n",
+                "                        print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n",
+                "                    else:\n",
+                "                        for line in output.split('\\n'):\n",
+                "                            for expression in expressions_to_analyze:\n",
+                "                                if expression.match(line):\n",
+                "                                    entries_for_analysis.append(line)\n",
+                "                            print(line)\n",
+                "print(\"\")\n",
+                "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Analyze log entries and suggest relevant Troubleshooting Guides"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Analyze log entries and suggest further relevant troubleshooting guides\n",
+                "from IPython.display import Markdown\n",
+                "import os\n",
+                "import json\n",
+                "import requests\n",
+                "import ipykernel\n",
+                "import datetime\n",
+                "\n",
+                "from urllib.parse import urljoin\n",
+                "from notebook import notebookapp\n",
+                "\n",
+                "def get_notebook_name():\n",
+                "    \"\"\"Return the full path of the jupyter notebook.   Some runtimes (e.g. ADS) \n",
+                "    have the kernel_id in the filename of the connection file.  If so, the \n",
+                "    notebook name at runtime can be determined using `list_running_servers`.\n",
+                "    Other runtimes (e.g. azdata) do not have the kernel_id in the filename of\n",
+                "    the connection file, therefore we are unable to establish the filename\n",
+                "    \"\"\"\n",
+                "    connection_file = os.path.basename(ipykernel.get_connection_file())\n",
+                "    \n",
+                "    # If the runtime has the kernel_id in the connection filename, use it to\n",
+                "    # get the real notebook name at runtime, otherwise, use the notebook \n",
+                "    # filename from build time.\n",
+                "    try: \n",
+                "        kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n",
+                "    except:\n",
+                "        pass\n",
+                "    else:\n",
+                "        for servers in list(notebookapp.list_running_servers()):\n",
+                "            try:\n",
+                "                response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n",
+                "            except:\n",
+                "                pass\n",
+                "            else:\n",
+                "                for nn in json.loads(response.text):\n",
+                "                    if nn['kernel']['id'] == kernel_id:\n",
+                "                        return nn['path']\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def get_notebook_rules():\n",
+                "    \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n",
+                "    file_name = get_notebook_name()\n",
+                "\n",
+                "    if file_name == None:\n",
+                "        return None\n",
+                "    else:\n",
+                "        j = load_json(file_name)\n",
+                "\n",
+                "        if \"azdata\" not in j[\"metadata\"] or \\\n",
+                "            \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n",
+                "            \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "            return []\n",
+                "        else:\n",
+                "            return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n",
+                "\n",
+                "rules = get_notebook_rules()\n",
+                "\n",
+                "if rules == None:\n",
+                "    print(\"\")\n",
+                "    print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n",
+                "else:\n",
+                "    print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n",
+                "    print(rules)\n",
+                "    hints = 0\n",
+                "    if len(rules) > 0:\n",
+                "        for entry in entries_for_analysis:\n",
+                "            for rule in rules:\n",
+                "                if entry.find(rule[0]) != -1:\n",
+                "                    print (entry)\n",
+                "\n",
+                "                    display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n",
+                "                    hints = hints + 1\n",
+                "\n",
+                "    print(\"\")\n",
+                "    print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). {hints} further troubleshooting hints made inline.\")"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": false
+        }
+    }
+}

+ 297 - 0
Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg117-get-approxy-nginx-logs.ipynb

@@ -0,0 +1,297 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "TSG117 - App-Deploy Proxy Nginx Logs\n",
+                "====================================\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "import re\n",
+                "\n",
+                "tail_lines = 200\n",
+                "\n",
+                "pod = None # All\n",
+                "container = \"app-service-proxy\"\n",
+                "log_files = [ \"/var/log/nginx/error.log\" ]\n",
+                "\n",
+                "expressions_to_analyze = [\n",
+                "    re.compile(\".{23}[error]\")\n",
+                "]"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Instantiate Kubernetes client"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Instantiate the Python Kubernetes client into 'api' variable\n",
+                "\n",
+                "import os\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "try:\n",
+                "    from kubernetes import client, config\n",
+                "    from kubernetes.stream import stream\n",
+                "\n",
+                "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n",
+                "        config.load_incluster_config()\n",
+                "    else:\n",
+                "        try:\n",
+                "            config.load_kube_config()\n",
+                "        except:\n",
+                "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n",
+                "            raise\n",
+                "    api = client.CoreV1Api()\n",
+                "\n",
+                "    print('Kubernetes client instantiated')\n",
+                "except ImportError:\n",
+                "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n",
+                "    raise"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n",
+                "    except IndexError:\n",
+                "        from IPython.display import Markdown\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print('The kubernetes namespace for your big data cluster is: ' + namespace)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get tail for log"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Display the last 'tail_lines' of files in 'log_files' list\n",
+                "\n",
+                "pods = api.list_namespaced_pod(namespace)\n",
+                "\n",
+                "entries_for_analysis = []\n",
+                "\n",
+                "for p in pods.items:\n",
+                "    if pod is None or p.metadata.name == pod:\n",
+                "        for c in p.spec.containers:\n",
+                "            if container is None or c.name == container:\n",
+                "                for log_file in log_files:\n",
+                "                    print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n",
+                "                    try:\n",
+                "                        output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n",
+                "                    except Exception:\n",
+                "                        print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n",
+                "                    else:\n",
+                "                        for line in output.split('\\n'):\n",
+                "                            for expression in expressions_to_analyze:\n",
+                "                                if expression.match(line):\n",
+                "                                    entries_for_analysis.append(line)\n",
+                "                            print(line)\n",
+                "print(\"\")\n",
+                "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Analyze log entries and suggest relevant Troubleshooting Guides"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Analyze log entries and suggest further relevant troubleshooting guides\n",
+                "from IPython.display import Markdown\n",
+                "import os\n",
+                "import json\n",
+                "import requests\n",
+                "import ipykernel\n",
+                "import datetime\n",
+                "\n",
+                "from urllib.parse import urljoin\n",
+                "from notebook import notebookapp\n",
+                "\n",
+                "def get_notebook_name():\n",
+                "    \"\"\"Return the full path of the jupyter notebook.   Some runtimes (e.g. ADS) \n",
+                "    have the kernel_id in the filename of the connection file.  If so, the \n",
+                "    notebook name at runtime can be determined using `list_running_servers`.\n",
+                "    Other runtimes (e.g. azdata) do not have the kernel_id in the filename of\n",
+                "    the connection file, therefore we are unable to establish the filename\n",
+                "    \"\"\"\n",
+                "    connection_file = os.path.basename(ipykernel.get_connection_file())\n",
+                "    \n",
+                "    # If the runtime has the kernel_id in the connection filename, use it to\n",
+                "    # get the real notebook name at runtime, otherwise, use the notebook \n",
+                "    # filename from build time.\n",
+                "    try: \n",
+                "        kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n",
+                "    except:\n",
+                "        pass\n",
+                "    else:\n",
+                "        for servers in list(notebookapp.list_running_servers()):\n",
+                "            try:\n",
+                "                response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n",
+                "            except:\n",
+                "                pass\n",
+                "            else:\n",
+                "                for nn in json.loads(response.text):\n",
+                "                    if nn['kernel']['id'] == kernel_id:\n",
+                "                        return nn['path']\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def get_notebook_rules():\n",
+                "    \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n",
+                "    file_name = get_notebook_name()\n",
+                "\n",
+                "    if file_name == None:\n",
+                "        return None\n",
+                "    else:\n",
+                "        j = load_json(file_name)\n",
+                "\n",
+                "        if \"azdata\" not in j[\"metadata\"] or \\\n",
+                "            \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n",
+                "            \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "            return []\n",
+                "        else:\n",
+                "            return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n",
+                "\n",
+                "rules = get_notebook_rules()\n",
+                "\n",
+                "if rules == None:\n",
+                "    print(\"\")\n",
+                "    print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n",
+                "else:\n",
+                "    print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n",
+                "    print(rules)\n",
+                "    hints = 0\n",
+                "    if len(rules) > 0:\n",
+                "        for entry in entries_for_analysis:\n",
+                "            for rule in rules:\n",
+                "                if entry.find(rule[0]) != -1:\n",
+                "                    print (entry)\n",
+                "\n",
+                "                    display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n",
+                "                    hints = hints + 1\n",
+                "\n",
+                "    print(\"\")\n",
+                "    print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). {hints} further troubleshooting hints made inline.\")"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": false,
+            "expert": {
+                "log_analyzer_rules": [
+                    [
+                        "(111: Connection refused) while connecting to upstream",
+                        "TSG113",
+                        "TSG113 - Run App-Deploy",
+                        "../monitor-bdc/tsg113-run-app.ipynb"
+                    ]
+                ]
+            }
+        }
+    }
+}
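The expressions_to_analyze parameters in these notebooks are applied with re.match, which anchors at the start of the line, so patterns such as ".{23} WARN " work by skipping a fixed-width timestamp prefix before testing the level token. A small illustration using the zookeeper pattern; the sample log lines are hypothetical.

import re

# re.match anchors at the start of the line; ".{23}" skips a fixed-width
# timestamp such as "2020-01-01 12:00:00,123" (23 characters) so that only
# the level token after it is tested.
expression = re.compile(".{23} WARN ")

assert expression.match("2020-01-01 12:00:00,123 WARN  Quorum peer lost contact")
assert not expression.match("2020-01-01 12:00:00,123 INFO  Snapshot written")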

+ 249 - 0
Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg120-get-all-provisioner-log-tails.ipynb

@@ -0,0 +1,249 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "TSG120 - Provisioner log tail for all containers in BDC\n",
+                "=======================================================\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "tail_lines = 100\n",
+                "line_offset = 27 # Skip the date/time at start of line\n",
+                "\n",
+                "cmd = f'tail -n {tail_lines} /var/log/provisioner/provisioner.log'\n",
+                "\n",
+                "coalesce_duplicates = True"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Analyze log in all pod containers\n",
+                "\n",
+                "### Instantiate Kubernetes client"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Instantiate the Python Kubernetes client into 'api' variable\n",
+                "\n",
+                "import os\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "try:\n",
+                "    from kubernetes import client, config\n",
+                "    from kubernetes.stream import stream\n",
+                "\n",
+                "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n",
+                "        config.load_incluster_config()\n",
+                "    else:\n",
+                "        try:\n",
+                "            config.load_kube_config()\n",
+                "        except:\n",
+                "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n",
+                "            raise\n",
+                "    api = client.CoreV1Api()\n",
+                "\n",
+                "    print('Kubernetes client instantiated')\n",
+                "except ImportError:\n",
+                "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n",
+                "    raise"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n",
+                "    except IndexError:\n",
+                "        from IPython.display import Markdown\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print('The kubernetes namespace for your big data cluster is: ' + namespace)"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "from IPython.display import Markdown\n",
+                "\n",
+                "import os\n",
+                "import json\n",
+                "import requests\n",
+                "import ipykernel\n",
+                "import datetime\n",
+                "\n",
+                "from urllib.parse import urljoin\n",
+                "from notebook import notebookapp\n",
+                "\n",
+                "def get_notebook_name():\n",
+                "    \"\"\"Return the full path of the jupyter notebook.   Some runtimes (e.g. ADS) \n",
+                "    have the kernel_id in the filename of the connection file.  If so, the \n",
+                "    notebook name at runtime can be determined using `list_running_servers`.\n",
+                "    Other runtimes (e.g. azdata) do not have the kernel_id in the filename of\n",
+                "    the connection file, therefore we are unable to establish the filename\n",
+                "    \"\"\"\n",
+                "    connection_file = os.path.basename(ipykernel.get_connection_file())\n",
+                "    \n",
+                "    # If the runtime has the kernel_id in the connection filename, use it to\n",
+                "    # get the real notebook name at runtime, otherwise, use the notebook \n",
+                "    # filename from build time.\n",
+                "    try: \n",
+                "        kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n",
+                "    except:\n",
+                "        pass\n",
+                "    else:\n",
+                "        for servers in list(notebookapp.list_running_servers()):\n",
+                "            try:\n",
+                "                response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n",
+                "            except:\n",
+                "                pass\n",
+                "            else:\n",
+                "                for nn in json.loads(response.text):\n",
+                "                    if nn['kernel']['id'] == kernel_id:\n",
+                "                        return nn['path']\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def get_notebook_rules():\n",
+                "    \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n",
+                "    file_name = get_notebook_name()\n",
+                "\n",
+                "    if file_name == None:\n",
+                "        return None\n",
+                "    else:\n",
+                "        j = load_json(file_name)\n",
+                "\n",
+                "        if \"azdata\" not in j[\"metadata\"] or \\\n",
+                "            \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n",
+                "            \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "            return []\n",
+                "        else:\n",
+                "            return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n",
+                "\n",
+                "rules = get_notebook_rules()\n",
+                "\n",
+                "pod_list = api.list_namespaced_pod(namespace)\n",
+                "pod_names = [pod.metadata.name for pod in pod_list.items]\n",
+                "\n",
+                "for pod in pod_list.items:\n",
+                "    container_names = [container.name for container in pod.spec.containers]\n",
+                "    for container in container_names:\n",
+                "            print (f\"*** LOGS for CONTAINER: {container} in POD: {pod.metadata.name}\")\n",
+                "            try:\n",
+                "                logs=stream(api.connect_get_namespaced_pod_exec, pod.metadata.name, namespace, command=['/bin/sh', '-c', cmd], container=container, stderr=True, stdout=True)\n",
+                "\n",
+                "                if coalesce_duplicates:\n",
+                "                    previous_line = \"\"\n",
+                "                    duplicates = 1\n",
+                "                    for line in logs.split('\\n'):\n",
+                "                        if line[line_offset:] != previous_line[line_offset:]:\n",
+                "                            if duplicates != 1:\n",
+                "                                print(f\"\\t{previous_line} (x{duplicates})\")\n",
+                "                            print(f\"\\t{line}\")\n",
+                "\n",
+                "                            for rule in rules:\n",
+                "                                if line[line_offset:].find(rule[0]) != -1:\n",
+                "                                    display(Markdown(f'HINT: Use [{rule[2]}](rule[3]) to resolve this issue.'))\n",
+                "\n",
+                "                            duplicates = 1\n",
+                "                        else:\n",
+                "                            duplicates = duplicates + 1\n",
+                "                            continue\n",
+                "\n",
+                "                        previous_line = line\n",
+                "                else:\n",
+                "                    print(logs)\n",
+                "\n",
+                "            except Exception:\n",
+                "                print (f\"Failed to get LOGS for CONTAINER: {container} in POD: {pod.metadata.name}\")"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": false,
+            "expert": {
+                "log_analyzer_rules": []
+            }
+        }
+    }
+}
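TSG120's coalesce_duplicates option collapses runs of lines that are identical once the leading line_offset characters (the date/time prefix) are ignored, printing a "(xN)" count instead of repeating them. A minimal sketch of that idea with hypothetical provisioner entries; like the notebook, it does not flush a run that ends on the last line.

line_offset = 27  # skip the date/time at the start of each line

lines = [
    "2020-01-01 00:00:00.0000000 waiting for controller",  # hypothetical entries
    "2020-01-01 00:00:01.0000000 waiting for controller",
    "2020-01-01 00:00:02.0000000 controller is up",
]

previous_line, duplicates = "", 1
for line in lines:
    if line[line_offset:] != previous_line[line_offset:]:
        if duplicates != 1:
            # a run of duplicates just ended; report how long it was
            print(f"{previous_line} (x{duplicates})")
        print(line)
        duplicates = 1
        previous_line = line
    else:
        duplicates += 1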

+ 291 - 0
Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg121-get-all-supervisor-mssql-logs.ipynb

@@ -0,0 +1,291 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "TSG121 - Supervisor mssql-server logs\n",
+                "=====================================\n",
+                "\n",
+                "These supervisor mssql-server logs can contain some more information\n",
+                "from Polybase, not available in errorlog or the polybase logs.\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "import re\n",
+                "\n",
+                "tail_lines = 500\n",
+                "\n",
+                "pod = None # All\n",
+                "container = \"mssql-server\"\n",
+                "log_files = [ \"/var/log/supervisor/log/mssql-server-*.log\" ]\n",
+                "\n",
+                "expressions_to_analyze = [\n",
+                "    re.compile(\".{26}[WARN ]\"),\n",
+                "    re.compile(\".{26}[ERROR]\")\n",
+                "]"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Instantiate Kubernetes client"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Instantiate the Python Kubernetes client into 'api' variable\n",
+                "\n",
+                "import os\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "try:\n",
+                "    from kubernetes import client, config\n",
+                "    from kubernetes.stream import stream\n",
+                "\n",
+                "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n",
+                "        config.load_incluster_config()\n",
+                "    else:\n",
+                "        try:\n",
+                "            config.load_kube_config()\n",
+                "        except:\n",
+                "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n",
+                "            raise\n",
+                "    api = client.CoreV1Api()\n",
+                "\n",
+                "    print('Kubernetes client instantiated')\n",
+                "except ImportError:\n",
+                "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n",
+                "    raise"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n",
+                "    except IndexError:\n",
+                "        from IPython.display import Markdown\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print('The kubernetes namespace for your big data cluster is: ' + namespace)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get tail for log"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Display the last 'tail_lines' of files in 'log_files' list\n",
+                "\n",
+                "pods = api.list_namespaced_pod(namespace)\n",
+                "\n",
+                "entries_for_analysis = []\n",
+                "\n",
+                "for p in pods.items:\n",
+                "    if pod is None or p.metadata.name == pod:\n",
+                "        for c in p.spec.containers:\n",
+                "            if container is None or c.name == container:\n",
+                "                for log_file in log_files:\n",
+                "                    print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n",
+                "                    try:\n",
+                "                        output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n",
+                "                    except Exception:\n",
+                "                        print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n",
+                "                    else:\n",
+                "                        for line in output.split('\\n'):\n",
+                "                            for expression in expressions_to_analyze:\n",
+                "                                if expression.match(line):\n",
+                "                                    entries_for_analysis.append(line)\n",
+                "                            print(line)\n",
+                "print(\"\")\n",
+                "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Analyze log entries and suggest relevant Troubleshooting Guides"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Analyze log entries and suggest further relevant troubleshooting guides\n",
+                "from IPython.display import Markdown\n",
+                "import os\n",
+                "import json\n",
+                "import requests\n",
+                "import ipykernel\n",
+                "import datetime\n",
+                "\n",
+                "from urllib.parse import urljoin\n",
+                "from notebook import notebookapp\n",
+                "\n",
+                "def get_notebook_name():\n",
+                "    \"\"\"Return the full path of the jupyter notebook.   Some runtimes (e.g. ADS) \n",
+                "    have the kernel_id in the filename of the connection file.  If so, the \n",
+                "    notebook name at runtime can be determined using `list_running_servers`.\n",
+                "    Other runtimes (e.g. azdata) do not have the kernel_id in the filename of\n",
+                "    the connection file, therefore we are unable to establish the filename\n",
+                "    \"\"\"\n",
+                "    connection_file = os.path.basename(ipykernel.get_connection_file())\n",
+                "    \n",
+                "    # If the runtime has the kernel_id in the connection filename, use it to\n",
+                "    # get the real notebook name at runtime, otherwise, use the notebook \n",
+                "    # filename from build time.\n",
+                "    try: \n",
+                "        kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n",
+                "    except:\n",
+                "        pass\n",
+                "    else:\n",
+                "        for servers in list(notebookapp.list_running_servers()):\n",
+                "            try:\n",
+                "                response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n",
+                "            except:\n",
+                "                pass\n",
+                "            else:\n",
+                "                for nn in json.loads(response.text):\n",
+                "                    if nn['kernel']['id'] == kernel_id:\n",
+                "                        return nn['path']\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def get_notebook_rules():\n",
+                "    \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n",
+                "    file_name = get_notebook_name()\n",
+                "\n",
+                "    if file_name == None:\n",
+                "        return None\n",
+                "    else:\n",
+                "        j = load_json(file_name)\n",
+                "\n",
+                "        if \"azdata\" not in j[\"metadata\"] or \\\n",
+                "            \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n",
+                "            \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "            return []\n",
+                "        else:\n",
+                "            return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n",
+                "\n",
+                "rules = get_notebook_rules()\n",
+                "\n",
+                "if rules == None:\n",
+                "    print(\"\")\n",
+                "    print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n",
+                "else:\n",
+                "    print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n",
+                "    print(rules)\n",
+                "    hints = 0\n",
+                "    if len(rules) > 0:\n",
+                "        for entry in entries_for_analysis:\n",
+                "            for rule in rules:\n",
+                "                if entry.find(rule[0]) != -1:\n",
+                "                    print (entry)\n",
+                "\n",
+                "                    display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n",
+                "                    hints = hints + 1\n",
+                "\n",
+                "    print(\"\")\n",
+                "    print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). {hints} further troubleshooting hints made inline.\")"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": false
+        }
+    }
+}
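All of these notebooks locate the Big Data Cluster namespace the same way: an AZDATA_NAMESPACE environment variable wins, otherwise the first namespace carrying the MSSQL_CLUSTER label is used. A standalone sketch of that lookup, assuming a working kubeconfig.

import os
from kubernetes import client, config

config.load_kube_config()  # assumes kubectl is already configured
api = client.CoreV1Api()

if "AZDATA_NAMESPACE" in os.environ:
    namespace = os.environ["AZDATA_NAMESPACE"]
else:
    # The BDC namespace carries the MSSQL_CLUSTER label; taking items[0] is
    # only safe when a single Big Data Cluster exists in the target cluster.
    namespace = api.list_namespace(label_selector="MSSQL_CLUSTER").items[0].metadata.name

print("The kubernetes namespace for your big data cluster is: " + namespace)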

+ 288 - 0
Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg122-get-hive-metastore-logs.ipynb

@@ -0,0 +1,288 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "TSG122 - Hive Metastore logs\n",
+                "============================\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "import re\n",
+                "\n",
+                "tail_lines = 500\n",
+                "\n",
+                "pod = None # All\n",
+                "container = \"hadoop-hivemetastore\"\n",
+                "log_files = [ \"/var/log/supervisor/log/hivemetastorehttp*.log\" ]\n",
+                "\n",
+                "expressions_to_analyze = [\n",
+                "    re.compile(\".{26}[WARN ]\"),\n",
+                "    re.compile(\".{26}[ERROR]\")\n",
+                "]"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Instantiate Kubernetes client"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Instantiate the Python Kubernetes client into 'api' variable\n",
+                "\n",
+                "import os\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "try:\n",
+                "    from kubernetes import client, config\n",
+                "    from kubernetes.stream import stream\n",
+                "\n",
+                "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n",
+                "        config.load_incluster_config()\n",
+                "    else:\n",
+                "        try:\n",
+                "            config.load_kube_config()\n",
+                "        except:\n",
+                "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n",
+                "            raise\n",
+                "    api = client.CoreV1Api()\n",
+                "\n",
+                "    print('Kubernetes client instantiated')\n",
+                "except ImportError:\n",
+                "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n",
+                "    raise"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n",
+                "    except IndexError:\n",
+                "        from IPython.display import Markdown\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print('The kubernetes namespace for your big data cluster is: ' + namespace)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get tail for log"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Display the last 'tail_lines' of files in 'log_files' list\n",
+                "\n",
+                "pods = api.list_namespaced_pod(namespace)\n",
+                "\n",
+                "entries_for_analysis = []\n",
+                "\n",
+                "for p in pods.items:\n",
+                "    if pod is None or p.metadata.name == pod:\n",
+                "        for c in p.spec.containers:\n",
+                "            if container is None or c.name == container:\n",
+                "                for log_file in log_files:\n",
+                "                    print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n",
+                "                    try:\n",
+                "                        output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n",
+                "                    except Exception:\n",
+                "                        print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n",
+                "                    else:\n",
+                "                        for line in output.split('\\n'):\n",
+                "                            for expression in expressions_to_analyze:\n",
+                "                                if expression.match(line):\n",
+                "                                    entries_for_analysis.append(line)\n",
+                "                            print(line)\n",
+                "print(\"\")\n",
+                "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Analyze log entries and suggest relevant Troubleshooting Guides"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Analyze log entries and suggest further relevant troubleshooting guides\n",
+                "from IPython.display import Markdown\n",
+                "import os\n",
+                "import json\n",
+                "import requests\n",
+                "import ipykernel\n",
+                "import datetime\n",
+                "\n",
+                "from urllib.parse import urljoin\n",
+                "from notebook import notebookapp\n",
+                "\n",
+                "def get_notebook_name():\n",
+                "    \"\"\"Return the full path of the jupyter notebook.   Some runtimes (e.g. ADS) \n",
+                "    have the kernel_id in the filename of the connection file.  If so, the \n",
+                "    notebook name at runtime can be determined using `list_running_servers`.\n",
+                "    Other runtimes (e.g. azdata) do not have the kernel_id in the filename of\n",
+                "    the connection file, therefore we are unable to establish the filename\n",
+                "    \"\"\"\n",
+                "    connection_file = os.path.basename(ipykernel.get_connection_file())\n",
+                "    \n",
+                "    # If the runtime has the kernel_id in the connection filename, use it to\n",
+                "    # get the real notebook name at runtime, otherwise, use the notebook \n",
+                "    # filename from build time.\n",
+                "    try: \n",
+                "        kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n",
+                "    except:\n",
+                "        pass\n",
+                "    else:\n",
+                "        for servers in list(notebookapp.list_running_servers()):\n",
+                "            try:\n",
+                "                response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n",
+                "            except:\n",
+                "                pass\n",
+                "            else:\n",
+                "                for nn in json.loads(response.text):\n",
+                "                    if nn['kernel']['id'] == kernel_id:\n",
+                "                        return nn['path']\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def get_notebook_rules():\n",
+                "    \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n",
+                "    file_name = get_notebook_name()\n",
+                "\n",
+                "    if file_name == None:\n",
+                "        return None\n",
+                "    else:\n",
+                "        j = load_json(file_name)\n",
+                "\n",
+                "        if \"azdata\" not in j[\"metadata\"] or \\\n",
+                "            \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n",
+                "            \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "            return []\n",
+                "        else:\n",
+                "            return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n",
+                "\n",
+                "rules = get_notebook_rules()\n",
+                "\n",
+                "if rules == None:\n",
+                "    print(\"\")\n",
+                "    print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n",
+                "else:\n",
+                "    print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n",
+                "    print(rules)\n",
+                "    hints = 0\n",
+                "    if len(rules) > 0:\n",
+                "        for entry in entries_for_analysis:\n",
+                "            for rule in rules:\n",
+                "                if entry.find(rule[0]) != -1:\n",
+                "                    print (entry)\n",
+                "\n",
+                "                    display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n",
+                "                    hints = hints + 1\n",
+                "\n",
+                "    print(\"\")\n",
+                "    print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). {hints} further troubleshooting hints made inline.\")"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": false
+        }
+    }
+}
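
The `.{26}` prefix in the parameters cell above skips a fixed-width timestamp so that `re.match`, which anchors at the start of the line, tests the level keyword in place. A minimal sketch with made-up log lines padded to that column:

```python
import re

# Same shape as the notebook's filter: skip a fixed-width 26-character
# timestamp prefix, then expect the log level keyword.
expressions_to_analyze = [
    re.compile(".{26}WARN "),
    re.compile(".{26}ERROR")
]

# Made-up log lines, padded so the level keyword starts at column 27.
lines = [
    "2020-09-01 10:15:42,123".ljust(26) + "ERROR Compaction failed for default.t1",
    "2020-09-01 10:15:43,456".ljust(26) + "INFO  Metastore started",
]

# re.match anchors at the start of the string, unlike re.search.
for line in lines:
    if any(expression.match(line) for expression in expressions_to_analyze):
        print("entry selected for analysis:", line)
```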

+ 288 - 0
Big-Data-Clusters/CU8/Public/content/log-analyzers/tsg123-get-hive-logs.ipynb

@@ -0,0 +1,288 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "TSG123 - Hive logs\n",
+                "==================\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "import re\n",
+                "\n",
+                "tail_lines = 500\n",
+                "\n",
+                "pod = None # All\n",
+                "container = \"hadoop-hivemetastore\"\n",
+                "log_files = [ \"/var/log/supervisor/log/hive-*.log\" ]\n",
+                "\n",
+                "expressions_to_analyze = [\n",
+                "    re.compile(\".{26}[WARN ]\"),\n",
+                "    re.compile(\".{26}[ERROR]\")\n",
+                "]"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Instantiate Kubernetes client"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Instantiate the Python Kubernetes client into 'api' variable\n",
+                "\n",
+                "import os\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "try:\n",
+                "    from kubernetes import client, config\n",
+                "    from kubernetes.stream import stream\n",
+                "\n",
+                "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n",
+                "        config.load_incluster_config()\n",
+                "    else:\n",
+                "        try:\n",
+                "            config.load_kube_config()\n",
+                "        except:\n",
+                "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n",
+                "            raise\n",
+                "    api = client.CoreV1Api()\n",
+                "\n",
+                "    print('Kubernetes client instantiated')\n",
+                "except ImportError:\n",
+                "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n",
+                "    raise"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n",
+                "    except IndexError:\n",
+                "        from IPython.display import Markdown\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print('The kubernetes namespace for your big data cluster is: ' + namespace)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get tail for log"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Display the last 'tail_lines' of files in 'log_files' list\n",
+                "\n",
+                "pods = api.list_namespaced_pod(namespace)\n",
+                "\n",
+                "entries_for_analysis = []\n",
+                "\n",
+                "for p in pods.items:\n",
+                "    if pod is None or p.metadata.name == pod:\n",
+                "        for c in p.spec.containers:\n",
+                "            if container is None or c.name == container:\n",
+                "                for log_file in log_files:\n",
+                "                    print (f\"- LOGS: '{log_file}' for CONTAINER: '{c.name}' in POD: '{p.metadata.name}'\")\n",
+                "                    try:\n",
+                "                        output = stream(api.connect_get_namespaced_pod_exec, p.metadata.name, namespace, command=['/bin/sh', '-c', f'tail -n {tail_lines} {log_file}'], container=c.name, stderr=True, stdout=True)\n",
+                "                    except Exception:\n",
+                "                        print (f\"FAILED to get LOGS for CONTAINER: {c.name} in POD: {p.metadata.name}\")\n",
+                "                    else:\n",
+                "                        for line in output.split('\\n'):\n",
+                "                            for expression in expressions_to_analyze:\n",
+                "                                if expression.match(line):\n",
+                "                                    entries_for_analysis.append(line)\n",
+                "                            print(line)\n",
+                "print(\"\")\n",
+                "print(f\"{len(entries_for_analysis)} log entries found for further analysis.\")"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Analyze log entries and suggest relevant Troubleshooting Guides"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Analyze log entries and suggest further relevant troubleshooting guides\n",
+                "from IPython.display import Markdown\n",
+                "import os\n",
+                "import json\n",
+                "import requests\n",
+                "import ipykernel\n",
+                "import datetime\n",
+                "\n",
+                "from urllib.parse import urljoin\n",
+                "from notebook import notebookapp\n",
+                "\n",
+                "def get_notebook_name():\n",
+                "    \"\"\"Return the full path of the jupyter notebook.   Some runtimes (e.g. ADS) \n",
+                "    have the kernel_id in the filename of the connection file.  If so, the \n",
+                "    notebook name at runtime can be determined using `list_running_servers`.\n",
+                "    Other runtimes (e.g. azdata) do not have the kernel_id in the filename of\n",
+                "    the connection file, therefore we are unable to establish the filename\n",
+                "    \"\"\"\n",
+                "    connection_file = os.path.basename(ipykernel.get_connection_file())\n",
+                "    \n",
+                "    # If the runtime has the kernel_id in the connection filename, use it to\n",
+                "    # get the real notebook name at runtime, otherwise, use the notebook \n",
+                "    # filename from build time.\n",
+                "    try: \n",
+                "        kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n",
+                "    except:\n",
+                "        pass\n",
+                "    else:\n",
+                "        for servers in list(notebookapp.list_running_servers()):\n",
+                "            try:\n",
+                "                response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n",
+                "            except:\n",
+                "                pass\n",
+                "            else:\n",
+                "                for nn in json.loads(response.text):\n",
+                "                    if nn['kernel']['id'] == kernel_id:\n",
+                "                        return nn['path']\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def get_notebook_rules():\n",
+                "    \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n",
+                "    file_name = get_notebook_name()\n",
+                "\n",
+                "    if file_name == None:\n",
+                "        return None\n",
+                "    else:\n",
+                "        j = load_json(file_name)\n",
+                "\n",
+                "        if \"azdata\" not in j[\"metadata\"] or \\\n",
+                "            \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n",
+                "            \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "            return []\n",
+                "        else:\n",
+                "            return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n",
+                "\n",
+                "rules = get_notebook_rules()\n",
+                "\n",
+                "if rules == None:\n",
+                "    print(\"\")\n",
+                "    print(f\"Log Analysis only available when run in Azure Data Studio. Not available when run in azdata.\")\n",
+                "else:\n",
+                "    print(f\"Applying the following {len(rules)} rules to {len(entries_for_analysis)} log entries for analysis, looking for HINTs to further troubleshooting.\")\n",
+                "    print(rules)\n",
+                "    hints = 0\n",
+                "    if len(rules) > 0:\n",
+                "        for entry in entries_for_analysis:\n",
+                "            for rule in rules:\n",
+                "                if entry.find(rule[0]) != -1:\n",
+                "                    print (entry)\n",
+                "\n",
+                "                    display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n",
+                "                    hints = hints + 1\n",
+                "\n",
+                "    print(\"\")\n",
+                "    print(f\"{len(entries_for_analysis)} log entries analyzed (using {len(rules)} rules). {hints} further troubleshooting hints made inline.\")"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": false
+        }
+    }
+}
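
Both Hive log notebooks read files that exist only inside the container, so they exec `tail` through the Kubernetes Python client's stream helper rather than calling `read_namespaced_pod_log`. A minimal standalone sketch, assuming local kubeconfig access; the pod, namespace, and file names below are hypothetical:

```python
# Minimal sketch: run `tail` inside a container via the Kubernetes Python
# client's exec stream, as the Hive log notebooks do for file-based logs.
# The pod, namespace, and container values here are hypothetical.
from kubernetes import client, config
from kubernetes.stream import stream

config.load_kube_config()        # assumes a local kubeconfig
api = client.CoreV1Api()

namespace = "mssql-cluster"      # hypothetical BDC namespace
pod_name = "master-0"            # hypothetical pod
container = "hadoop-hivemetastore"
log_file = "/var/log/supervisor/log/hive-*.log"

# The shell expands the glob inside the container, so wildcards in
# 'log_file' work the same way they do in the notebooks above.
output = stream(api.connect_get_namespaced_pod_exec,
                pod_name, namespace,
                command=['/bin/sh', '-c', f'tail -n 100 {log_file}'],
                container=container, stderr=True, stdout=True)
print(output)
```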

+ 19 - 0
Big-Data-Clusters/CU8/Public/content/log-files/readme.md

@@ -0,0 +1,19 @@
+# Logs notebooks
+
+- A set of notebooks to gather logs from a SQL Server Big Data Cluster.
+
+[Home](../readme.md)
+
+## Notebooks in this Chapter
+- [TSG001 - Run azdata copy-logs](tsg001-copy-logs.ipynb)
+
+- [TSG091 - Get the azdata CLI logs](tsg091-get-azdata-logs.ipynb)
+
+- [TSG083 - Run kubectl cluster-info dump](tsg083-run-kubectl-cluster-info-dump.ipynb)
+
+- [TSG061 - Get tail of all container logs for pods in BDC namespace](tsg061-tail-bdc-container-logs.ipynb)
+
+- [TSG062 - Get tail of all previous container logs for pods in BDC namespace](tsg062-tail-bdc-previous-container-logs.ipynb)
+
+- [TSG084 - Internal Query Processor Error](tsg084-internal-query-process-error.ipynb)
+

+ 17 - 0
Big-Data-Clusters/CU8/Public/content/log-files/toc.yml

@@ -0,0 +1,17 @@
+- title: Logs
+  url: /log-files/readme
+  not_numbered: true
+  expand_sections: false
+  sections:
+  - title: TSG001 - Run azdata copy-logs
+    url: log-files/tsg001-copy-logs
+  - title: TSG091 - Get the azdata CLI logs
+    url: log-files/tsg091-get-azdata-logs
+  - title: TSG083 - Run kubectl cluster-info dump
+    url: log-files/tsg083-run-kubectl-cluster-info-dump
+  - title: TSG061 - Get tail of all container logs for pods in BDC namespace
+    url: log-files/tsg061-tail-bdc-container-logs
+  - title: TSG062 - Get tail of all previous container logs for pods in BDC namespace
+    url: log-files/tsg062-tail-bdc-previous-container-logs
+  - title: TSG084 - Internal Query Processor Error
+    url: log-files/tsg084-internal-query-process-error

File diff content is too large to display
+ 359 - 0
Big-Data-Clusters/CU8/Public/content/log-files/tsg001-copy-logs.ipynb


+ 205 - 0
Big-Data-Clusters/CU8/Public/content/log-files/tsg061-tail-bdc-container-logs.ipynb

@@ -0,0 +1,205 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "TSG061 - Get tail of all container logs for pods in BDC namespace\n",
+                "=================================================================\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "since_seconds = 60 * 60 * 1 # the last hour\n",
+                "coalesce_duplicates = True"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Instantiate Kubernetes client"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Instantiate the Python Kubernetes client into 'api' variable\n",
+                "\n",
+                "import os\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "try:\n",
+                "    from kubernetes import client, config\n",
+                "    from kubernetes.stream import stream\n",
+                "\n",
+                "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n",
+                "        config.load_incluster_config()\n",
+                "    else:\n",
+                "        try:\n",
+                "            config.load_kube_config()\n",
+                "        except:\n",
+                "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n",
+                "            raise\n",
+                "    api = client.CoreV1Api()\n",
+                "\n",
+                "    print('Kubernetes client instantiated')\n",
+                "except ImportError:\n",
+                "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n",
+                "    raise"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n",
+                "    except IndexError:\n",
+                "        from IPython.display import Markdown\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print('The kubernetes namespace for your big data cluster is: ' + namespace)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get logs for all containers in Big Data Cluster namespace"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "pod_list = api.list_namespaced_pod(namespace)\n",
+                "\n",
+                "pod_names = [pod.metadata.name for pod in pod_list.items]\n",
+                "\n",
+                "print('Scanning pods: ' + ', '.join(pod_names))\n",
+                "\n",
+                "for pod in pod_list.items:\n",
+                "    print(\"*** %s\\t%s\\t%s\" % (pod.metadata.name,\n",
+                "                        pod.status.phase,\n",
+                "                        pod.status.pod_ip))\n",
+                "\n",
+                "    container_names = [container.name for container in pod.spec.containers]\n",
+                "\n",
+                "    for container in container_names:\n",
+                "        print (f\"POD: {pod.metadata.name} / CONTAINER: {container}\")\n",
+                "        try:\n",
+                "            logs = api.read_namespaced_pod_log(pod.metadata.name, namespace, container=container, since_seconds=since_seconds)\n",
+                "\n",
+                "            if coalesce_duplicates:\n",
+                "                previous_line = \"\"\n",
+                "                duplicates = 1\n",
+                "                for line in logs.split('\\n'):\n",
+                "                    if line[27:] != previous_line[27:]:\n",
+                "                        if duplicates != 1:\n",
+                "                            print(f\"\\t{previous_line} (x{duplicates})\")\n",
+                "                        print(f\"\\t{line}\")\n",
+                "                        duplicates = 1\n",
+                "                    else:\n",
+                "                        duplicates = duplicates + 1\n",
+                "\n",
+                "                    previous_line = line\n",
+                "            else:\n",
+                "                print(logs)\n",
+                "        except Exception:\n",
+                "            print (f\"Failed to get LOGS for CONTAINER: {container} in POD: {pod.metadata.name}\")"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "Related\n",
+                "-------\n",
+                "\n",
+                "-   [TSG062 - Get tail of all previous container logs for pods in BDC\n",
+                "    namespace](../log-files/tsg062-tail-bdc-previous-container-logs.ipynb)"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "side_effects": true,
+        "azdata": {
+            "diagnostic": {
+                "categories": [
+                    "kubernetes"
+                ]
+            },
+            "symlink": true
+        }
+    }
+}
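
The coalescing loop in TSG061 (and TSG062) compares lines from character 27 onward, so the same message repeated with different timestamps collapses into a single line with an `(xN)` count. A standalone sketch with made-up log lines:

```python
# Standalone sketch of the duplicate-coalescing loop used above: lines are
# compared from character 27 onward, so the same message with different
# timestamps counts as a duplicate. The sample lines below are made up.
logs = "\n".join([
    "2020-09-01 10:15:42,123456 WARN  Lease renewal slow",
    "2020-09-01 10:15:43,123456 WARN  Lease renewal slow",
    "2020-09-01 10:15:44,123456 WARN  Lease renewal slow",
    "2020-09-01 10:15:45,123456 ERROR Lease expired",
])

previous_line = ""
duplicates = 1
for line in logs.split('\n'):
    if line[27:] != previous_line[27:]:     # ignore the timestamp prefix
        if duplicates != 1:
            print(f"\t{previous_line} (x{duplicates})")
        print(f"\t{line}")
        duplicates = 1
    else:
        duplicates = duplicates + 1
    previous_line = line
```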

+ 200 - 0
Big-Data-Clusters/CU8/Public/content/log-files/tsg062-tail-bdc-previous-container-logs.ipynb

@@ -0,0 +1,200 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "TSG062 - Get tail of all previous container logs for pods in BDC namespace\n",
+                "==========================================================================\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Parameters"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "parameters"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "tail_lines = 10000\n",
+                "coalesce_duplicates = True"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Instantiate Kubernetes client"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Instantiate the Python Kubernetes client into 'api' variable\n",
+                "\n",
+                "import os\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "try:\n",
+                "    from kubernetes import client, config\n",
+                "    from kubernetes.stream import stream\n",
+                "\n",
+                "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n",
+                "        config.load_incluster_config()\n",
+                "    else:\n",
+                "        try:\n",
+                "            config.load_kube_config()\n",
+                "        except:\n",
+                "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n",
+                "            raise\n",
+                "    api = client.CoreV1Api()\n",
+                "\n",
+                "    print('Kubernetes client instantiated')\n",
+                "except ImportError:\n",
+                "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n",
+                "    raise"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get the namespace for the big data cluster\n",
+                "\n",
+                "Get the namespace of the Big Data Cluster from the Kuberenetes API.\n",
+                "\n",
+                "**NOTE:**\n",
+                "\n",
+                "If there is more than one Big Data Cluster in the target Kubernetes\n",
+                "cluster, then either:\n",
+                "\n",
+                "-   set \\[0\\] to the correct value for the big data cluster.\n",
+                "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
+                "    Azure Data Studio."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
+                "\n",
+                "if \"AZDATA_NAMESPACE\" in os.environ:\n",
+                "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
+                "else:\n",
+                "    try:\n",
+                "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n",
+                "    except IndexError:\n",
+                "        from IPython.display import Markdown\n",
+                "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
+                "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
+                "        raise\n",
+                "\n",
+                "print('The kubernetes namespace for your big data cluster is: ' + namespace)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Get logs for previous instance of all containers in Big Data Cluster namespace"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "pod_list = api.list_namespaced_pod(namespace)\n",
+                "\n",
+                "pod_names = [pod.metadata.name for pod in pod_list.items]\n",
+                "\n",
+                "print('Scanning pods: ' + ', '.join(pod_names))\n",
+                "\n",
+                "for pod in pod_list.items:\n",
+                "    print(\"*** %s\\t%s\\t%s\" % (pod.metadata.name,\n",
+                "                        pod.status.phase,\n",
+                "                        pod.status.pod_ip))\n",
+                "\n",
+                "    container_names = [container.name for container in pod.spec.containers]\n",
+                "\n",
+                "    for container in container_names:\n",
+                "        print (f\"POD: {pod.metadata.name} / CONTAINER: {container}\")\n",
+                "        try:\n",
+                "            logs = api.read_namespaced_pod_log(pod.metadata.name, namespace, container=container, tail_lines=tail_lines, previous=True)\n",
+                "\n",
+                "            if coalesce_duplicates:\n",
+                "                previous_line = \"\"\n",
+                "                duplicates = 1\n",
+                "                for line in logs.split('\\n'):\n",
+                "                    if line[27:] != previous_line[27:]:\n",
+                "                        if duplicates != 1:\n",
+                "                            print(f\"\\t{previous_line} (x{duplicates})\")\n",
+                "                        print(f\"\\t{line}\")\n",
+                "                        duplicates = 1\n",
+                "                    else:\n",
+                "                        duplicates = duplicates + 1\n",
+                "\n",
+                "                    previous_line = line\n",
+                "            else:\n",
+                "                print(logs)\n",
+                "        except Exception:\n",
+                "            print (f\"Failed to get LOGS for CONTAINER: {container} in POD: {pod.metadata.name}\")"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "Related\n",
+                "-------\n",
+                "\n",
+                "-   [TSG061 - Get tail of all container logs for pods in BDC\n",
+                "    namespace](../log-files/tsg061-tail-bdc-container-logs.ipynb)"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": false,
+            "symlink": true
+        }
+    }
+}
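
TSG062 differs from TSG061 only in requesting logs from the previous instance of each container (`previous=True`), which exist only for containers that have restarted; the per-container try/except is there because the API call fails when no previous instance exists. A minimal sketch, assuming local kubeconfig access; the namespace name is hypothetical:

```python
# Minimal sketch: fetch logs from the previous instance of each container,
# as TSG062 does. The call raises ApiException for containers that have
# never restarted, hence the per-container exception handling.
from kubernetes import client, config
from kubernetes.client.rest import ApiException

config.load_kube_config()        # assumes a local kubeconfig
api = client.CoreV1Api()

namespace = "mssql-cluster"      # hypothetical BDC namespace

for pod in api.list_namespaced_pod(namespace).items:
    for container in pod.spec.containers:
        try:
            logs = api.read_namespaced_pod_log(
                pod.metadata.name, namespace,
                container=container.name, tail_lines=100, previous=True)
        except ApiException:
            print(f"No previous logs for CONTAINER: {container.name} in POD: {pod.metadata.name}")
        else:
            print(f"POD: {pod.metadata.name} / CONTAINER: {container.name}")
            print(logs)
```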

+ 405 - 0
Big-Data-Clusters/CU8/Public/content/log-files/tsg083-run-kubectl-cluster-info-dump.ipynb

@@ -0,0 +1,405 @@
+{
+    "cells": [
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "TSG083 - Run kubectl cluster-info dump\n",
+                "======================================\n",
+                "\n",
+                "NOTE: This kubectl command can produce a lot of output, and may take\n",
+                "some time (and produce a large notebook!). For Kubernetes clusters that\n",
+                "have been up for a long time, consider running this command outside of a\n",
+                "notebook.\n",
+                "\n",
+                "Steps\n",
+                "-----\n",
+                "\n",
+                "### Common functions\n",
+                "\n",
+                "Define helper functions used in this notebook."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {
+                "tags": [
+                    "hide_input"
+                ]
+            },
+            "outputs": [],
+            "source": [
+                "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n",
+                "import sys\n",
+                "import os\n",
+                "import re\n",
+                "import json\n",
+                "import platform\n",
+                "import shlex\n",
+                "import shutil\n",
+                "import datetime\n",
+                "\n",
+                "from subprocess import Popen, PIPE\n",
+                "from IPython.display import Markdown\n",
+                "\n",
+                "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
+                "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
+                "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
+                "\n",
+                "first_run = True\n",
+                "rules = None\n",
+                "debug_logging = False\n",
+                "\n",
+                "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
+                "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
+                "\n",
+                "    NOTES:\n",
+                "\n",
+                "    1.  Commands that need this kind of ' quoting on Windows e.g.:\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n",
+                "\n",
+                "        Need to actually pass in as '\"':\n",
+                "\n",
+                "            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n",
+                "\n",
+                "        The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n",
+                "        \n",
+                "            `iter(p.stdout.readline, b'')`\n",
+                "\n",
+                "        The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n",
+                "    \"\"\"\n",
+                "    MAX_RETRIES = 5\n",
+                "    output = \"\"\n",
+                "    retry = False\n",
+                "\n",
+                "    global first_run\n",
+                "    global rules\n",
+                "\n",
+                "    if first_run:\n",
+                "        first_run = False\n",
+                "        rules = load_rules()\n",
+                "\n",
+                "    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n",
+                "    #\n",
+                "    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
+                "    #\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n",
+                "        cmd = cmd.replace(\"\\n\", \" \")\n",
+                "\n",
+                "    # shlex.split is required on bash and for Windows paths with spaces\n",
+                "    #\n",
+                "    cmd_actual = shlex.split(cmd)\n",
+                "\n",
+                "    # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n",
+                "    #\n",
+                "    user_provided_exe_name = cmd_actual[0].lower()\n",
+                "\n",
+                "    # When running python, use the python in the ADS sandbox ({sys.executable})\n",
+                "    #\n",
+                "    if cmd.startswith(\"python \"):\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n",
+                "\n",
+                "        # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n",
+                "        # with:\n",
+                "        #\n",
+                "        #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n",
+                "        #\n",
+                "        # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n",
+                "        #\n",
+                "        if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n",
+                "            os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n",
+                "\n",
+                "    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n",
+                "    #\n",
+                "    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n",
+                "        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n",
+                "\n",
+                "    # To aid supportability, determine which binary file will actually be executed on the machine\n",
+                "    #\n",
+                "    which_binary = None\n",
+                "\n",
+                "    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n",
+                "    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n",
+                "    # of CURL exists on the machine use that one.  (Unfortunately the curl.exe in System32 is almost\n",
+                "    # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n",
+                "    # look for the 2nd installation of CURL in the path)\n",
+                "    if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n",
+                "        path = os.getenv('PATH')\n",
+                "        for p in path.split(os.path.pathsep):\n",
+                "            p = os.path.join(p, \"curl.exe\")\n",
+                "            if os.path.exists(p) and os.access(p, os.X_OK):\n",
+                "                if p.lower().find(\"system32\") == -1:\n",
+                "                    cmd_actual[0] = p\n",
+                "                    which_binary = p\n",
+                "                    break\n",
+                "\n",
+                "    # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n",
+                "    # seems to be required for .msi installs of azdata.cmd/az.cmd.  (otherwise Popen returns FileNotFound) \n",
+                "    #\n",
+                "    # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        which_binary = shutil.which(cmd_actual[0])\n",
+                "\n",
+                "    # Display an install HINT, so the user can click on a SOP to install the missing binary\n",
+                "    #\n",
+                "    if which_binary == None:\n",
+                "        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n",
+                "        print(sys.path)\n",
+                "\n",
+                "        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
+                "            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n",
+                "    else:   \n",
+                "        cmd_actual[0] = which_binary\n",
+                "\n",
+                "    start_time = datetime.datetime.now().replace(microsecond=0)\n",
+                "\n",
+                "    print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n",
+                "    print(f\"       using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n",
+                "    print(f\"       cwd: {os.getcwd()}\")\n",
+                "\n",
+                "    # Command-line tools such as CURL and AZDATA HDFS commands output\n",
+                "    # scrolling progress bars, which causes Jupyter to hang forever, to\n",
+                "    # workaround this, use no_output=True\n",
+                "    #\n",
+                "\n",
+                "\n",
+                "    # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n",
+                "    #\n",
+                "    wait = True \n",
+                "\n",
+                "    try:\n",
+                "        if no_output:\n",
+                "            p = Popen(cmd_actual)\n",
+                "        else:\n",
+                "            p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n",
+                "            with p.stdout:\n",
+                "                for line in iter(p.stdout.readline, b''):\n",
+                "                    line = line.decode()\n",
+                "                    if return_output:\n",
+                "                        output = output + line\n",
+                "                    else:\n",
+                "                        if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n",
+                "                            regex = re.compile('  \"(.*)\"\\: \"(.*)\"') \n",
+                "                            match = regex.match(line)\n",
+                "                            if match:\n",
+                "                                if match.group(1).find(\"HTML\") != -1:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n",
+                "                                else:\n",
+                "                                    display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n",
+                "\n",
+                "                                    wait = False\n",
+                "                                    break # otherwise infinite hang, have not worked out why yet.\n",
+                "                        else:\n",
+                "                            print(line, end='')\n",
+                "                            if rules is not None:\n",
+                "                                apply_expert_rules(line)\n",
+                "\n",
+                "        if wait:\n",
+                "            p.wait()\n",
+                "    except FileNotFoundError as e:\n",
+                "        if install_hint is not None:\n",
+                "            display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n",
+                "\n",
+                "        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n",
+                "\n",
+                "    exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n",
+                "\n",
+                "    if not no_output:\n",
+                "        for line in iter(p.stderr.readline, b''):\n",
+                "            try:\n",
+                "                line_decoded = line.decode()\n",
+                "            except UnicodeDecodeError:\n",
+                "                # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n",
+                "                #\n",
+                "                #   \\xa0\n",
+                "                #\n",
+                "                # For example see this in the response from `az group create`:\n",
+                "                #\n",
+                "                # ERROR: Get Token request returned http error: 400 and server \n",
+                "                # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n",
+                "                # The refresh token has expired due to inactivity.\\xa0The token was \n",
+                "                # issued on 2018-10-25T23:35:11.9832872Z\n",
+                "                #\n",
+                "                # which generates the exception:\n",
+                "                #\n",
+                "                # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n",
+                "                #\n",
+                "                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n",
+                "                print(line)\n",
+                "                line_decoded = \"\"\n",
+                "                pass\n",
+                "            else:\n",
+                "\n",
+                "                # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n",
+                "                # print this empty \"ERR:\" as it confuses.\n",
+                "                #\n",
+                "                if line_decoded == \"\":\n",
+                "                    continue\n",
+                "                \n",
+                "                print(f\"STDERR: {line_decoded}\", end='')\n",
+                "\n",
+                "                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n",
+                "                    exit_code_workaround = 1\n",
+                "\n",
+                "                # inject HINTs to next TSG/SOP based on output in stderr\n",
+                "                #\n",
+                "                if user_provided_exe_name in error_hints:\n",
+                "                    for error_hint in error_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(error_hint[0]) != -1:\n",
+                "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
+                "\n",
+                "                # apply expert rules (to run follow-on notebooks), based on output\n",
+                "                #\n",
+                "                if rules is not None:\n",
+                "                    apply_expert_rules(line_decoded)\n",
+                "\n",
+                "                # Verify if a transient error, if so automatically retry (recursive)\n",
+                "                #\n",
+                "                if user_provided_exe_name in retry_hints:\n",
+                "                    for retry_hint in retry_hints[user_provided_exe_name]:\n",
+                "                        if line_decoded.find(retry_hint) != -1:\n",
+                "                            if retry_count < MAX_RETRIES:\n",
+                "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
+                "                                retry_count = retry_count + 1\n",
+                "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
+                "\n",
+                "                                if return_output:\n",
+                "                                    if base64_decode:\n",
+                "                                        import base64\n",
+                "\n",
+                "                                        return base64.b64decode(output).decode('utf-8')\n",
+                "                                    else:\n",
+                "                                        return output\n",
+                "\n",
+                "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
+                "\n",
+                "    # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n",
+                "    # don't wait here, if success known above\n",
+                "    #\n",
+                "    if wait: \n",
+                "        if p.returncode != 0:\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n",
+                "    else:\n",
+                "        if exit_code_workaround !=0 :\n",
+                "            raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n",
+                "\n",
+                "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
+                "\n",
+                "    if return_output:\n",
+                "        if base64_decode:\n",
+                "            import base64\n",
+                "\n",
+                "            return base64.b64decode(output).decode('utf-8')\n",
+                "        else:\n",
+                "            return output\n",
+                "\n",
+                "def load_json(filename):\n",
+                "    \"\"\"Load a json file from disk and return the contents\"\"\"\n",
+                "\n",
+                "    with open(filename, encoding=\"utf8\") as json_file:\n",
+                "        return json.load(json_file)\n",
+                "\n",
+                "def load_rules():\n",
+                "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
+                "\n",
+                "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
+                "    #\n",
+                "    try:\n",
+                "        j = load_json(\"tsg083-run-kubectl-cluster-info-dump.ipynb\")\n",
+                "    except:\n",
+                "        pass # If the user has renamed the book, we can't load ourself.  NOTE: Is there a way in Jupyter, to know your own filename?\n",
+                "    else:\n",
+                "        if \"metadata\" in j and \\\n",
+                "            \"azdata\" in j[\"metadata\"] and \\\n",
+                "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
+                "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
+                "\n",
+                "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
+                "\n",
+                "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n",
+                "\n",
+                "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
+                "\n",
+                "            return rules\n",
+                "\n",
+                "def apply_expert_rules(line):\n",
+                "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n",
+                "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
+                "\n",
+                "    global rules\n",
+                "\n",
+                "    for rule in rules:\n",
+                "        notebook = rule[1]\n",
+                "        cell_type = rule[2]\n",
+                "        output_type = rule[3] # i.e. stream or error\n",
+                "        output_type_name = rule[4] # i.e. ename or name \n",
+                "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
+                "        details_name = rule[6]  # i.e. evalue or text \n",
+                "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
+                "\n",
+                "        if debug_logging:\n",
+                "            print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n",
+                "\n",
+                "        if re.match(expression, line, re.DOTALL):\n",
+                "\n",
+                "            if debug_logging:\n",
+                "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n",
+                "\n",
+                "            match_found = True\n",
+                "\n",
+                "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
+                "\n",
+                "\n",
+                "\n",
+                "print('Common functions defined successfully.')\n",
+                "\n",
+                "# Hints for binary (transient fault) retry, (known) error and install guide\n",
+                "#\n",
+                "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n",
+                "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n",
+                "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "metadata": {},
+            "source": [
+                "### Run cluster-info dump"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "run(f'kubectl cluster-info dump')"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "print('Notebook execution complete.')"
+            ]
+        }
+    ],
+    "nbformat": 4,
+    "nbformat_minor": 5,
+    "metadata": {
+        "kernelspec": {
+            "name": "python3",
+            "display_name": "Python 3"
+        },
+        "azdata": {
+            "side_effects": false
+        }
+    }
+}

Some files were not shown because too many files changed in this diff