|
@@ -405,6 +405,7 @@ v2.2.2 - 10/26/2017 - Corrected auto soft NUMA reporting wrong status (thanks Bj
|
|
|
v2.2.2.1 - 1/11/2018 - Fixed issues with unicode characters (thanks Brent Ozar);
|
|
|
Fixed max server memory calculations;
|
|
|
Added check for Database Health Detection in Server_checks section (thanks Anders Uhl Pedersen).
|
|
|
+v2.2.3 - 10/27/2018 - Fixed performance checks duplicate results issue on SQL 2016+.
|
|
|
|
|
|
PURPOSE: Checks SQL Server in scope for some of most common skewed Best Practices. Valid from SQL Server 2005 onwards.
|
|
|
|
|
@@ -8076,7 +8077,32 @@ WHERE (cntr_type = 272696576 OR cntr_type = 1073874176 OR cntr_type = 1073939712
|
|
|
HAVING (t2.wait_time_ms-t1.wait_time_ms) > 0
|
|
|
ORDER BY wait_time_s DESC;
|
|
|
|
|
|
- SELECT 'Performance_checks' AS [Category], 'Waits_Last_' + CONVERT(VARCHAR(3), @duration) + 's' AS [Information], W1.wait_type,
|
|
|
+ -- SOS_SCHEDULER_YIELD = Might indicate CPU pressure if very high overall percentage. Check yielding conditions in http://technet.microsoft.com/en-us/library/cc917684.aspx
|
|
|
+ -- THREADPOOL = Look for high blocking or contention problems with workers. This will not show up in sys.dm_exec_requests;
|
|
|
+ -- LATCH = indicates contention for access to some non-page structures. ACCESS_METHODS_DATASET_PARENT, ACCESS_METHODS_SCAN_RANGE_GENERATOR or NESTING_TRANSACTION_FULL latches indicate parallelism issues;
|
|
|
+ -- PAGELATCH = indicates contention for access to in-memory copies of pages, like PFS, SGAM and GAM;
|
|
|
+ -- PAGELATCH_UP = Does the filegroup have enough files? Contention in PFS?
|
|
|
+ -- PAGELATCH_EX = Contention while doing many UPDATE statements against small tables?
|
|
|
+ -- PAGELATCH_EX = Many concurrent INSERT statements into a table that has an index on an IDENTITY or NEWSEQUENTIALID column? -> http://blogs.msdn.com/b/blogdoezequiel/archive/2013/05/23/pagelatch-ex-waits-and-heavy-inserts.aspx
|
|
|
+ -- PAGEIOLATCH = indicates IO problems, or BP pressure.
|
|
|
+ -- PREEMPTIVE_OS_WRITEFILEGATHERER (2008+) = usually autogrow scenarios, usually together with WRITELOG;
|
|
|
+ -- IO_COMPLETION = usually TempDB spilling;
|
|
|
+ -- ASYNC_IO_COMPLETION = usually when not using IFI, or waiting on backups.
|
|
|
+ -- DISKIO_SUSPEND = High wait times here indicate the SNAPSHOT BACKUP may be taking longer than expected. Typically the delay is within the VDI application performing the snapshot backup;
|
|
|
+ -- BACKUPIO = check for slow backup media, like Tapes or Disks;
|
|
|
+ -- BACKUPBUFFER = usually when backing up to Tape;
|
|
|
+ -- Check sys.dm_os_waiting_tasks for Exchange wait types in http://technet.microsoft.com/en-us/library/ms188743.aspx;
|
|
|
+ -- Wait Resource e_waitPipeNewRow in CXPACKET waits Producer waiting on consumer for a packet to fill;
|
|
|
+ -- Wait Resource e_waitPipeGetRow in CXPACKET waits Consumer waiting on producer to fill a packet;
|
|
|
+ -- CXPACKET = if OLTP, check for parallelism issues if above 20 pct. If combined with a high number of PAGEIOLATCH_XX waits, it could be large parallel table scans going on because of incorrect non-clustered indexes, or out-of-date statistics causing a bad query plan;
|
|
|
+ -- WRITELOG = log management system waiting for a log flush to disk. Examine the IO latency for the log file
|
|
|
+ -- CMEMTHREAD = indicates that the rate of insertion of entries into the plan cache is very high and there is contention -> http://blogs.msdn.com/b/psssql/archive/2012/12/20/how-it-works-cmemthread-and-debugging-them.aspx
|
|
|
+ -- SOS_RESERVEDMEMBLOCKLIST = look for procedures with a large number of parameters, or queries with a long list of expression values specified in an IN clause, which would require multi-page allocations
|
|
|
+ -- RESOURCE_SEMAPHORE_SMALL_QUERY or RESOURCE_SEMAPHORE = queries are waiting for execution memory. Look for plans with excessive hashing or sorts.
|
|
|
+ -- RESOURCE_SEMAPHORE_QUERY_COMPILE = usually high compilation or recompilation scenario (higher ratio of prepared plans vs. compiled plans). On x64 usually memory hungry queries and compiles. On x86 perhaps short on VAS. -> http://technet.microsoft.com/en-us/library/cc293620.aspx
|
|
|
+ -- DBMIRROR_DBM_MUTEX = indicates contention for the send buffer that database mirroring shares between all the mirroring sessions.
|
|
|
+
|
|
|
+ SELECT 'Performance_checks' AS [Category], 'Waits_Last_' + CONVERT(VARCHAR(3), @duration) + 's' AS [Information], W1.wait_type, -- label built from @duration, same variable the Latches_Last_ and Spinlocks_Last_ result sets use
|
|
|
CAST(W1.wait_time_s AS DECIMAL(12, 2)) AS wait_time_s,
|
|
|
CAST(W1.signal_wait_time_s AS DECIMAL(12, 2)) AS signal_wait_time_s,
|
|
|
CAST(W1.resource_wait_time_s AS DECIMAL(12, 2)) AS resource_wait_time_s,
|
|
@@ -8098,7 +8124,6 @@ WHERE (cntr_type = 272696576 OR cntr_type = 1073874176 OR cntr_type = 1073939712
|
|
|
WHEN W1.wait_type LIKE N'CLR%' OR W1.wait_type LIKE N'SQLCLR%' THEN N'SQL CLR'
|
|
|
WHEN W1.wait_type LIKE N'DBMIRROR%' OR W1.wait_type = N'MIRROR_SEND_MESSAGE' THEN N'Mirroring'
|
|
|
WHEN W1.wait_type LIKE N'XACT%' or W1.wait_type LIKE N'DTC%' or W1.wait_type LIKE N'TRAN_MARKLATCH_%' or W1.wait_type LIKE N'MSQL_XACT_%' or W1.wait_type = N'TRANSACTION_MUTEX' THEN N'Transaction'
|
|
|
- --WHEN W1.wait_type LIKE N'SLEEP_%' or W1.wait_type IN (N'LAZYWRITER_SLEEP', N'SQLTRACE_BUFFER_FLUSH', N'SQLTRACE_INCREMENTAL_FLUSH_SLEEP', N'SQLTRACE_WAIT_ENTRIES', N'FT_IFTS_SCHEDULER_IDLE_WAIT', N'XE_DISPATCHER_WAIT', N'REQUEST_FOR_DEADLOCK_SEARCH', N'LOGMGR_QUEUE', N'ONDEMAND_TASK_QUEUE', N'CHECKPOINT_QUEUE', N'XE_TIMER_EVENT') THEN N'Idle'
|
|
|
WHEN W1.wait_type LIKE N'PREEMPTIVE_%' THEN N'External APIs or XPs'
|
|
|
WHEN W1.wait_type LIKE N'BROKER_%' AND W1.wait_type <> N'BROKER_RECEIVE_WAITFOR' THEN N'Service Broker'
|
|
|
WHEN W1.wait_type IN (N'LOGMGR', N'LOGBUFFER', N'LOGMGR_RESERVE_APPEND', N'LOGMGR_FLUSH', N'LOGMGR_PMM_LOG', N'CHKPT', N'WRITELOG') THEN N'Tran Log IO'
|
|
@@ -8111,7 +8136,6 @@ WHERE (cntr_type = 272696576 OR cntr_type = 1073874176 OR cntr_type = 1073939712
|
|
|
WHEN W1.wait_type IN (N'BACKUPIO', N'BACKUPBUFFER') THEN 'Backup IO'
|
|
|
WHEN W1.wait_type LIKE N'SE_REPL_%' or W1.wait_type LIKE N'REPL_%' or W1.wait_type IN (N'REPLICA_WRITES', N'FCB_REPLICA_WRITE', N'FCB_REPLICA_READ', N'PWAIT_HADRSIM') THEN N'Replication'
|
|
|
WHEN W1.wait_type IN (N'LOG_RATE_GOVERNOR', N'POOL_LOG_RATE_GOVERNOR', N'HADR_THROTTLE_LOG_RATE_GOVERNOR', N'INSTANCE_LOG_RATE_GOVERNOR') THEN N'Log Rate Governor'
|
|
|
- -- WHEN W1.wait_type LIKE N'SLEEP_%' OR W1.wait_type IN(N'LAZYWRITER_SLEEP', N'SQLTRACE_BUFFER_FLUSH', N'WAITFOR', N'WAIT_FOR_RESULTS', N'SQLTRACE_INCREMENTAL_FLUSH_SLEEP', N'SLEEP_TASK', N'SLEEP_SYSTEMTASK') THEN N'Sleep'
|
|
|
WHEN W1.wait_type = N'REPLICA_WRITE' THEN 'Snapshots'
|
|
|
WHEN W1.wait_type = N'WAIT_XTP_OFFLINE_CKPT_LOG_IO' OR W1.wait_type = N'WAIT_XTP_CKPT_CLOSE' THEN 'In-Memory OLTP Logging'
|
|
|
WHEN W1.wait_type LIKE N'QDS%' THEN N'Query Store'
|
|
@@ -8120,9 +8144,12 @@ WHERE (cntr_type = 272696576 OR cntr_type = 1073874176 OR cntr_type = 1073939712
|
|
|
WHEN W1.wait_type LIKE N'COLUMNSTORE%' THEN N'Columnstore'
|
|
|
ELSE N'Other' END AS 'wait_category'
|
|
|
FROM #tblFinalWaits AS W1 INNER JOIN #tblFinalWaits AS W2 ON W2.rn <= W1.rn
|
|
|
- GROUP BY W1.rn, W1.wait_type, W1.wait_time_s, W1.pct, W1.signal_wait_time_s, W1.resource_wait_time_s, W1.signal_wait_pct, W1.resource_wait_pct
|
|
|
- HAVING W1.wait_time_s >= 0.01 AND (SUM(W2.pct)-W1.pct) < 100 -- percentage threshold
|
|
|
- ORDER BY W1.rn;
|
|
|
+ GROUP BY W1.rn, W1.wait_type, CAST(W1.wait_time_s AS DECIMAL(12, 2)), CAST(W1.pct AS DECIMAL(12, 2)), CAST(W1.signal_wait_time_s AS DECIMAL(12, 2)), CAST(W1.resource_wait_time_s AS DECIMAL(12, 2)), CAST(W1.signal_wait_pct AS DECIMAL(12, 2)), CAST(W1.resource_wait_pct AS DECIMAL(12, 2))
|
|
|
+ HAVING CAST(W1.wait_time_s as DECIMAL(12, 2)) >= 0.01 AND (SUM(W2.pct)-CAST(W1.pct AS DECIMAL(12, 2))) < 100 -- percentage threshold
|
|
|
+ ORDER BY W1.rn
|
|
|
+
|
|
|
+ SET @params = N'@maxservermemIN bigint, @minservermemIN bigint, @systemmemIN bigint, @systemfreememIN bigint, @commit_targetIN bigint, @committedIN bigint';
|
|
|
+ EXECUTE sp_executesql @sqlcmd, @params, @maxservermemIN=@maxservermem -- NOTE(review): @params declares six parameters but only @maxservermemIN is bound, and @sqlcmd is not assigned in this hunk - confirm this call belongs in the waits section
|
|
|
|
|
|
;WITH Waits AS
|
|
|
(SELECT wait_type, wait_time_ms / 1000. AS wait_time_s,
|
|
@@ -8152,19 +8179,11 @@ WHERE (cntr_type = 272696576 OR cntr_type = 1073874176 OR cntr_type = 1073939712
|
|
|
CAST(SUM(W2.pct) AS DECIMAL(12, 2)) AS overall_running_pct,
|
|
|
CAST(W1.signal_wait_pct AS DECIMAL(12, 2)) AS signal_wait_pct,
|
|
|
CAST(W1.resource_wait_pct AS DECIMAL(12, 2)) AS resource_wait_pct,
|
|
|
- -- SOS_SCHEDULER_YIELD = Might indicate CPU pressure if very high overall percentage. Check yielding conditions in http://technet.microsoft.com/en-us/library/cc917684.aspx
|
|
|
CASE WHEN W1.wait_type = N'SOS_SCHEDULER_YIELD' THEN N'CPU'
|
|
|
- -- THREADPOOL = Look for high blocking or contention problems with workers. This will not show up in sys.dm_exec_requests;
|
|
|
WHEN W1.wait_type = N'THREADPOOL' THEN 'CPU - Unavailable Worker Threads'
|
|
|
WHEN W1.wait_type LIKE N'LCK_%' OR W1.wait_type = N'LOCK' THEN N'Lock'
|
|
|
- -- LATCH = indicates contention for access to some non-page structures. ACCESS_METHODS_DATASET_PARENT, ACCESS_METHODS_SCAN_RANGE_GENERATOR or NESTING_TRANSACTION_FULL latches indicate parallelism issues;
|
|
|
WHEN W1.wait_type LIKE N'LATCH_%' THEN N'Latch'
|
|
|
- -- PAGELATCH = indicates contention for access to in-memory copies of pages, like PFS, SGAM and GAM;
|
|
|
- -- PAGELATCH_UP = Does the filegroup have enough files? Contention in PFS?
|
|
|
- -- PAGELATCH_EX = Contention while doing many UPDATE statements against small tables?
|
|
|
- -- PAGELATCH_EX = Many concurrent INSERT statements into a table that has an index on an IDENTITY or NEWSEQUENTIALID column? -> http://blogs.msdn.com/b/blogdoezequiel/archive/2013/05/23/pagelatch-ex-waits-and-heavy-inserts.aspx
|
|
|
WHEN W1.wait_type LIKE N'PAGELATCH_%' THEN N'Buffer Latch'
|
|
|
- -- PAGEIOLATCH = indicates IO problems, or BP pressure.
|
|
|
WHEN W1.wait_type LIKE N'PAGEIOLATCH_%' THEN N'Buffer IO'
|
|
|
WHEN W1.wait_type LIKE N'HADR_SYNC_COMMIT' THEN N'Always On - Secondary Synch'
|
|
|
WHEN W1.wait_type LIKE N'HADR_%' OR W1.wait_type LIKE N'PWAIT_HADR_%' THEN N'Always On'
|
|
@@ -8174,8 +8193,6 @@ WHERE (cntr_type = 272696576 OR cntr_type = 1073874176 OR cntr_type = 1073939712
|
|
|
WHEN W1.wait_type LIKE N'CLR%' OR W1.wait_type LIKE N'SQLCLR%' THEN N'SQL CLR'
|
|
|
WHEN W1.wait_type LIKE N'DBMIRROR%' OR W1.wait_type = N'MIRROR_SEND_MESSAGE' THEN N'Mirroring'
|
|
|
WHEN W1.wait_type LIKE N'XACT%' or W1.wait_type LIKE N'DTC%' or W1.wait_type LIKE N'TRAN_MARKLATCH_%' or W1.wait_type LIKE N'MSQL_XACT_%' or W1.wait_type = N'TRANSACTION_MUTEX' THEN N'Transaction'
|
|
|
- --WHEN W1.wait_type LIKE N'SLEEP_%' or W1.wait_type IN (N'LAZYWRITER_SLEEP', N'SQLTRACE_BUFFER_FLUSH', N'SQLTRACE_INCREMENTAL_FLUSH_SLEEP', N'SQLTRACE_WAIT_ENTRIES', N'FT_IFTS_SCHEDULER_IDLE_WAIT', N'XE_DISPATCHER_WAIT', N'REQUEST_FOR_DEADLOCK_SEARCH', N'LOGMGR_QUEUE', N'ONDEMAND_TASK_QUEUE', N'CHECKPOINT_QUEUE', N'XE_TIMER_EVENT') THEN N'Idle'
|
|
|
- -- PREEMPTIVE_OS_WRITEFILEGATHERER (2008+) = usually autogrow scenarios, usually together with WRITELOG;
|
|
|
WHEN W1.wait_type LIKE N'PREEMPTIVE_%' THEN N'External APIs or XPs' -- Used to indicate a worker is running code that is not under the SQLOS Scheduling;
|
|
|
WHEN W1.wait_type LIKE N'BROKER_%' AND W1.wait_type <> N'BROKER_RECEIVE_WAITFOR' THEN N'Service Broker'
|
|
|
WHEN W1.wait_type IN (N'LOGMGR', N'LOGBUFFER', N'LOGMGR_RESERVE_APPEND', N'LOGMGR_FLUSH', N'LOGMGR_PMM_LOG', N'CHKPT', N'WRITELOG') THEN N'Tran Log IO'
|
|
@@ -8184,31 +8201,16 @@ WHERE (cntr_type = 272696576 OR cntr_type = 1073874176 OR cntr_type = 1073939712
|
|
|
WHEN W1.wait_type IN (N'WAITFOR', N'WAIT_FOR_RESULTS', N'BROKER_RECEIVE_WAITFOR') THEN N'User Wait'
|
|
|
WHEN W1.wait_type IN (N'TRACEWRITE', N'SQLTRACE_LOCK', N'SQLTRACE_FILE_BUFFER', N'SQLTRACE_FILE_WRITE_IO_COMPLETION', N'SQLTRACE_FILE_READ_IO_COMPLETION', N'SQLTRACE_PENDING_BUFFER_WRITERS', N'SQLTRACE_SHUTDOWN', N'QUERY_TRACEOUT', N'TRACE_EVTNOTIF') THEN N'Tracing'
|
|
|
WHEN W1.wait_type LIKE N'FT_%' OR W1.wait_type IN (N'FULLTEXT GATHERER', N'MSSEARCH', N'PWAIT_RESOURCE_SEMAPHORE_FT_PARALLEL_QUERY_SYNC') THEN N'Full Text Search'
|
|
|
- -- IO_COMPLETION = usually TempDB spilling;
|
|
|
- -- ASYNC_IO_COMPLETION = usually when not using IFI, or waiting on backups.
|
|
|
- -- DISKIO_SUSPEND = High wait times here indicate the SNAPSHOT BACKUP may be taking longer than expected. Typically the delay is within the VDI application perform the snapshot backup;
|
|
|
WHEN W1.wait_type IN (N'ASYNC_IO_COMPLETION', N'IO_COMPLETION', N'WRITE_COMPLETION', N'IO_QUEUE_LIMIT', /*N'HADR_FILESTREAM_IOMGR_IOCOMPLETION',*/ N'IO_RETRY') THEN N'Other Disk IO'
|
|
|
- -- BACKUPIO = check for slow backup media slow, like Tapes or Disks;
|
|
|
- -- BACKUPBUFFER = usually when backing up to Tape;
|
|
|
WHEN W1.wait_type IN(N'BACKUPIO', N'BACKUPBUFFER') THEN 'Backup IO'
|
|
|
- -- Check sys.dm_os_waiting_tasks for Exchange wait types in http://technet.microsoft.com/en-us/library/ms188743.aspx;
|
|
|
- -- Wait Resource e_waitPipeNewRow in CXPACKET waits Producer waiting on consumer for a packet to fill;
|
|
|
- -- Wait Resource e_waitPipeGetRow in CXPACKET waits Consumer waiting on producer to fill a packet;
|
|
|
- -- CXPACKET = if OLTP, check for parallelism issues if above 20 pct. If combined with a high number of PAGEIOLATCH_XX waits, it could be large parallel table scans going on because of incorrect non-clustered indexes, or out-of-date statistics causing a bad query plan;
|
|
|
WHEN W1.wait_type IN (N'CXPACKET', N'EXCHANGE', N'CXCONSUMER') THEN N'CPU - Parallelism'
|
|
|
- -- WRITELOG = log management system waiting for a log flush to disk. Examine the IO latency for the log file
|
|
|
WHEN W1.wait_type IN (N'LOGMGR', N'LOGBUFFER', N'LOGMGR_RESERVE_APPEND', N'LOGMGR_FLUSH', N'WRITELOG') THEN N'Logging'
|
|
|
WHEN W1.wait_type IN (N'NET_WAITFOR_PACKET',N'NETWORK_IO') THEN N'Network IO'
|
|
|
WHEN W1.wait_type = N'ASYNC_NETWORK_IO' THEN N'Client Network IO'
|
|
|
- -- CMEMTHREAD = indicates that the rate of insertion of entries into the plan cache is very high and there is contention -> http://blogs.msdn.com/b/psssql/archive/2012/12/20/how-it-works-cmemthread-and-debugging-them.aspx
|
|
|
- -- SOS_RESERVEDMEMBLOCKLIST = look for procedures with a large number of parameters, or queries with a long list of expression values specified in an IN clause, which would require multi-page allocations
|
|
|
WHEN W1.wait_type IN (N'UTIL_PAGE_ALLOC',N'SOS_VIRTUALMEMORY_LOW',N'CMEMTHREAD', N'SOS_RESERVEDMEMBLOCKLIST') THEN N'Memory'
|
|
|
- -- RESOURCE_SEMAPHORE_SMALL_QUERY or RESOURCE_SEMAPHORE = queries are waiting for execution memory. Look for plans with excessive hashing or sorts.
|
|
|
WHEN W1.wait_type IN (N'RESOURCE_SEMAPHORE_SMALL_QUERY', N'RESOURCE_SEMAPHORE') THEN N'Memory - Hash or Sort'
|
|
|
- -- RESOURCE_SEMAPHORE_QUERY_COMPILE = usually high compilation or recompilation scenario (higher ratio of prepared plans vs. compiled plans). On x64 usually memory hungry queries and compiles. On x86 perhaps short on VAS. -> http://technet.microsoft.com/en-us/library/cc293620.aspx
|
|
|
WHEN W1.wait_type LIKE N'RESOURCE_SEMAPHORE_%' OR W1.wait_type LIKE N'RESOURCE_SEMAPHORE_QUERY_COMPILE' THEN N'Memory - Compilation'
|
|
|
WHEN W1.wait_type LIKE N'CLR_%' OR W1.wait_type LIKE N'SQLCLR%' THEN N'CLR'
|
|
|
- -- DBMIRROR_DBM_MUTEX = indicates contention for the send buffer that database mirroring shares between all the mirroring sessions.
|
|
|
WHEN W1.wait_type LIKE N'DBMIRROR%' OR W1.wait_type = N'MIRROR_SEND_MESSAGE' THEN N'Mirroring'
|
|
|
WHEN W1.wait_type LIKE N'RESOURCE_SEMAPHORE_%' OR W1.wait_type LIKE N'RESOURCE_SEMAPHORE_QUERY_COMPILE' THEN N'Compilation'
|
|
|
WHEN W1.wait_type LIKE N'XACT%' OR W1.wait_type LIKE N'DTC_%' OR W1.wait_type LIKE N'TRAN_MARKLATCH_%' OR W1.wait_type LIKE N'MSQL_XACT_%' OR W1.wait_type = N'TRANSACTION_MUTEX' THEN N'Transaction'
|
|
@@ -8221,8 +8223,8 @@ WHERE (cntr_type = 272696576 OR cntr_type = 1073874176 OR cntr_type = 1073939712
|
|
|
WHEN W1.wait_type LIKE N'COLUMNSTORE%' THEN N'Columnstore'
|
|
|
ELSE N'Other' END AS 'wait_category'
|
|
|
FROM Waits AS W1 INNER JOIN Waits AS W2 ON W2.rn <= W1.rn
|
|
|
- GROUP BY W1.rn, W1.wait_type, W1.wait_time_s, W1.pct, W1.signal_wait_time_s, W1.resource_wait_time_s, W1.signal_wait_pct, W1.resource_wait_pct
|
|
|
- HAVING W1.wait_time_s >= 0.01 AND (SUM(W2.pct)-W1.pct) < 100 -- percentage threshold
|
|
|
+ GROUP BY W1.rn, W1.wait_type, CAST(W1.wait_time_s AS DECIMAL(12, 2)), CAST(W1.pct AS DECIMAL(12, 2)), CAST(W1.signal_wait_time_s AS DECIMAL(12, 2)), CAST(W1.resource_wait_time_s AS DECIMAL(12, 2)), CAST(W1.signal_wait_pct AS DECIMAL(12, 2)), CAST(W1.resource_wait_pct AS DECIMAL(12, 2))
|
|
|
+ HAVING CAST(W1.wait_time_s as DECIMAL(12, 2)) >= 0.01 AND (SUM(W2.pct)-CAST(W1.pct AS DECIMAL(12, 2))) < 100 -- percentage threshold
|
|
|
ORDER BY W1.rn;
|
|
|
|
|
|
;WITH cteLatches1 (latch_class,wait_time_ms,waiting_requests_count) AS (SELECT latch_class,wait_time_ms,waiting_requests_count FROM #tblLatches WHERE [retrieval_time] = @minctr),
|
|
@@ -8238,6 +8240,15 @@ WHERE (cntr_type = 272696576 OR cntr_type = 1073874176 OR cntr_type = 1073939712
|
|
|
HAVING (t2.wait_time_ms-t1.wait_time_ms) > 0
|
|
|
ORDER BY wait_time_s DESC;
|
|
|
|
|
|
+ -- ACCESS_METHODS_HOBT_VIRTUAL_ROOT = This latch is used to access the metadata for an index that contains the page ID of the index's root page. Contention on this latch can occur when a B-tree root page split occurs (requiring the latch in EX mode) and threads wanting to navigate down the B-tree (requiring the latch in SH mode) have to wait. This could be from very fast population of a small index using many concurrent connections, with or without page splits from random key values causing cascading page splits (from leaf to root).
|
|
|
+ -- ACCESS_METHODS_HOBT_COUNT = This latch is used to flush out page and row count deltas for a HoBt (Heap-or-B-tree) to the Storage Engine metadata tables. Contention would indicate *lots* of small, concurrent DML operations on a single table.
|
|
|
+ -- ACCESS_METHODS_DATASET_PARENT and ACCESS_METHODS_SCAN_RANGE_GENERATOR = These two latches are used during parallel scans to give each thread a range of page IDs to scan. The LATCH_XX waits for these latches will typically appear with CXPACKET waits and PAGEIOLATCH_XX waits (if the data being scanned is not memory-resident). Use normal parallelism troubleshooting methods to investigate further (e.g. is the parallelism warranted? maybe increase 'cost threshold for parallelism', lower MAXDOP, use a MAXDOP hint, use Resource Governor to limit DOP using a workload group with a MAX_DOP limit. Did a plan change from index seeks to parallel table scans because a tipping point was reached or a plan recompiled with an atypical SP parameter or poor statistics? Do NOT knee-jerk and set server MAXDOP to 1 – that's some of the worst advice I see on the Internet.);
|
|
|
+ -- NESTING_TRANSACTION_FULL = This latch, along with NESTING_TRANSACTION_READONLY, is used to control access to transaction description structures (called an XDES) for parallel nested transactions. The _FULL is for a transaction that's 'active', i.e. it's changed the database (usually for an index build/rebuild), and that makes the _READONLY description obvious. A query that involves a parallel operator must start a sub-transaction for each parallel thread that is used – these transactions are sub-transactions of the parallel nested transaction. For contention on these, I'd investigate unwanted parallelism but I don't have a definite "it's usually this problem". Also check out the comments for some info about these also sometimes being a problem when RCSI is used.
|
|
|
+ -- LOG_MANAGER = if you see this latch, it is almost certainly because a transaction log is growing because it could not clear/truncate for some reason. Find the database where the log is growing and then figure out what's preventing log clearing using sys.databases.
|
|
|
+ -- DBCC_MULTIOBJECT_SCANNER = This latch appears on Enterprise Edition when DBCC CHECK_ commands are allowed to run in parallel. It is used by threads to request the next data file page to process. Late last year this was identified as a major contention point inside DBCC CHECK* and there was work done to reduce the contention and make DBCC CHECK* run faster.
|
|
|
+ -- http://blogs.msdn.com/b/psssql/archive/2012/02/23/a-faster-checkdb-part-ii.aspx
|
|
|
+ -- FGCB_ADD_REMOVE = FGCB stands for File Group Control Block. This latch is required whenever a file is added or dropped from the filegroup, whenever a file is grown (manually or automatically), when recalculating proportional-fill weightings, and when cycling through the files in the filegroup as part of round-robin allocation. If you're seeing this, the most common cause is that there's a lot of file auto-growth happening. It could also be from a filegroup with lots of file (e.g. the primary filegroup in tempdb) where there are thousands of concurrent connections doing allocations. The proportional-fill weightings are recalculated every 8192 allocations, so there's the possibility of a slowdown with frequent recalculations over many files.
|
|
|
+
|
|
|
SELECT 'Performance_checks' AS [Category], 'Latches_Last_' + CONVERT(VARCHAR(3), @duration) + 's' AS [Information], W1.latch_class,
|
|
|
CAST(W1.wait_time_s AS DECIMAL(14, 2)) AS wait_time_s,
|
|
|
W1.waiting_requests_count,
|
|
@@ -8257,8 +8268,8 @@ WHERE (cntr_type = 272696576 OR cntr_type = 1073874176 OR cntr_type = 1073939712
|
|
|
WHEN W1.latch_class LIKE N'BUFFER' THEN N'[Buffer Pool]'
|
|
|
ELSE N'[Other]' END AS 'latch_category'
|
|
|
FROM #tblFinalLatches AS W1 INNER JOIN #tblFinalLatches AS W2 ON W2.rn <= W1.rn
|
|
|
- GROUP BY W1.rn, W1.latch_class, W1.wait_time_s, W1.waiting_requests_count, W1.pct
|
|
|
- HAVING SUM (W2.pct) - W1.pct < 100; -- percentage threshold
|
|
|
+ GROUP BY W1.rn, W1.latch_class, W1.wait_time_s, W1.waiting_requests_count, W1.pct -- group on the raw columns: the SELECT list casts wait_time_s to DECIMAL(14, 2), so a DECIMAL(12, 2) cast here would leave it ungrouped
|
|
|
+ HAVING SUM(W2.pct) - CAST(W1.pct AS DECIMAL(12, 2)) < 100; -- percentage threshold
|
|
|
|
|
|
;WITH Latches AS
|
|
|
(SELECT latch_class,
|
|
@@ -8276,22 +8287,14 @@ WHERE (cntr_type = 272696576 OR cntr_type = 1073874176 OR cntr_type = 1073939712
|
|
|
CAST(W1.pct AS DECIMAL(14, 2)) AS pct,
|
|
|
CAST(SUM(W1.pct) AS DECIMAL(12, 2)) AS overall_running_pct,
|
|
|
CAST((W1.wait_time_s / W1.waiting_requests_count) AS DECIMAL (14, 4)) AS avg_wait_s,
|
|
|
- -- ACCESS_METHODS_HOBT_VIRTUAL_ROOT = This latch is used to access the metadata for an index that contains the page ID of the index's root page. Contention on this latch can occur when a B-tree root page split occurs (requiring the latch in EX mode) and threads wanting to navigate down the B-tree (requiring the latch in SH mode) have to wait. This could be from very fast population of a small index using many concurrent connections, with or without page splits from random key values causing cascading page splits (from leaf to root).
|
|
|
- -- ACCESS_METHODS_HOBT_COUNT = This latch is used to flush out page and row count deltas for a HoBt (Heap-or-B-tree) to the Storage Engine metadata tables. Contention would indicate *lots* of small, concurrent DML operations on a single table.
|
|
|
CASE WHEN W1.latch_class LIKE N'ACCESS_METHODS_HOBT_COUNT'
|
|
|
OR W1.latch_class LIKE N'ACCESS_METHODS_HOBT_VIRTUAL_ROOT' THEN N'[HoBT - Metadata]'
|
|
|
- -- ACCESS_METHODS_DATASET_PARENT and ACCESS_METHODS_SCAN_RANGE_GENERATOR = These two latches are used during parallel scans to give each thread a range of page IDs to scan. The LATCH_XX waits for these latches will typically appear with CXPACKET waits and PAGEIOLATCH_XX waits (if the data being scanned is not memory-resident). Use normal parallelism troubleshooting methods to investigate further (e.g. is the parallelism warranted? maybe increase 'cost threshold for parallelism', lower MAXDOP, use a MAXDOP hint, use Resource Governor to limit DOP using a workload group with a MAX_DOP limit. Did a plan change from index seeks to parallel table scans because a tipping point was reached or a plan recompiled with an atypical SP parameter or poor statistics? Do NOT knee-jerk and set server MAXDOP to 1 – that's some of the worst advice I see on the Internet.);
|
|
|
- -- NESTING_TRANSACTION_FULL = This latch, along with NESTING_TRANSACTION_READONLY, is used to control access to transaction description structures (called an XDES) for parallel nested transactions. The _FULL is for a transaction that's 'active', i.e. it's changed the database (usually for an index build/rebuild), and that makes the _READONLY description obvious. A query that involves a parallel operator must start a sub-transaction for each parallel thread that is used – these transactions are sub-transactions of the parallel nested transaction. For contention on these, I'd investigate unwanted parallelism but I don't have a definite "it's usually this problem". Also check out the comments for some info about these also sometimes being a problem when RCSI is used.
|
|
|
WHEN W1.latch_class LIKE N'ACCESS_METHODS_DATASET_PARENT'
|
|
|
OR W1.latch_class LIKE N'ACCESS_METHODS_SCAN_RANGE_GENERATOR'
|
|
|
OR W1.latch_class LIKE N'NESTING_TRANSACTION_FULL' THEN N'[Parallelism]'
|
|
|
- -- LOG_MANAGER = you see this latch it is almost certainly because a transaction log is growing because it could not clear/truncate for some reason. Find the database where the log is growing and then figure out what's preventing log clearing using sys.databases.
|
|
|
WHEN W1.latch_class LIKE N'LOG_MANAGER' THEN N'[IO - Log]'
|
|
|
WHEN W1.latch_class LIKE N'TRACE_CONTROLLER' THEN N'[Trace]'
|
|
|
- -- DBCC_MULTIOBJECT_SCANNER = This latch appears on Enterprise Edition when DBCC CHECK_ commands are allowed to run in parallel. It is used by threads to request the next data file page to process. Late last year this was identified as a major contention point inside DBCC CHECK* and there was work done to reduce the contention and make DBCC CHECK* run faster.
|
|
|
- -- http://blogs.msdn.com/b/psssql/archive/2012/02/23/a-faster-checkdb-part-ii.aspx
|
|
|
WHEN W1.latch_class LIKE N'DBCC_MULTIOBJECT_SCANNER ' THEN N'[Parallelism - DBCC CHECK_]'
|
|
|
- -- FGCB_ADD_REMOVE = FGCB stands for File Group Control Block. This latch is required whenever a file is added or dropped from the filegroup, whenever a file is grown (manually or automatically), when recalculating proportional-fill weightings, and when cycling through the files in the filegroup as part of round-robin allocation. If you're seeing this, the most common cause is that there's a lot of file auto-growth happening. It could also be from a filegroup with lots of file (e.g. the primary filegroup in tempdb) where there are thousands of concurrent connections doing allocations. The proportional-fill weightings are recalculated every 8192 allocations, so there's the possibility of a slowdown with frequent recalculations over many files.
|
|
|
WHEN W1.latch_class LIKE N'FGCB_ADD_REMOVE' THEN N'[IO Operations]'
|
|
|
WHEN W1.latch_class LIKE N'DATABASE_MIRRORING_CONNECTION ' THEN N'[Mirroring - Busy]'
|
|
|
WHEN W1.latch_class LIKE N'BUFFER' THEN N'[Buffer Pool - PAGELATCH or PAGEIOLATCH]'
|
|
@@ -8299,8 +8302,8 @@ WHERE (cntr_type = 272696576 OR cntr_type = 1073874176 OR cntr_type = 1073939712
|
|
|
FROM Latches AS W1
|
|
|
INNER JOIN Latches AS W2
|
|
|
ON W2.rn <= W1.rn
|
|
|
- GROUP BY W1.rn, W1.latch_class, W1.wait_time_s, W1.waiting_requests_count, W1.pct
|
|
|
- HAVING SUM (W2.pct) - W1.pct < 100; -- percentage threshold
|
|
|
+ GROUP BY W1.rn, W1.latch_class, W1.wait_time_s, W1.waiting_requests_count, W1.pct -- group on the raw columns: the SELECT list casts pct to DECIMAL(14, 2) and divides the uncast wait_time_s, so both must be grouped uncast
|
|
|
+ HAVING SUM(W2.pct) - CAST(W1.pct AS DECIMAL(12, 2)) < 100; -- percentage threshold
|
|
|
|
|
|
;WITH Latches AS
|
|
|
(SELECT latch_class,
|
|
@@ -8318,22 +8321,14 @@ WHERE (cntr_type = 272696576 OR cntr_type = 1073874176 OR cntr_type = 1073939712
|
|
|
CAST(W1.pct AS DECIMAL(14, 2)) AS pct,
|
|
|
CAST(SUM(W1.pct) AS DECIMAL(12, 2)) AS overall_running_pct,
|
|
|
CAST((W1.wait_time_s / W1.waiting_requests_count) AS DECIMAL (14, 4)) AS avg_wait_s,
|
|
|
- -- ACCESS_METHODS_HOBT_VIRTUAL_ROOT = This latch is used to access the metadata for an index that contains the page ID of the index's root page. Contention on this latch can occur when a B-tree root page split occurs (requiring the latch in EX mode) and threads wanting to navigate down the B-tree (requiring the latch in SH mode) have to wait. This could be from very fast population of a small index using many concurrent connections, with or without page splits from random key values causing cascading page splits (from leaf to root).
|
|
|
- -- ACCESS_METHODS_HOBT_COUNT = This latch is used to flush out page and row count deltas for a HoBt (Heap-or-B-tree) to the Storage Engine metadata tables. Contention would indicate *lots* of small, concurrent DML operations on a single table.
|
|
|
CASE WHEN W1.latch_class LIKE N'ACCESS_METHODS_HOBT_COUNT'
|
|
|
OR W1.latch_class LIKE N'ACCESS_METHODS_HOBT_VIRTUAL_ROOT' THEN N'[HoBT - Metadata]'
|
|
|
- -- ACCESS_METHODS_DATASET_PARENT and ACCESS_METHODS_SCAN_RANGE_GENERATOR = These two latches are used during parallel scans to give each thread a range of page IDs to scan. The LATCH_XX waits for these latches will typically appear with CXPACKET waits and PAGEIOLATCH_XX waits (if the data being scanned is not memory-resident). Use normal parallelism troubleshooting methods to investigate further (e.g. is the parallelism warranted? maybe increase 'cost threshold for parallelism', lower MAXDOP, use a MAXDOP hint, use Resource Governor to limit DOP using a workload group with a MAX_DOP limit. Did a plan change from index seeks to parallel table scans because a tipping point was reached or a plan recompiled with an atypical SP parameter or poor statistics? Do NOT knee-jerk and set server MAXDOP to 1 – that's some of the worst advice I see on the Internet.);
|
|
|
- -- NESTING_TRANSACTION_FULL = This latch, along with NESTING_TRANSACTION_READONLY, is used to control access to transaction description structures (called an XDES) for parallel nested transactions. The _FULL is for a transaction that's 'active', i.e. it's changed the database (usually for an index build/rebuild), and that makes the _READONLY description obvious. A query that involves a parallel operator must start a sub-transaction for each parallel thread that is used – these transactions are sub-transactions of the parallel nested transaction. For contention on these, I'd investigate unwanted parallelism but I don't have a definite "it's usually this problem". Also check out the comments for some info about these also sometimes being a problem when RCSI is used.
|
|
|
WHEN W1.latch_class LIKE N'ACCESS_METHODS_DATASET_PARENT'
|
|
|
OR W1.latch_class LIKE N'ACCESS_METHODS_SCAN_RANGE_GENERATOR'
|
|
|
OR W1.latch_class LIKE N'NESTING_TRANSACTION_FULL' THEN N'[Parallelism]'
|
|
|
- -- LOG_MANAGER = you see this latch it is almost certainly because a transaction log is growing because it could not clear/truncate for some reason. Find the database where the log is growing and then figure out what's preventing log clearing using sys.databases.
|
|
|
WHEN W1.latch_class LIKE N'LOG_MANAGER' THEN N'[IO - Log]'
|
|
|
WHEN W1.latch_class LIKE N'TRACE_CONTROLLER' THEN N'[Trace]'
|
|
|
- -- DBCC_MULTIOBJECT_SCANNER = This latch appears on Enterprise Edition when DBCC CHECK_ commands are allowed to run in parallel. It is used by threads to request the next data file page to process. Late last year this was identified as a major contention point inside DBCC CHECK* and there was work done to reduce the contention and make DBCC CHECK* run faster.
|
|
|
- -- http://blogs.msdn.com/b/psssql/archive/2012/02/23/a-faster-checkdb-part-ii.aspx
|
|
|
WHEN W1.latch_class LIKE N'DBCC_MULTIOBJECT_SCANNER ' THEN N'[Parallelism - DBCC CHECK_]'
|
|
|
- -- FGCB_ADD_REMOVE = FGCB stands for File Group Control Block. This latch is required whenever a file is added or dropped from the filegroup, whenever a file is grown (manually or automatically), when recalculating proportional-fill weightings, and when cycling through the files in the filegroup as part of round-robin allocation. If you're seeing this, the most common cause is that there's a lot of file auto-growth happening. It could also be from a filegroup with lots of file (e.g. the primary filegroup in tempdb) where there are thousands of concurrent connections doing allocations. The proportional-fill weightings are recalculated every 8192 allocations, so there's the possibility of a slowdown with frequent recalculations over many files.
|
|
|
WHEN W1.latch_class LIKE N'FGCB_ADD_REMOVE' THEN N'[IO Operations]'
|
|
|
WHEN W1.latch_class LIKE N'DATABASE_MIRRORING_CONNECTION ' THEN N'[Mirroring - Busy]'
|
|
|
WHEN W1.latch_class LIKE N'BUFFER' THEN N'[Buffer Pool - PAGELATCH or PAGEIOLATCH]'
|
|
@@ -8341,8 +8336,8 @@ WHERE (cntr_type = 272696576 OR cntr_type = 1073874176 OR cntr_type = 1073939712
|
|
|
FROM Latches AS W1
|
|
|
INNER JOIN Latches AS W2
|
|
|
ON W2.rn <= W1.rn
|
|
|
- GROUP BY W1.rn, W1.latch_class, W1.wait_time_s, W1.waiting_requests_count, W1.pct
|
|
|
- HAVING SUM (W2.pct) - W1.pct < 100; -- percentage threshold
|
|
|
+ GROUP BY W1.rn, W1.latch_class, W1.wait_time_s, W1.waiting_requests_count, W1.pct -- group on the raw columns: the SELECT list casts pct to DECIMAL(14, 2) and divides the uncast wait_time_s, so both must be grouped uncast
|
|
|
+ HAVING SUM(W2.pct) - CAST(W1.pct AS DECIMAL(12, 2)) < 100; -- percentage threshold
|
|
|
|
|
|
;WITH cteSpinlocks1 AS (SELECT name, collisions, spins, spins_per_collision, sleep_time, backoffs FROM #tblSpinlocksBefore),
|
|
|
cteSpinlocks2 AS (SELECT name, collisions, spins, spins_per_collision, sleep_time, backoffs FROM #tblSpinlocksAfter)
|
|
@@ -8357,7 +8352,7 @@ WHERE (cntr_type = 272696576 OR cntr_type = 1073874176 OR cntr_type = 1073939712
|
|
|
ROW_NUMBER() OVER(ORDER BY t2.spins DESC) AS rn
|
|
|
FROM cteSpinlocks1 t1 INNER JOIN cteSpinlocks2 t2 ON t1.name = t2.name
|
|
|
GROUP BY t1.name, t1.collisions, t2.collisions, t1.spins, t2.spins, t1.spins_per_collision, t2.spins_per_collision, t1.sleep_time, t2.sleep_time, t1.backoffs, t2.backoffs
|
|
|
- HAVING (t2.spins-t1.spins) > 0
|
|
|
+ HAVING (t2.spins-t1.spins) > 0 -- keep only spinlocks whose spin count grew between #tblSpinlocksBefore (t1) and #tblSpinlocksAfter (t2); the CTEs expose no spins_pct column
|
|
|
ORDER BY spins DESC;
|
|
|
|
|
|
SELECT 'Performance_checks' AS [Category], 'Spinlocks_Last_' + CONVERT(VARCHAR(3), @duration) + 's' AS [Information], S1.name,
|
|
@@ -8366,7 +8361,7 @@ WHERE (cntr_type = 272696576 OR cntr_type = 1073874176 OR cntr_type = 1073939712
|
|
|
CAST(SUM(S1.spins_pct) AS DECIMAL(12, 2)) AS overall_running_spins_pct
|
|
|
FROM #tblFinalSpinlocks AS S1 INNER JOIN #tblFinalSpinlocks AS S2 ON S2.rn <= S1.rn
|
|
|
GROUP BY S1.rn, S1.name, S1.collisions, S1.spins, S1.spins_per_collision, S1.sleep_time, S1.backoffs, S1.spins_pct
|
|
|
- HAVING SUM(S2.spins_pct) - S1.spins_pct < 100 -- percentage threshold
|
|
|
+ HAVING CAST(SUM(S2.spins_pct) AS DECIMAL(12, 2)) - CAST(S1.spins_pct AS DECIMAL(12, 2)) < 100 -- percentage threshold
|
|
|
ORDER BY spins DESC;
|
|
|
END;
|
|
|
|