
Merge topic 'update-zstd' into release-3.22

ddced0c6b6 zstd: Fix trace compilation on AIX
ee98d9378f zstd: Fix ZSTD_VecMask_next compilation of software ctz implementation
eb8db99596 Merge branch 'upstream-zstd' into update-zstd
da1e841ccf zstd 2021-05-14 (a488ba11)
8c65ea4744 zstd: Update script to get zstd 1.5.0

Acked-by: Kitware Robot <[email protected]>
Merge-request: !6622
Brad King 4 years ago
parent
commit
9b8a551d84
68 changed files with 6008 additions and 2660 deletions
1. +2 -2 Utilities/Scripts/update-zstd.bash
2. +9 -3 Utilities/cmzstd/README.md
3. +25 -16 Utilities/cmzstd/lib/common/bitstream.h
4. +118 -4 Utilities/cmzstd/lib/common/compiler.h
5. +1 -3 Utilities/cmzstd/lib/common/cpu.h
6. +1 -1 Utilities/cmzstd/lib/common/debug.c
7. +12 -19 Utilities/cmzstd/lib/common/debug.h
8. +189 -43 Utilities/cmzstd/lib/common/entropy_common.c
9. +2 -1 Utilities/cmzstd/lib/common/error_private.c
10. +3 -3 Utilities/cmzstd/lib/common/error_private.h
11. +40 -12 Utilities/cmzstd/lib/common/fse.h
12. +139 -22 Utilities/cmzstd/lib/common/fse_decompress.c
13. +29 -7 Utilities/cmzstd/lib/common/huf.h
14. +69 -98 Utilities/cmzstd/lib/common/mem.h
15. +23 -17 Utilities/cmzstd/lib/common/pool.c
16. +2 -2 Utilities/cmzstd/lib/common/pool.h
17. +6 -5 Utilities/cmzstd/lib/common/threading.c
18. +20 -60 Utilities/cmzstd/lib/common/xxhash.c
19. +2 -2 Utilities/cmzstd/lib/common/xxhash.h
20. +10 -10 Utilities/cmzstd/lib/common/zstd_common.c
21. +111 -0 Utilities/cmzstd/lib/common/zstd_deps.h
22. +105 -62 Utilities/cmzstd/lib/common/zstd_internal.h
23. +154 -0 Utilities/cmzstd/lib/common/zstd_trace.h
24. +31 -24 Utilities/cmzstd/lib/compress/fse_compress.c
25. +27 -29 Utilities/cmzstd/lib/compress/hist.c
26. +2 -2 Utilities/cmzstd/lib/compress/hist.h
27. +265 -126 Utilities/cmzstd/lib/compress/huf_compress.c
28. +508 -131 Utilities/cmzstd/lib/compress/zstd_compress.c
29. +305 -63 Utilities/cmzstd/lib/compress/zstd_compress_internal.h
30. +8 -8 Utilities/cmzstd/lib/compress/zstd_compress_literals.c
31. +1 -1 Utilities/cmzstd/lib/compress/zstd_compress_literals.h
32. +29 -7 Utilities/cmzstd/lib/compress/zstd_compress_sequences.c
33. +1 -1 Utilities/cmzstd/lib/compress/zstd_compress_sequences.h
34. +22 -295 Utilities/cmzstd/lib/compress/zstd_compress_superblock.c
35. +1 -1 Utilities/cmzstd/lib/compress/zstd_compress_superblock.h
36. +204 -67 Utilities/cmzstd/lib/compress/zstd_cwksp.h
37. +25 -25 Utilities/cmzstd/lib/compress/zstd_double_fast.c
38. +1 -1 Utilities/cmzstd/lib/compress/zstd_double_fast.h
39. +23 -23 Utilities/cmzstd/lib/compress/zstd_fast.c
40. +1 -1 Utilities/cmzstd/lib/compress/zstd_fast.h
41. +938 -45 Utilities/cmzstd/lib/compress/zstd_lazy.c
42. +59 -1 Utilities/cmzstd/lib/compress/zstd_lazy.h
43. +314 -211 Utilities/cmzstd/lib/compress/zstd_ldm.c
44. +9 -2 Utilities/cmzstd/lib/compress/zstd_ldm.h
45. +103 -0 Utilities/cmzstd/lib/compress/zstd_ldm_geartab.h
46. +191 -46 Utilities/cmzstd/lib/compress/zstd_opt.c
47. +1 -1 Utilities/cmzstd/lib/compress/zstd_opt.h
48. +93 -415 Utilities/cmzstd/lib/compress/zstdmt_compress.c
49. +31 -113 Utilities/cmzstd/lib/compress/zstdmt_compress.h
50. +342 -239 Utilities/cmzstd/lib/decompress/huf_decompress.c
51. +9 -9 Utilities/cmzstd/lib/decompress/zstd_ddict.c
52. +2 -2 Utilities/cmzstd/lib/decompress/zstd_ddict.h
53. +369 -87 Utilities/cmzstd/lib/decompress/zstd_decompress.c
54. +191 -75 Utilities/cmzstd/lib/decompress/zstd_decompress_block.c
55. +6 -3 Utilities/cmzstd/lib/decompress/zstd_decompress_block.h
56. +27 -11 Utilities/cmzstd/lib/decompress/zstd_decompress_internal.h
57. +1 -1 Utilities/cmzstd/lib/deprecated/zbuff.h
58. +1 -1 Utilities/cmzstd/lib/deprecated/zbuff_common.c
59. +1 -1 Utilities/cmzstd/lib/deprecated/zbuff_compress.c
60. +1 -1 Utilities/cmzstd/lib/deprecated/zbuff_decompress.c
61. +46 -36 Utilities/cmzstd/lib/dictBuilder/cover.c
62. +7 -6 Utilities/cmzstd/lib/dictBuilder/cover.h
63. +1 -1 Utilities/cmzstd/lib/dictBuilder/divsufsort.c
64. +32 -30 Utilities/cmzstd/lib/dictBuilder/fastcover.c
65. +29 -30 Utilities/cmzstd/lib/dictBuilder/zdict.c
66. +152 -5 Utilities/cmzstd/lib/zdict.h
67. +524 -90 Utilities/cmzstd/lib/zstd.h
68. +2 -1 Utilities/cmzstd/lib/zstd_errors.h

+ 2 - 2
Utilities/Scripts/update-zstd.bash

@@ -8,7 +8,7 @@ readonly name="zstd"
 readonly ownership="zstd upstream <[email protected]>"
 readonly subtree="Utilities/cmzstd"
 readonly repo="https://github.com/facebook/zstd.git"
-readonly tag="v1.4.5"
+readonly tag="v1.5.0"
 readonly shortlog=false
 readonly paths="
   LICENSE
@@ -23,7 +23,7 @@ readonly paths="
   lib/deprecated/*.h
   lib/dictBuilder/*.c
   lib/dictBuilder/*.h
-  lib/zstd.h
+  lib/*.h
 "
 
 extract_source () {

+ 9 - 3
Utilities/cmzstd/README.md

@@ -176,6 +176,12 @@ Going into `build` directory, you will find additional possibilities:
 You can build the zstd binary via buck by executing: `buck build programs:zstd` from the root of the repo.
 The output binary will be in `buck-out/gen/programs/`.
 
+## Testing
+
+You can run quick local smoke tests by executing the `playTest.sh` script from the `src/tests` directory.
+Two env variables `$ZSTD_BIN` and `$DATAGEN_BIN` are needed for the test script to locate the zstd and datagen binary.
+For information on CI testing, please refer to TESTING.md
+
 ## Status
 
 Zstandard is currently deployed within Facebook. It is used continuously to compress large amounts of data in multiple formats and use cases.
@@ -187,7 +193,7 @@ Zstandard is dual-licensed under [BSD](LICENSE) and [GPLv2](COPYING).
 
 ## Contributing
 
-The "dev" branch is the one where all contributions are merged before reaching "master".
-If you plan to propose a patch, please commit into the "dev" branch, or its own feature branch.
-Direct commit to "master" are not permitted.
+The `dev` branch is the one where all contributions are merged before reaching `release`.
+If you plan to propose a patch, please commit into the `dev` branch, or its own feature branch.
+Direct commit to `release` are not permitted.
 For more information, please read [CONTRIBUTING](CONTRIBUTING.md).

+ 25 - 16
Utilities/cmzstd/lib/common/bitstream.h

@@ -1,7 +1,7 @@
 /* ******************************************************************
  * bitstream
  * Part of FSE library
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  * You can contact the author at :
  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -17,7 +17,6 @@
 #if defined (__cplusplus)
 extern "C" {
 #endif
-
 /*
 *  This API consists of small unitary functions, which must be inlined for best performance.
 *  Since link-time-optimization is not available for all compilers,
@@ -36,10 +35,12 @@ extern "C" {
 /*=========================================
 *  Target specific
 =========================================*/
-#if defined(__BMI__) && defined(__GNUC__)
-#  include <immintrin.h>   /* support for bextr (experimental) */
-#elif defined(__ICCARM__)
-#  include <intrinsics.h>
+#ifndef ZSTD_NO_INTRINSICS
+#  if defined(__BMI__) && defined(__GNUC__)
+#    include <immintrin.h>   /* support for bextr (experimental) */
+#  elif defined(__ICCARM__)
+#    include <intrinsics.h>
+#  endif
 #endif
 
 #define STREAM_ACCUMULATOR_MIN_32  25
@@ -141,8 +142,12 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
     assert(val != 0);
     {
 #   if defined(_MSC_VER)   /* Visual */
-        unsigned long r=0;
-        return _BitScanReverse ( &r, val ) ? (unsigned)r : 0;
+#       if STATIC_BMI2 == 1
+		return _lzcnt_u32(val) ^ 31;
+#       else
+		unsigned long r = 0;
+		return _BitScanReverse(&r, val) ? (unsigned)r : 0;
+#       endif
 #   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
         return __builtin_clz (val) ^ 31;
 #   elif defined(__ICCARM__)    /* IAR Intrinsic */
@@ -198,7 +203,7 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
 MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
                             size_t value, unsigned nbBits)
 {
-    MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32);
+    DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
     assert(nbBits < BIT_MASK_SIZE);
     assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
     bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
@@ -271,7 +276,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
  */
 MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
 {
-    if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
+    if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
 
     bitD->start = (const char*)srcBuffer;
     bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
@@ -317,12 +322,12 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
     return srcSize;
 }
 
-MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
+MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
 {
     return bitContainer >> start;
 }
 
-MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
+MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
 {
     U32 const regMask = sizeof(bitContainer)*8 - 1;
     /* if start > regMask, bitstream is corrupted, and result is undefined */
@@ -330,10 +335,14 @@ MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 co
     return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
 }
 
-MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
+MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
 {
+#if defined(STATIC_BMI2) && STATIC_BMI2 == 1
+	return  _bzhi_u64(bitContainer, nbBits);
+#else
     assert(nbBits < BIT_MASK_SIZE);
     return bitContainer & BIT_mask[nbBits];
+#endif
 }
 
 /*! BIT_lookBits() :
@@ -342,7 +351,7 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
  *  On 32-bits, maxNbBits==24.
  *  On 64-bits, maxNbBits==56.
  * @return : value extracted */
-MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
+MEM_STATIC  FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t*  bitD, U32 nbBits)
 {
     /* arbitrate between double-shift and shift+mask */
 #if 1
@@ -365,7 +374,7 @@ MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
     return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
 }
 
-MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
+MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
 {
     bitD->bitsConsumed += nbBits;
 }
@@ -374,7 +383,7 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
  *  Read (consume) next n bits from local register and update.
  *  Pay attention to not read more than nbBits contained into local register.
  * @return : extracted value. */
-MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
+MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
 {
     size_t const value = BIT_lookBits(bitD, nbBits);
     BIT_skipBits(bitD, nbBits);
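
Note: BIT_highbit32 returns the index of the highest set bit, i.e. floor(log2(val)) for val != 0; the new STATIC_BMI2 path gets the same result because, for a nonzero 32-bit value, _lzcnt_u32(val) ^ 31 == 31 - lzcnt(val). A minimal standalone sketch (hypothetical test harness, not part of the diff; assumes GCC/Clang for __builtin_clz) checking the equivalence:

#include <assert.h>
#include <stdio.h>

/* Portable reference: index of the highest set bit of a nonzero value. */
static unsigned highbit32_ref(unsigned val)
{
    unsigned r = 0;
    while (val >>= 1) r++;
    return r;
}

/* Intrinsic path, mirroring the GCC branch of BIT_highbit32. */
static unsigned highbit32_clz(unsigned val)
{
    return (unsigned)__builtin_clz(val) ^ 31;   /* == 31 - (leading zeros) */
}

int main(void)
{
    unsigned v;
    for (v = 1; v != 0; v <<= 1)                /* every power of two */
        assert(highbit32_clz(v) == highbit32_ref(v));
    assert(highbit32_clz(0x12345678u) == 28);   /* top set bit is bit 28 */
    printf("BIT_highbit32 equivalence holds\n");
    return 0;
}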

+ 118 - 4
Utilities/cmzstd/lib/common/compiler.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -38,6 +38,17 @@
 
 #endif
 
+/**
+  On MSVC qsort requires that functions passed into it use the __cdecl calling conversion(CC).
+  This explictly marks such functions as __cdecl so that the code will still compile
+  if a CC other than __cdecl has been made the default.
+*/
+#if  defined(_MSC_VER)
+#  define WIN_CDECL __cdecl
+#else
+#  define WIN_CDECL
+#endif
+
 /**
  * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
  * parameters. They must be inlined for the compiler to eliminate the constant
@@ -79,6 +90,7 @@
 #  endif
 #endif
 
+
 /* target attribute */
 #ifndef __has_attribute
   #define __has_attribute(x) 0  /* Compatibility with non-clang compilers. */
@@ -114,12 +126,12 @@
 #    include <mmintrin.h>   /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
 #    define PREFETCH_L1(ptr)  _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
 #    define PREFETCH_L2(ptr)  _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
-#    elif defined(__aarch64__)
-#     define PREFETCH_L1(ptr)  __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
-#     define PREFETCH_L2(ptr)  __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
 #  elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
 #    define PREFETCH_L1(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
 #    define PREFETCH_L2(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
+#  elif defined(__aarch64__)
+#    define PREFETCH_L1(ptr)  __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
+#    define PREFETCH_L2(ptr)  __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
 #  else
 #    define PREFETCH_L1(ptr) (void)(ptr)  /* disabled */
 #    define PREFETCH_L2(ptr) (void)(ptr)  /* disabled */
@@ -172,4 +184,106 @@
 #  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
 #endif
 
+/*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/
+#ifndef STATIC_BMI2
+#  if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86))
+#    ifdef __AVX2__  //MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2
+#       define STATIC_BMI2 1
+#    endif
+#  endif
+#endif
+
+#ifndef STATIC_BMI2
+    #define STATIC_BMI2 0
+#endif
+
+/* compat. with non-clang compilers */
+#ifndef __has_builtin
+#  define __has_builtin(x) 0
+#endif
+
+/* compat. with non-clang compilers */
+#ifndef __has_feature
+#  define __has_feature(x) 0
+#endif
+
+/* detects whether we are being compiled under msan */
+#ifndef ZSTD_MEMORY_SANITIZER
+#  if __has_feature(memory_sanitizer)
+#    define ZSTD_MEMORY_SANITIZER 1
+#  else
+#    define ZSTD_MEMORY_SANITIZER 0
+#  endif
+#endif
+
+#if ZSTD_MEMORY_SANITIZER
+/* Not all platforms that support msan provide sanitizers/msan_interface.h.
+ * We therefore declare the functions we need ourselves, rather than trying to
+ * include the header file... */
+#include <stddef.h>  /* size_t */
+#define ZSTD_DEPS_NEED_STDINT
+#include "zstd_deps.h"  /* intptr_t */
+
+/* Make memory region fully initialized (without changing its contents). */
+void __msan_unpoison(const volatile void *a, size_t size);
+
+/* Make memory region fully uninitialized (without changing its contents).
+   This is a legacy interface that does not update origin information. Use
+   __msan_allocated_memory() instead. */
+void __msan_poison(const volatile void *a, size_t size);
+
+/* Returns the offset of the first (at least partially) poisoned byte in the
+   memory range, or -1 if the whole range is good. */
+intptr_t __msan_test_shadow(const volatile void *x, size_t size);
+#endif
+
+/* detects whether we are being compiled under asan */
+#ifndef ZSTD_ADDRESS_SANITIZER
+#  if __has_feature(address_sanitizer)
+#    define ZSTD_ADDRESS_SANITIZER 1
+#  elif defined(__SANITIZE_ADDRESS__)
+#    define ZSTD_ADDRESS_SANITIZER 1
+#  else
+#    define ZSTD_ADDRESS_SANITIZER 0
+#  endif
+#endif
+
+#if ZSTD_ADDRESS_SANITIZER
+/* Not all platforms that support asan provide sanitizers/asan_interface.h.
+ * We therefore declare the functions we need ourselves, rather than trying to
+ * include the header file... */
+#include <stddef.h>  /* size_t */
+
+/**
+ * Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
+ *
+ * This memory must be previously allocated by your program. Instrumented
+ * code is forbidden from accessing addresses in this region until it is
+ * unpoisoned. This function is not guaranteed to poison the entire region -
+ * it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
+ * alignment restrictions.
+ *
+ * \note This function is not thread-safe because no two threads can poison or
+ * unpoison memory in the same memory region simultaneously.
+ *
+ * \param addr Start of memory region.
+ * \param size Size of memory region. */
+void __asan_poison_memory_region(void const volatile *addr, size_t size);
+
+/**
+ * Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
+ *
+ * This memory must be previously allocated by your program. Accessing
+ * addresses in this region is allowed until this region is poisoned again.
+ * This function could unpoison a super-region of <c>[addr, addr+size)</c> due
+ * to ASan alignment restrictions.
+ *
+ * \note This function is not thread-safe because no two threads can
+ * poison or unpoison memory in the same memory region simultaneously.
+ *
+ * \param addr Start of memory region.
+ * \param size Size of memory region. */
+void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
+#endif
+
 #endif /* ZSTD_COMPILER_H */
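
Note: the sanitizer hooks declared above let zstd mark unused workspace bytes as unaddressable (zstd_cwksp.h, also updated in this merge, uses them for exactly that), so stray reads of reserved space trip ASan instead of going unnoticed. A standalone sketch of the poison/unpoison lifecycle (the POISON/UNPOISON wrappers are hypothetical; the detection mirrors the macros above, and everything compiles to no-ops without -fsanitize=address):

#include <stddef.h>
#include <stdlib.h>

#ifndef __has_feature
#  define __has_feature(x) 0   /* compat. with non-clang compilers */
#endif

#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__)
/* Provided by the ASan runtime; see the prototypes above. */
void __asan_poison_memory_region(void const volatile *addr, size_t size);
void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
#  define POISON(p, s)   __asan_poison_memory_region((p), (s))
#  define UNPOISON(p, s) __asan_unpoison_memory_region((p), (s))
#else
#  define POISON(p, s)   ((void)(p), (void)(s))   /* no-op without ASan */
#  define UNPOISON(p, s) ((void)(p), (void)(s))
#endif

int main(void)
{
    char* ws = (char*)malloc(64);
    if (ws == NULL) return 1;
    POISON(ws, 64);     /* whole workspace reserved: any access now reports */
    UNPOISON(ws, 16);   /* hand out the first 16 bytes */
    ws[0] = 1;          /* fine: unpoisoned */
    /* ws[32] = 1; */   /* would abort under ASan: still poisoned */
    UNPOISON(ws, 64);   /* unpoison before returning memory to the allocator */
    free(ws);
    return 0;
}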

+ 1 - 3
Utilities/cmzstd/lib/common/cpu.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -16,8 +16,6 @@
  * https://github.com/facebook/folly/blob/master/folly/CpuId.h
  */
 
-#include <string.h>
-
 #include "mem.h"
 
 #ifdef _MSC_VER

+ 1 - 1
Utilities/cmzstd/lib/common/debug.c

@@ -1,7 +1,7 @@
 /* ******************************************************************
  * debug
  * Part of FSE library
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  * You can contact the author at :
  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy

+ 12 - 19
Utilities/cmzstd/lib/common/debug.h

@@ -1,7 +1,7 @@
 /* ******************************************************************
  * debug
  * Part of FSE library
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  * You can contact the author at :
  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -51,15 +51,6 @@ extern "C" {
 #endif
 
 
-/* DEBUGFILE can be defined externally,
- * typically through compiler command line.
- * note : currently useless.
- * Value must be stderr or stdout */
-#ifndef DEBUGFILE
-#  define DEBUGFILE stderr
-#endif
-
-
 /* recommended values for DEBUGLEVEL :
  * 0 : release mode, no debug, all run-time checks disabled
  * 1 : enables assert() only, no display
@@ -76,7 +67,8 @@ extern "C" {
  */
 
 #if (DEBUGLEVEL>=1)
-#  include <assert.h>
+#  define ZSTD_DEPS_NEED_ASSERT
+#  include "zstd_deps.h"
 #else
 #  ifndef assert   /* assert may be already defined, due to prior #include <assert.h> */
 #    define assert(condition) ((void)0)   /* disable assert (default) */
@@ -84,7 +76,8 @@ extern "C" {
 #endif
 
 #if (DEBUGLEVEL>=2)
-#  include <stdio.h>
+#  define ZSTD_DEPS_NEED_IO
+#  include "zstd_deps.h"
 extern int g_debuglevel; /* the variable is only declared,
                             it actually lives in debug.c,
                             and is shared by the whole process.
@@ -92,14 +85,14 @@ extern int g_debuglevel; /* the variable is only declared,
                             It's useful when enabling very verbose levels
                             on selective conditions (such as position in src) */
 
-#  define RAWLOG(l, ...) {                                      \
-                if (l<=g_debuglevel) {                          \
-                    fprintf(stderr, __VA_ARGS__);               \
+#  define RAWLOG(l, ...) {                                       \
+                if (l<=g_debuglevel) {                           \
+                    ZSTD_DEBUG_PRINT(__VA_ARGS__);               \
             }   }
-#  define DEBUGLOG(l, ...) {                                    \
-                if (l<=g_debuglevel) {                          \
-                    fprintf(stderr, __FILE__ ": " __VA_ARGS__); \
-                    fprintf(stderr, " \n");                     \
+#  define DEBUGLOG(l, ...) {                                     \
+                if (l<=g_debuglevel) {                           \
+                    ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__); \
+                    ZSTD_DEBUG_PRINT(" \n");                     \
             }   }
 #else
 #  define RAWLOG(l, ...)      {}    /* disabled */
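
Note: routing RAWLOG/DEBUGLOG through ZSTD_DEBUG_PRINT, instead of calling fprintf directly, is what lets the new zstd_deps.h own every libc dependency. A minimal sketch of the indirection (the fprintf mapping below is an assumption in the spirit of zstd_deps.h's ZSTD_DEPS_NEED_IO section, not a quote of it):

#include <stdio.h>

/* Assumed mapping: one macro owns all debug output, so freestanding
 * builds can redirect or stub it out in a single place. */
#ifndef ZSTD_DEBUG_PRINT
#  define ZSTD_DEBUG_PRINT(...) fprintf(stderr, __VA_ARGS__)
#endif

static int g_debuglevel = 2;   /* normally declared here, defined in debug.c */

#define DEBUGLOG(l, ...) {                                     \
            if ((l) <= g_debuglevel) {                         \
                ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__);   \
                ZSTD_DEBUG_PRINT(" \n");                       \
            }   }

int main(void)
{
    DEBUGLOG(2, "emitted: level %d <= g_debuglevel", 2);
    DEBUGLOG(5, "suppressed: level %d > g_debuglevel", 5);
    return 0;
}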

+ 189 - 43
Utilities/cmzstd/lib/common/entropy_common.c

@@ -1,6 +1,6 @@
 /* ******************************************************************
  * Common functions of New Generation Entropy library
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  *  You can contact the author at :
  *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -38,8 +38,31 @@ const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
 /*-**************************************************************
 *  FSE NCount encoding-decoding
 ****************************************************************/
-size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
-                 const void* headerBuffer, size_t hbSize)
+static U32 FSE_ctz(U32 val)
+{
+    assert(val != 0);
+    {
+#   if defined(_MSC_VER)   /* Visual */
+        unsigned long r=0;
+        return _BitScanForward(&r, val) ? (unsigned)r : 0;
+#   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* GCC Intrinsic */
+        return __builtin_ctz(val);
+#   elif defined(__ICCARM__)    /* IAR Intrinsic */
+        return __CTZ(val);
+#   else   /* Software version */
+        U32 count = 0;
+        while ((val & 1) == 0) {
+            val >>= 1;
+            ++count;
+        }
+        return count;
+#   endif
+    }
+}
+
+FORCE_INLINE_TEMPLATE
+size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+                           const void* headerBuffer, size_t hbSize)
 {
     const BYTE* const istart = (const BYTE*) headerBuffer;
     const BYTE* const iend = istart + hbSize;
@@ -50,23 +73,23 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
     U32 bitStream;
     int bitCount;
     unsigned charnum = 0;
+    unsigned const maxSV1 = *maxSVPtr + 1;
     int previous0 = 0;
 
-    if (hbSize < 4) {
-        /* This function only works when hbSize >= 4 */
-        char buffer[4];
-        memset(buffer, 0, sizeof(buffer));
-        memcpy(buffer, headerBuffer, hbSize);
+    if (hbSize < 8) {
+        /* This function only works when hbSize >= 8 */
+        char buffer[8] = {0};
+        ZSTD_memcpy(buffer, headerBuffer, hbSize);
         {   size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr,
                                                     buffer, sizeof(buffer));
             if (FSE_isError(countSize)) return countSize;
             if (countSize > hbSize) return ERROR(corruption_detected);
             return countSize;
     }   }
-    assert(hbSize >= 4);
+    assert(hbSize >= 8);
 
     /* init */
-    memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0]));   /* all symbols not present in NCount have a frequency of 0 */
+    ZSTD_memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0]));   /* all symbols not present in NCount have a frequency of 0 */
     bitStream = MEM_readLE32(ip);
     nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
     if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
@@ -77,36 +100,58 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
     threshold = 1<<nbBits;
     nbBits++;
 
-    while ((remaining>1) & (charnum<=*maxSVPtr)) {
+    for (;;) {
         if (previous0) {
-            unsigned n0 = charnum;
-            while ((bitStream & 0xFFFF) == 0xFFFF) {
-                n0 += 24;
-                if (ip < iend-5) {
-                    ip += 2;
-                    bitStream = MEM_readLE32(ip) >> bitCount;
+            /* Count the number of repeats. Each time the
+             * 2-bit repeat code is 0b11 there is another
+             * repeat.
+             * Avoid UB by setting the high bit to 1.
+             */
+            int repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
+            while (repeats >= 12) {
+                charnum += 3 * 12;
+                if (LIKELY(ip <= iend-7)) {
+                    ip += 3;
                 } else {
-                    bitStream >>= 16;
-                    bitCount   += 16;
-            }   }
-            while ((bitStream & 3) == 3) {
-                n0 += 3;
-                bitStream >>= 2;
-                bitCount += 2;
+                    bitCount -= (int)(8 * (iend - 7 - ip));
+                    bitCount &= 31;
+                    ip = iend - 4;
+                }
+                bitStream = MEM_readLE32(ip) >> bitCount;
+                repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
             }
-            n0 += bitStream & 3;
+            charnum += 3 * repeats;
+            bitStream >>= 2 * repeats;
+            bitCount += 2 * repeats;
+
+            /* Add the final repeat which isn't 0b11. */
+            assert((bitStream & 3) < 3);
+            charnum += bitStream & 3;
             bitCount += 2;
-            if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
-            while (charnum < n0) normalizedCounter[charnum++] = 0;
-            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
+
+            /* This is an error, but break and return an error
+             * at the end, because returning out of a loop makes
+             * it harder for the compiler to optimize.
+             */
+            if (charnum >= maxSV1) break;
+
+            /* We don't need to set the normalized count to 0
+             * because we already memset the whole buffer to 0.
+             */
+
+            if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
                 assert((bitCount >> 3) <= 3); /* For first condition to work */
                 ip += bitCount>>3;
                 bitCount &= 7;
-                bitStream = MEM_readLE32(ip) >> bitCount;
             } else {
-                bitStream >>= 2;
-        }   }
-        {   int const max = (2*threshold-1) - remaining;
+                bitCount -= (int)(8 * (iend - 4 - ip));
+                bitCount &= 31;
+                ip = iend - 4;
+            }
+            bitStream = MEM_readLE32(ip) >> bitCount;
+        }
+        {
+            int const max = (2*threshold-1) - remaining;
             int count;
 
             if ((bitStream & (threshold-1)) < (U32)max) {
@@ -119,24 +164,43 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
             }
 
             count--;   /* extra accuracy */
-            remaining -= count < 0 ? -count : count;   /* -1 means +1 */
+            /* When it matters (small blocks), this is a
+             * predictable branch, because we don't use -1.
+             */
+            if (count >= 0) {
+                remaining -= count;
+            } else {
+                assert(count == -1);
+                remaining += count;
+            }
             normalizedCounter[charnum++] = (short)count;
             previous0 = !count;
-            while (remaining < threshold) {
-                nbBits--;
-                threshold >>= 1;
+
+            assert(threshold > 1);
+            if (remaining < threshold) {
+                /* This branch can be folded into the
+                 * threshold update condition because we
+                 * know that threshold > 1.
+                 */
+                if (remaining <= 1) break;
+                nbBits = BIT_highbit32(remaining) + 1;
+                threshold = 1 << (nbBits - 1);
             }
+            if (charnum >= maxSV1) break;
 
-            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
+            if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
                 ip += bitCount>>3;
                 bitCount &= 7;
             } else {
                 bitCount -= (int)(8 * (iend - 4 - ip));
+                bitCount &= 31;
                 ip = iend - 4;
             }
-            bitStream = MEM_readLE32(ip) >> (bitCount & 31);
-    }   }   /* while ((remaining>1) & (charnum<=*maxSVPtr)) */
+            bitStream = MEM_readLE32(ip) >> bitCount;
+    }   }
     if (remaining != 1) return ERROR(corruption_detected);
+    /* Only possible when there are too many zeros. */
+    if (charnum > maxSV1) return ERROR(maxSymbolValue_tooSmall);
     if (bitCount > 32) return ERROR(corruption_detected);
     *maxSVPtr = charnum-1;
 
@@ -144,6 +208,43 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
     return ip-istart;
 }
 
+/* Avoids the FORCE_INLINE of the _body() function. */
+static size_t FSE_readNCount_body_default(
+        short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+        const void* headerBuffer, size_t hbSize)
+{
+    return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
+}
+
+#if DYNAMIC_BMI2
+TARGET_ATTRIBUTE("bmi2") static size_t FSE_readNCount_body_bmi2(
+        short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+        const void* headerBuffer, size_t hbSize)
+{
+    return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
+}
+#endif
+
+size_t FSE_readNCount_bmi2(
+        short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+        const void* headerBuffer, size_t hbSize, int bmi2)
+{
+#if DYNAMIC_BMI2
+    if (bmi2) {
+        return FSE_readNCount_body_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
+    }
+#endif
+    (void)bmi2;
+    return FSE_readNCount_body_default(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
+}
+
+size_t FSE_readNCount(
+        short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+        const void* headerBuffer, size_t hbSize)
+{
+    return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0);
+}
+
 
 /*! HUF_readStats() :
     Read compact Huffman tree, saved by HUF_writeCTable().
@@ -155,6 +256,17 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
 size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
                      U32* nbSymbolsPtr, U32* tableLogPtr,
                      const void* src, size_t srcSize)
+{
+    U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
+    return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0);
+}
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                   U32* nbSymbolsPtr, U32* tableLogPtr,
+                   const void* src, size_t srcSize,
+                   void* workSpace, size_t wkspSize,
+                   int bmi2)
 {
     U32 weightTotal;
     const BYTE* ip = (const BYTE*) src;
@@ -163,7 +275,7 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
 
     if (!srcSize) return ERROR(srcSize_wrong);
     iSize = ip[0];
-    /* memset(huffWeight, 0, hwSize);   *//* is not necessary, even though some analyzer complain ... */
+    /* ZSTD_memset(huffWeight, 0, hwSize);   *//* is not necessary, even though some analyzer complain ... */
 
     if (iSize >= 128) {  /* special header */
         oSize = iSize - 127;
@@ -177,14 +289,14 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
                 huffWeight[n+1] = ip[n/2] & 15;
     }   }   }
     else  {   /* header compressed with FSE (normal case) */
-        FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)];  /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */
         if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
-        oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6);   /* max (hwSize-1) values decoded, as last one is implied */
+        /* max (hwSize-1) values decoded, as last one is implied */
+        oSize = FSE_decompress_wksp_bmi2(huffWeight, hwSize-1, ip+1, iSize, 6, workSpace, wkspSize, bmi2);
         if (FSE_isError(oSize)) return oSize;
     }
 
     /* collect weight stats */
-    memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
+    ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
     weightTotal = 0;
     {   U32 n; for (n=0; n<oSize; n++) {
             if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
@@ -214,3 +326,37 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
     *nbSymbolsPtr = (U32)(oSize+1);
     return iSize+1;
 }
+
+/* Avoids the FORCE_INLINE of the _body() function. */
+static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                     U32* nbSymbolsPtr, U32* tableLogPtr,
+                     const void* src, size_t srcSize,
+                     void* workSpace, size_t wkspSize)
+{
+    return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 0);
+}
+
+#if DYNAMIC_BMI2
+static TARGET_ATTRIBUTE("bmi2") size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                     U32* nbSymbolsPtr, U32* tableLogPtr,
+                     const void* src, size_t srcSize,
+                     void* workSpace, size_t wkspSize)
+{
+    return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 1);
+}
+#endif
+
+size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                     U32* nbSymbolsPtr, U32* tableLogPtr,
+                     const void* src, size_t srcSize,
+                     void* workSpace, size_t wkspSize,
+                     int bmi2)
+{
+#if DYNAMIC_BMI2
+    if (bmi2) {
+        return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
+    }
+#endif
+    (void)bmi2;
+    return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
+}

+ 2 - 1
Utilities/cmzstd/lib/common/error_private.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -48,6 +48,7 @@ const char* ERR_getErrorString(ERR_enum code)
     case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
     case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
     case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong";
+    case PREFIX(srcBuffer_wrong): return "Source buffer is wrong";
     case PREFIX(maxCode):
     default: return notErrorCode;
     }

+ 3 - 3
Utilities/cmzstd/lib/common/error_private.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -21,8 +21,8 @@ extern "C" {
 /* ****************************************
 *  Dependencies
 ******************************************/
-#include <stddef.h>        /* size_t */
-#include "zstd_errors.h"  /* enum list */
+#include "../zstd_errors.h"  /* enum list */
+#include "zstd_deps.h"       /* size_t */
 
 
 /* ****************************************

+ 40 - 12
Utilities/cmzstd/lib/common/fse.h

@@ -1,7 +1,7 @@
 /* ******************************************************************
  * FSE : Finite State Entropy codec
  * Public Prototypes declaration
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  * You can contact the author at :
  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -23,7 +23,7 @@ extern "C" {
 /*-*****************************************
 *  Dependencies
 ******************************************/
-#include <stddef.h>    /* size_t, ptrdiff_t */
+#include "zstd_deps.h"    /* size_t, ptrdiff_t */
 
 
 /*-*****************************************
@@ -137,10 +137,16 @@ FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize
 /*! FSE_normalizeCount():
     normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
     'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
+    useLowProbCount is a boolean parameter which trades off compressed size for
+    faster header decoding. When it is set to 1, the compressed data will be slightly
+    smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be
+    faster. If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0
+    is a good default, since header deserialization makes a big speed difference.
+    Otherwise, useLowProbCount=1 is a good default, since the speed difference is small.
     @return : tableLog,
               or an errorCode, which can be tested using FSE_isError() */
 FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog,
-                    const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
+                    const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount);
 
 /*! FSE_NCountWriteBound():
     Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
@@ -228,6 +234,13 @@ FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter,
                            unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
                            const void* rBuffer, size_t rBuffSize);
 
+/*! FSE_readNCount_bmi2():
+ * Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise.
+ */
+FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter,
+                           unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
+                           const void* rBuffer, size_t rBuffSize, int bmi2);
+
 /*! Constructor and Destructor of FSE_DTable.
     Note that its size depends on 'tableLog' */
 typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
@@ -288,12 +301,12 @@ If there is an error, the function will return an error code, which can be teste
 *******************************************/
 /* FSE buffer bounds */
 #define FSE_NCOUNTBOUND 512
-#define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
+#define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
 #define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
 
 /* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
-#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue)   (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
-#define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<maxTableLog))
+#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue)   (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2))
+#define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<(maxTableLog)))
 
 /* or use the size to malloc() space directly. Pay attention to alignment restrictions though */
 #define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue)   (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable))
@@ -309,9 +322,9 @@ unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsi
 
 /* FSE_compress_wksp() :
  * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
- * FSE_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
+ * FSE_COMPRESS_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
  */
-#define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue)   ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
+#define FSE_COMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue)   ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
 size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
 
 size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
@@ -322,18 +335,30 @@ size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
 
 /* FSE_buildCTable_wksp() :
  * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
- * `wkspSize` must be >= `(1<<tableLog)`.
+ * `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`.
  */
+#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (maxSymbolValue + 2 + (1ull << (tableLog - 2)))
+#define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog))
 size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
 
+#define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8)
+#define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned))
+FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
+/**< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */
+
 size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
 /**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */
 
 size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
 /**< build a fake FSE_DTable, designed to always generate the same symbolValue */
 
-size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog);
-/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */
+#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
+#define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned))
+size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize);
+/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */
+
+size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2);
+/**< Same as FSE_decompress_wksp() but with dynamic BMI2 support. Pass 1 if your CPU supports BMI2 or 0 if it doesn't. */
 
 typedef enum {
    FSE_repeat_none,  /**< Cannot use the previous table */
@@ -644,6 +669,9 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
 #ifndef FSE_DEFAULT_MEMORY_USAGE
 #  define FSE_DEFAULT_MEMORY_USAGE 13
 #endif
+#if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE)
+#  error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE"
+#endif
 
 /*!FSE_MAX_SYMBOL_VALUE :
 *  Maximum symbol value authorized.
@@ -677,7 +705,7 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
 #  error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
 #endif
 
-#define FSE_TABLESTEP(tableSize) ((tableSize>>1) + (tableSize>>3) + 3)
+#define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3)
 
 
 #endif /* FSE_STATIC_LINKING_ONLY */
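
Note: the new *_WKSP_SIZE_U32 macros replace ad-hoc sizing: callers now size a single buffer at compile time and pass it along with its size so the library can validate it. A hypothetical caller sketch for the updated FSE_decompress_wksp signature (it mirrors the FSE_decompress wrapper in fse_decompress.c below; error handling elided):

#define FSE_STATIC_LINKING_ONLY
#include "fse.h"   /* assumes zstd's lib/common is on the include path */

/* Decompress one FSE-compressed block using a stack workspace sized by
 * the new macro; maxLog bounds the table log the stream may declare. */
static size_t fse_decompress_block(void* dst, size_t dstCapacity,
                                   const void* src, size_t srcSize)
{
    unsigned wksp[FSE_DECOMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
    return FSE_decompress_wksp(dst, dstCapacity, src, srcSize,
                               FSE_MAX_TABLELOG, wksp, sizeof(wksp));
}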

+ 139 - 22
Utilities/cmzstd/lib/common/fse_decompress.c

@@ -1,6 +1,6 @@
 /* ******************************************************************
  * FSE : Finite State Entropy decoder
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  *  You can contact the author at :
  *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -16,13 +16,14 @@
 /* **************************************************************
 *  Includes
 ****************************************************************/
-#include <stdlib.h>     /* malloc, free, qsort */
-#include <string.h>     /* memcpy, memset */
+#include "debug.h"      /* assert */
 #include "bitstream.h"
 #include "compiler.h"
 #define FSE_STATIC_LINKING_ONLY
 #include "fse.h"
 #include "error_private.h"
+#define ZSTD_DEPS_NEED_MALLOC
+#include "zstd_deps.h"
 
 
 /* **************************************************************
@@ -59,25 +60,27 @@
 FSE_DTable* FSE_createDTable (unsigned tableLog)
 {
     if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
-    return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
+    return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
 }
 
 void FSE_freeDTable (FSE_DTable* dt)
 {
-    free(dt);
+    ZSTD_free(dt);
 }
 
-size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
 {
     void* const tdPtr = dt+1;   /* because *dt is unsigned, 32-bits aligned on 32-bits */
     FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
-    U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
+    U16* symbolNext = (U16*)workSpace;
+    BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1);
 
     U32 const maxSV1 = maxSymbolValue + 1;
     U32 const tableSize = 1 << tableLog;
     U32 highThreshold = tableSize-1;
 
     /* Sanity Checks */
+    if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge);
     if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
     if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
 
@@ -95,11 +98,57 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
                     if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
                     symbolNext[s] = normalizedCounter[s];
         }   }   }
-        memcpy(dt, &DTableH, sizeof(DTableH));
+        ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
     }
 
     /* Spread symbols */
-    {   U32 const tableMask = tableSize-1;
+    if (highThreshold == tableSize - 1) {
+        size_t const tableMask = tableSize-1;
+        size_t const step = FSE_TABLESTEP(tableSize);
+        /* First lay down the symbols in order.
+         * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
+         * misses since small blocks generally have small table logs, so nearly
+         * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
+         * our buffer to handle the over-write.
+         */
+        {
+            U64 const add = 0x0101010101010101ull;
+            size_t pos = 0;
+            U64 sv = 0;
+            U32 s;
+            for (s=0; s<maxSV1; ++s, sv += add) {
+                int i;
+                int const n = normalizedCounter[s];
+                MEM_write64(spread + pos, sv);
+                for (i = 8; i < n; i += 8) {
+                    MEM_write64(spread + pos + i, sv);
+                }
+                pos += n;
+            }
+        }
+        /* Now we spread those positions across the table.
+         * The benefit of doing it in two stages is that we avoid the the
+         * variable size inner loop, which caused lots of branch misses.
+         * Now we can run through all the positions without any branch misses.
+         * We unroll the loop twice, since that is what emperically worked best.
+         */
+        {
+            size_t position = 0;
+            size_t s;
+            size_t const unroll = 2;
+            assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
+            for (s = 0; s < (size_t)tableSize; s += unroll) {
+                size_t u;
+                for (u = 0; u < unroll; ++u) {
+                    size_t const uPosition = (position + (u * step)) & tableMask;
+                    tableDecode[uPosition].symbol = spread[s + u];
+                }
+                position = (position + (unroll * step)) & tableMask;
+            }
+            assert(position == 0);
+        }
+    } else {
+        U32 const tableMask = tableSize-1;
         U32 const step = FSE_TABLESTEP(tableSize);
         U32 s, position = 0;
         for (s=0; s<maxSV1; s++) {
@@ -124,6 +173,11 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
     return 0;
 }
 
+size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
+{
+    return FSE_buildDTable_internal(dt, normalizedCounter, maxSymbolValue, tableLog, workSpace, wkspSize);
+}
+
 
 #ifndef FSE_COMMONDEFS_ONLY
 
@@ -251,36 +305,99 @@ size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
 }
 
 
-size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog)
+size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
+{
+    return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0);
+}
+
+typedef struct {
+    short ncount[FSE_MAX_SYMBOL_VALUE + 1];
+    FSE_DTable dtable[1]; /* Dynamically sized */
+} FSE_DecompressWksp;
+
+
+FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
+        void* dst, size_t dstCapacity,
+        const void* cSrc, size_t cSrcSize,
+        unsigned maxLog, void* workSpace, size_t wkspSize,
+        int bmi2)
 {
     const BYTE* const istart = (const BYTE*)cSrc;
     const BYTE* ip = istart;
-    short counting[FSE_MAX_SYMBOL_VALUE+1];
     unsigned tableLog;
     unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
+    FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace;
+
+    DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
+    if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC);
 
     /* normal FSE decoding mode */
-    size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
-    if (FSE_isError(NCountLength)) return NCountLength;
-    /* if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); */  /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */
-    if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
-    ip += NCountLength;
-    cSrcSize -= NCountLength;
+    {
+        size_t const NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
+        if (FSE_isError(NCountLength)) return NCountLength;
+        if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
+        assert(NCountLength <= cSrcSize);
+        ip += NCountLength;
+        cSrcSize -= NCountLength;
+    }
+
+    if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
+    workSpace = wksp->dtable + FSE_DTABLE_SIZE_U32(tableLog);
+    wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
+
+    CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
+
+    {
+        const void* ptr = wksp->dtable;
+        const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
+        const U32 fastMode = DTableH->fastMode;
 
-    CHECK_F( FSE_buildDTable (workSpace, counting, maxSymbolValue, tableLog) );
+        /* select fast mode (static) */
+        if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1);
+        return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0);
+    }
+}
+
+/* Avoids the FORCE_INLINE of the _body() function. */
+static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
+{
+    return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0);
+}
+
+#if DYNAMIC_BMI2
+TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
+{
+    return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
+}
+#endif
 
-    return FSE_decompress_usingDTable (dst, dstCapacity, ip, cSrcSize, workSpace);   /* always return, even if it is an error code */
+size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2)
+{
+#if DYNAMIC_BMI2
+    if (bmi2) {
+        return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
+    }
+#endif
+    (void)bmi2;
+    return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
 }
 
 
 typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
 
+#ifndef ZSTD_NO_UNUSED_FUNCTIONS
+size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) {
+    U32 wksp[FSE_BUILD_DTABLE_WKSP_SIZE_U32(FSE_TABLELOG_ABSOLUTE_MAX, FSE_MAX_SYMBOL_VALUE)];
+    return FSE_buildDTable_wksp(dt, normalizedCounter, maxSymbolValue, tableLog, wksp, sizeof(wksp));
+}
+
 size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize)
 {
-    DTable_max_t dt;   /* Static analyzer seems unable to understand this table will be properly initialized later */
-    return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, dt, FSE_MAX_TABLELOG);
+    /* Static analyzer seems unable to understand this table will be properly initialized later */
+    U32 wksp[FSE_DECOMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
+    return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, FSE_MAX_TABLELOG, wksp, sizeof(wksp));
 }
-
+#endif
 
 
 #endif   /* FSE_COMMONDEFS_ONLY */
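
Note: the two-stage spread in FSE_buildDTable_internal above is the core of the decode-table speedup: stage 1 writes every symbol out in order (the real code stores a replicated-byte U64 per symbol, 8 bytes at a time, with 8 spare bytes of slack for the over-write), and stage 2 scatters them with a fixed stride, so neither stage has a data-dependent inner loop to mispredict. A toy standalone sketch of the same two stages (hypothetical sizes; memset stands in for the U64 stores):

#include <assert.h>
#include <stdio.h>
#include <string.h>

#define TABLELOG  5                    /* FSE_MIN_TABLELOG */
#define TABLESIZE (1u << TABLELOG)
#define STEP ((TABLESIZE >> 1) + (TABLESIZE >> 3) + 3)   /* FSE_TABLESTEP */

int main(void)
{
    const short count[3] = { 20, 8, 4 };    /* toy counts, sum == TABLESIZE */
    unsigned char spread[TABLESIZE + 8];    /* +8 mirrors the over-write slack */
    unsigned char table[TABLESIZE];
    size_t pos = 0, position = 0, s;

    /* Stage 1: lay symbols down contiguously, in symbol order. */
    for (s = 0; s < 3; ++s) {
        memset(spread + pos, (int)s, (size_t)count[s]);
        pos += (size_t)count[s];
    }
    assert(pos == TABLESIZE);

    /* Stage 2: scatter with a fixed stride; no per-symbol inner loop. */
    for (s = 0; s < TABLESIZE; ++s) {
        table[position] = spread[s];
        position = (position + STEP) & (TABLESIZE - 1);
    }
    assert(position == 0);   /* the step is coprime with the table size */

    for (s = 0; s < TABLESIZE; ++s) printf("%u", (unsigned)table[s]);
    printf("\n");
    return 0;
}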

+ 29 - 7
Utilities/cmzstd/lib/common/huf.h

@@ -1,7 +1,7 @@
 /* ******************************************************************
  * huff0 huffman codec,
  * part of Finite State Entropy library
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  * You can contact the author at :
  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -20,7 +20,7 @@ extern "C" {
 #define HUF_H_298734234
 
 /* *** Dependencies *** */
-#include <stddef.h>    /* size_t */
+#include "zstd_deps.h"    /* size_t */
 
 
 /* *** library symbols visibility *** */
@@ -111,6 +111,8 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
 
 /* *** Dependencies *** */
 #include "mem.h"   /* U32 */
+#define FSE_STATIC_LINKING_ONLY
+#include "fse.h"
 
 
 /* *** Constants *** */
@@ -133,12 +135,16 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
 #define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
 
 /* static allocation of HUF's Compression Table */
+/* this is a private definition, just exposed for allocation and strict-aliasing purposes; never EVER access its members directly */
+struct HUF_CElt_s {
+  U16  val;
+  BYTE nbBits;
+};   /* typedef'd to HUF_CElt */
+typedef struct HUF_CElt_s HUF_CElt;   /* consider it an incomplete type */
 #define HUF_CTABLE_SIZE_U32(maxSymbolValue)   ((maxSymbolValue)+1)   /* Use tables of U32, for proper alignment */
 #define HUF_CTABLE_SIZE(maxSymbolValue)       (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32))
 #define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
-    U32 name##hb[HUF_CTABLE_SIZE_U32(maxSymbolValue)]; \
-    void* name##hv = &(name##hb); \
-    HUF_CElt* name = (HUF_CElt*)(name##hv)   /* no final ; */
+    HUF_CElt name[HUF_CTABLE_SIZE_U32(maxSymbolValue)] /* no final ; */
 
 /* static allocation of HUF's DTable */
 typedef U32 HUF_DTable;
@@ -184,9 +190,9 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
  *  or to save and regenerate 'CTable' using external methods.
  */
 unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
-typedef struct HUF_CElt_s HUF_CElt;   /* incomplete type */
 size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits);   /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */
 size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
+size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize);
 size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
 size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
 int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
@@ -226,6 +232,19 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize,
                      U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
                      const void* src, size_t srcSize);
 
+/*! HUF_readStats_wksp() :
+ * Same as HUF_readStats() but takes an external workspace which must be
+ * 4-byte aligned and its size must be >= HUF_READ_STATS_WORKSPACE_SIZE.
+ * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
+ */
+#define HUF_READ_STATS_WORKSPACE_SIZE_U32 FSE_DECOMPRESS_WKSP_SIZE_U32(6, HUF_TABLELOG_MAX-1)
+#define HUF_READ_STATS_WORKSPACE_SIZE (HUF_READ_STATS_WORKSPACE_SIZE_U32 * sizeof(unsigned))
+size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize,
+                          U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
+                          const void* src, size_t srcSize,
+                          void* workspace, size_t wkspSize,
+                          int bmi2);
+
 /** HUF_readCTable() :
  *  Loading a CTable saved with HUF_writeCTable() */
 size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights);
@@ -260,7 +279,7 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
  *  a required workspace size greater than that specified in the following
  *  macro.
  */
-#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10)
+#define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9))
 #define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
 
 #ifndef HUF_FORCE_DECOMPRESS_X2
@@ -332,6 +351,9 @@ size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstS
 #endif
 size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
 size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2);
+#endif
 
 #endif /* HUF_STATIC_LINKING_ONLY */
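
Note: with HUF_CElt now a concrete (still private) struct, the
HUF_CREATE_STATIC_CTABLE macro above drops the old U32-array-plus-cast
indirection. A sketch of what an instantiation expands to (hypothetical name):

    HUF_CREATE_STATIC_CTABLE(myCTable, 255);   /* macro emits no final ';' */
    /* expands to:  HUF_CElt myCTable[HUF_CTABLE_SIZE_U32(255)];
     * i.e.         HUF_CElt myCTable[256];   one {val, nbBits} cell per symbol */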
 

+ 69 - 98
Utilities/cmzstd/lib/common/mem.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -18,8 +18,10 @@ extern "C" {
 /*-****************************************
 *  Dependencies
 ******************************************/
-#include <stddef.h>     /* size_t, ptrdiff_t */
-#include <string.h>     /* memcpy */
+#include <stddef.h>  /* size_t, ptrdiff_t */
+#include "compiler.h"  /* __has_builtin */
+#include "debug.h"  /* DEBUG_STATIC_ASSERT */
+#include "zstd_deps.h"  /* ZSTD_memcpy */
 
 
 /*-****************************************
@@ -39,93 +41,15 @@ extern "C" {
 #  define MEM_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
 #endif
 
-#ifndef __has_builtin
-#  define __has_builtin(x) 0  /* compat. with non-clang compilers */
-#endif
-
-/* code only tested on 32 and 64 bits systems */
-#define MEM_STATIC_ASSERT(c)   { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
-MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
-
-/* detects whether we are being compiled under msan */
-#if defined (__has_feature)
-#  if __has_feature(memory_sanitizer)
-#    define MEMORY_SANITIZER 1
-#  endif
-#endif
-
-#if defined (MEMORY_SANITIZER)
-/* Not all platforms that support msan provide sanitizers/msan_interface.h.
- * We therefore declare the functions we need ourselves, rather than trying to
- * include the header file... */
-
-#include <stdint.h> /* intptr_t */
-
-/* Make memory region fully initialized (without changing its contents). */
-void __msan_unpoison(const volatile void *a, size_t size);
-
-/* Make memory region fully uninitialized (without changing its contents).
-   This is a legacy interface that does not update origin information. Use
-   __msan_allocated_memory() instead. */
-void __msan_poison(const volatile void *a, size_t size);
-
-/* Returns the offset of the first (at least partially) poisoned byte in the
-   memory range, or -1 if the whole range is good. */
-intptr_t __msan_test_shadow(const volatile void *x, size_t size);
-#endif
-
-/* detects whether we are being compiled under asan */
-#if defined (__has_feature)
-#  if __has_feature(address_sanitizer)
-#    define ADDRESS_SANITIZER 1
-#  endif
-#elif defined(__SANITIZE_ADDRESS__)
-#  define ADDRESS_SANITIZER 1
-#endif
-
-#if defined (ADDRESS_SANITIZER)
-/* Not all platforms that support asan provide sanitizers/asan_interface.h.
- * We therefore declare the functions we need ourselves, rather than trying to
- * include the header file... */
-
-/**
- * Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
- *
- * This memory must be previously allocated by your program. Instrumented
- * code is forbidden from accessing addresses in this region until it is
- * unpoisoned. This function is not guaranteed to poison the entire region -
- * it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
- * alignment restrictions.
- *
- * \note This function is not thread-safe because no two threads can poison or
- * unpoison memory in the same memory region simultaneously.
- *
- * \param addr Start of memory region.
- * \param size Size of memory region. */
-void __asan_poison_memory_region(void const volatile *addr, size_t size);
-
-/**
- * Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
- *
- * This memory must be previously allocated by your program. Accessing
- * addresses in this region is allowed until this region is poisoned again.
- * This function could unpoison a super-region of <c>[addr, addr+size)</c> due
- * to ASan alignment restrictions.
- *
- * \note This function is not thread-safe because no two threads can
- * poison or unpoison memory in the same memory region simultaneously.
- *
- * \param addr Start of memory region.
- * \param size Size of memory region. */
-void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
-#endif
-
-
 /*-**************************************************************
 *  Basic Types
 *****************************************************************/
 #if  !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
-# include <stdint.h>
+#  if defined(_AIX)
+#    include <inttypes.h>
+#  else
+#    include <stdint.h> /* intptr_t */
+#  endif
   typedef   uint8_t BYTE;
   typedef  uint16_t U16;
   typedef   int16_t S16;
@@ -157,7 +81,53 @@ void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
 
 
 /*-**************************************************************
-*  Memory I/O
+*  Memory I/O API
+*****************************************************************/
+/*=== Static platform detection ===*/
+MEM_STATIC unsigned MEM_32bits(void);
+MEM_STATIC unsigned MEM_64bits(void);
+MEM_STATIC unsigned MEM_isLittleEndian(void);
+
+/*=== Native unaligned read/write ===*/
+MEM_STATIC U16 MEM_read16(const void* memPtr);
+MEM_STATIC U32 MEM_read32(const void* memPtr);
+MEM_STATIC U64 MEM_read64(const void* memPtr);
+MEM_STATIC size_t MEM_readST(const void* memPtr);
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value);
+MEM_STATIC void MEM_write32(void* memPtr, U32 value);
+MEM_STATIC void MEM_write64(void* memPtr, U64 value);
+
+/*=== Little endian unaligned read/write ===*/
+MEM_STATIC U16 MEM_readLE16(const void* memPtr);
+MEM_STATIC U32 MEM_readLE24(const void* memPtr);
+MEM_STATIC U32 MEM_readLE32(const void* memPtr);
+MEM_STATIC U64 MEM_readLE64(const void* memPtr);
+MEM_STATIC size_t MEM_readLEST(const void* memPtr);
+
+MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val);
+MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val);
+MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32);
+MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64);
+MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val);
+
+/*=== Big endian unaligned read/write ===*/
+MEM_STATIC U32 MEM_readBE32(const void* memPtr);
+MEM_STATIC U64 MEM_readBE64(const void* memPtr);
+MEM_STATIC size_t MEM_readBEST(const void* memPtr);
+
+MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32);
+MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64);
+MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val);
+
+/*=== Byteswap ===*/
+MEM_STATIC U32 MEM_swap32(U32 in);
+MEM_STATIC U64 MEM_swap64(U64 in);
+MEM_STATIC size_t MEM_swapST(size_t in);
+
+
+/*-**************************************************************
+*  Memory I/O Implementation
 *****************************************************************/
 /* MEM_FORCE_MEMORY_ACCESS :
  * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
@@ -173,9 +143,7 @@ void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
  * Prefer these methods in priority order (0 > 1 > 2)
  */
 #ifndef MEM_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
-#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
-#    define MEM_FORCE_MEMORY_ACCESS 2
-#  elif defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__)
+#  if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__)
 #    define MEM_FORCE_MEMORY_ACCESS 1
 #  endif
 #endif
@@ -236,37 +204,37 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v =
 
 MEM_STATIC U16 MEM_read16(const void* memPtr)
 {
-    U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+    U16 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
 }
 
 MEM_STATIC U32 MEM_read32(const void* memPtr)
 {
-    U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+    U32 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
 }
 
 MEM_STATIC U64 MEM_read64(const void* memPtr)
 {
-    U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
+    U64 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
 }
 
 MEM_STATIC size_t MEM_readST(const void* memPtr)
 {
-    size_t val; memcpy(&val, memPtr, sizeof(val)); return val;
+    size_t val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
 }
 
 MEM_STATIC void MEM_write16(void* memPtr, U16 value)
 {
-    memcpy(memPtr, &value, sizeof(value));
+    ZSTD_memcpy(memPtr, &value, sizeof(value));
 }
 
 MEM_STATIC void MEM_write32(void* memPtr, U32 value)
 {
-    memcpy(memPtr, &value, sizeof(value));
+    ZSTD_memcpy(memPtr, &value, sizeof(value));
 }
 
 MEM_STATIC void MEM_write64(void* memPtr, U64 value)
 {
-    memcpy(memPtr, &value, sizeof(value));
+    ZSTD_memcpy(memPtr, &value, sizeof(value));
 }
 
 #endif /* MEM_FORCE_MEMORY_ACCESS */
@@ -338,7 +306,7 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
 
 MEM_STATIC U32 MEM_readLE24(const void* memPtr)
 {
-    return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
+    return (U32)MEM_readLE16(memPtr) + ((U32)(((const BYTE*)memPtr)[2]) << 16);
 }
 
 MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val)
@@ -445,6 +413,9 @@ MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val)
         MEM_writeBE64(memPtr, (U64)val);
 }
 
+/* code only tested on 32 and 64 bits systems */
+MEM_STATIC void MEM_check(void) { DEBUG_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
+
 
 #if defined (__cplusplus)
 }
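
Note: with the sanitizer shims gone from this header, what remains is the
default unaligned-access strategy (MEM_FORCE_MEMORY_ACCESS unset): route every
read/write through ZSTD_memcpy and let the compiler lower it to a plain load
or store where the target allows. In miniature, assuming nothing beyond libc:

    #include <stdint.h>
    #include <string.h>

    static uint32_t read32_any_align(const void* p)
    {
        uint32_t v;
        memcpy(&v, p, sizeof(v));   /* legal at any alignment, no strict-aliasing issue;
                                     * typically compiles to a single MOV/LDR */
        return v;
    }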

+ 23 - 17
Utilities/cmzstd/lib/common/pool.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -10,9 +10,9 @@
 
 
 /* ======   Dependencies   ======= */
-#include <stddef.h>    /* size_t */
+#include "zstd_deps.h" /* size_t */
 #include "debug.h"     /* assert */
-#include "zstd_internal.h"  /* ZSTD_malloc, ZSTD_free */
+#include "zstd_internal.h"  /* ZSTD_customMalloc, ZSTD_customFree */
 #include "pool.h"
 
 /* ======   Compiler specifics   ====== */
@@ -105,6 +105,10 @@ static void* POOL_thread(void* opaque) {
     assert(0);  /* Unreachable */
 }
 
+POOL_ctx* ZSTD_createThreadPool(size_t numThreads) {
+    return POOL_create (numThreads, 0);
+}
+
 POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) {
     return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem);
 }
@@ -115,14 +119,14 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
     /* Check parameters */
     if (!numThreads) { return NULL; }
     /* Allocate the context and zero initialize */
-    ctx = (POOL_ctx*)ZSTD_calloc(sizeof(POOL_ctx), customMem);
+    ctx = (POOL_ctx*)ZSTD_customCalloc(sizeof(POOL_ctx), customMem);
     if (!ctx) { return NULL; }
     /* Initialize the job queue.
      * It needs one extra space since one space is wasted to differentiate
      * empty and full queues.
      */
     ctx->queueSize = queueSize + 1;
-    ctx->queue = (POOL_job*)ZSTD_malloc(ctx->queueSize * sizeof(POOL_job), customMem);
+    ctx->queue = (POOL_job*)ZSTD_customMalloc(ctx->queueSize * sizeof(POOL_job), customMem);
     ctx->queueHead = 0;
     ctx->queueTail = 0;
     ctx->numThreadsBusy = 0;
@@ -136,7 +140,7 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
     }
     ctx->shutdown = 0;
     /* Allocate space for the thread handles */
-    ctx->threads = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), customMem);
+    ctx->threads = (ZSTD_pthread_t*)ZSTD_customMalloc(numThreads * sizeof(ZSTD_pthread_t), customMem);
     ctx->threadCapacity = 0;
     ctx->customMem = customMem;
     /* Check for errors */
@@ -179,12 +183,14 @@ void POOL_free(POOL_ctx *ctx) {
     ZSTD_pthread_mutex_destroy(&ctx->queueMutex);
     ZSTD_pthread_cond_destroy(&ctx->queuePushCond);
     ZSTD_pthread_cond_destroy(&ctx->queuePopCond);
-    ZSTD_free(ctx->queue, ctx->customMem);
-    ZSTD_free(ctx->threads, ctx->customMem);
-    ZSTD_free(ctx, ctx->customMem);
+    ZSTD_customFree(ctx->queue, ctx->customMem);
+    ZSTD_customFree(ctx->threads, ctx->customMem);
+    ZSTD_customFree(ctx, ctx->customMem);
 }
 
-
+void ZSTD_freeThreadPool (ZSTD_threadPool* pool) {
+  POOL_free (pool);
+}
 
 size_t POOL_sizeof(POOL_ctx *ctx) {
     if (ctx==NULL) return 0;  /* supports sizeof NULL */
@@ -203,11 +209,11 @@ static int POOL_resize_internal(POOL_ctx* ctx, size_t numThreads)
         return 0;
     }
     /* numThreads > threadCapacity */
-    {   ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem);
+    {   ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_customMalloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem);
         if (!threadPool) return 1;
         /* replace existing thread pool */
-        memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool));
-        ZSTD_free(ctx->threads, ctx->customMem);
+        ZSTD_memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool));
+        ZSTD_customFree(ctx->threads, ctx->customMem);
         ctx->threads = threadPool;
         /* Initialize additional threads */
         {   size_t threadId;
@@ -301,7 +307,7 @@ int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque)
 struct POOL_ctx_s {
     int dummy;
 };
-static POOL_ctx g_ctx;
+static POOL_ctx g_poolCtx;
 
 POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) {
     return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem);
@@ -311,11 +317,11 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customM
     (void)numThreads;
     (void)queueSize;
     (void)customMem;
-    return &g_ctx;
+    return &g_poolCtx;
 }
 
 void POOL_free(POOL_ctx* ctx) {
-    assert(!ctx || ctx == &g_ctx);
+    assert(!ctx || ctx == &g_poolCtx);
     (void)ctx;
 }
 
@@ -337,7 +343,7 @@ int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) {
 
 size_t POOL_sizeof(POOL_ctx* ctx) {
     if (ctx==NULL) return 0;  /* supports sizeof NULL */
-    assert(ctx == &g_ctx);
+    assert(ctx == &g_poolCtx);
     return sizeof(*ctx);
 }
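
Note: ZSTD_createThreadPool()/ZSTD_freeThreadPool() above publish the internal
POOL object through the public API. A hedged usage sketch (error handling
elided; attaching the pool to a compression context is part of the v1.5.0
advanced API, not shown in this diff):

    ZSTD_threadPool* const pool = ZSTD_createThreadPool(4 /* numThreads */);
    if (pool != NULL) {
        /* e.g. share it across contexts via ZSTD_CCtx_refThreadPool(cctx, pool) */
        ZSTD_freeThreadPool(pool);
    }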
 

+ 2 - 2
Utilities/cmzstd/lib/common/pool.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -16,7 +16,7 @@ extern "C" {
 #endif
 
 
-#include <stddef.h>   /* size_t */
+#include "zstd_deps.h"
 #define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_customMem */
 #include "../zstd.h"
 

+ 6 - 5
Utilities/cmzstd/lib/common/threading.c

@@ -78,11 +78,12 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr)
 
 #if defined(ZSTD_MULTITHREAD) && DEBUGLEVEL >= 1 && !defined(_WIN32)
 
-#include <stdlib.h>
+#define ZSTD_DEPS_NEED_MALLOC
+#include "zstd_deps.h"
 
 int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr)
 {
-    *mutex = (pthread_mutex_t*)malloc(sizeof(pthread_mutex_t));
+    *mutex = (pthread_mutex_t*)ZSTD_malloc(sizeof(pthread_mutex_t));
     if (!*mutex)
         return 1;
     return pthread_mutex_init(*mutex, attr);
@@ -94,14 +95,14 @@ int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex)
         return 0;
     {
         int const ret = pthread_mutex_destroy(*mutex);
-        free(*mutex);
+        ZSTD_free(*mutex);
         return ret;
     }
 }
 
 int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr)
 {
-    *cond = (pthread_cond_t*)malloc(sizeof(pthread_cond_t));
+    *cond = (pthread_cond_t*)ZSTD_malloc(sizeof(pthread_cond_t));
     if (!*cond)
         return 1;
     return pthread_cond_init(*cond, attr);
@@ -113,7 +114,7 @@ int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond)
         return 0;
     {
         int const ret = pthread_cond_destroy(*cond);
-        free(*cond);
+        ZSTD_free(*cond);
         return ret;
     }
 }
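
Note: in these DEBUGLEVEL >= 1 builds the mutex/cond objects are heap-allocated
(now through ZSTD_malloc), so initialization can fail and the return value must
be checked, e.g.:

    ZSTD_pthread_mutex_t lock;
    if (ZSTD_pthread_mutex_init(&lock, NULL)) {
        /* non-zero: allocation or pthread init failed */
    }
    /* ... */
    ZSTD_pthread_mutex_destroy(&lock);   /* also frees the debug-mode allocation */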

+ 20 - 60
Utilities/cmzstd/lib/common/xxhash.c

@@ -1,6 +1,6 @@
 /*
  *  xxHash - Fast Hash algorithm
- *  Copyright (c) 2012-2020, Yann Collet, Facebook, Inc.
+ *  Copyright (c) Yann Collet, Facebook, Inc.
  *
  *  You can contact the author at :
  *  - xxHash homepage: http://www.xxhash.com
@@ -30,9 +30,7 @@
  * Prefer these methods in priority order (0 > 1 > 2)
  */
 #ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
-#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
-#    define XXH_FORCE_MEMORY_ACCESS 2
-#  elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
+#  if (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
   (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \
   defined(__ICCARM__)
 #    define XXH_FORCE_MEMORY_ACCESS 1
@@ -77,14 +75,12 @@
 *  Includes & Memory related functions
 ***************************************/
 /* Modify the local functions below should you wish to use some other memory routines */
-/* for malloc(), free() */
-#include <stdlib.h>
-#include <stddef.h>     /* size_t */
-static void* XXH_malloc(size_t s) { return malloc(s); }
-static void  XXH_free  (void* p)  { free(p); }
-/* for memcpy() */
-#include <string.h>
-static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
+/* for ZSTD_malloc(), ZSTD_free() */
+#define ZSTD_DEPS_NEED_MALLOC
+#include "zstd_deps.h"  /* size_t, ZSTD_malloc, ZSTD_free, ZSTD_memcpy */
+static void* XXH_malloc(size_t s) { return ZSTD_malloc(s); }
+static void  XXH_free  (void* p)  { ZSTD_free(p); }
+static void* XXH_memcpy(void* dest, const void* src, size_t size) { return ZSTD_memcpy(dest,src,size); }
 
 #ifndef XXH_STATIC_LINKING_ONLY
 #  define XXH_STATIC_LINKING_ONLY
@@ -95,49 +91,13 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcp
 /* *************************************
 *  Compiler Specific Options
 ***************************************/
-#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
-#  define INLINE_KEYWORD inline
-#else
-#  define INLINE_KEYWORD
-#endif
-
-#if defined(__GNUC__) || defined(__ICCARM__)
-#  define FORCE_INLINE_ATTR __attribute__((always_inline))
-#elif defined(_MSC_VER)
-#  define FORCE_INLINE_ATTR __forceinline
-#else
-#  define FORCE_INLINE_ATTR
-#endif
-
-#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
-
-
-#ifdef _MSC_VER
-#  pragma warning(disable : 4127)      /* disable: C4127: conditional expression is constant */
-#endif
+#include "compiler.h"
 
 
 /* *************************************
 *  Basic Types
 ***************************************/
-#ifndef MEM_MODULE
-# define MEM_MODULE
-# if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
-#   include <stdint.h>
-    typedef uint8_t  BYTE;
-    typedef uint16_t U16;
-    typedef uint32_t U32;
-    typedef  int32_t S32;
-    typedef uint64_t U64;
-#  else
-    typedef unsigned char      BYTE;
-    typedef unsigned short     U16;
-    typedef unsigned int       U32;
-    typedef   signed int       S32;
-    typedef unsigned long long U64;   /* if your compiler doesn't support unsigned long long, replace by another 64-bit type here. Note that xxhash.h will also need to be updated. */
-#  endif
-#endif
-
+#include "mem.h"  /* BYTE, U32, U64, size_t */
 
 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
 
@@ -163,14 +123,14 @@ static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
 static U32 XXH_read32(const void* memPtr)
 {
     U32 val;
-    memcpy(&val, memPtr, sizeof(val));
+    ZSTD_memcpy(&val, memPtr, sizeof(val));
     return val;
 }
 
 static U64 XXH_read64(const void* memPtr)
 {
     U64 val;
-    memcpy(&val, memPtr, sizeof(val));
+    ZSTD_memcpy(&val, memPtr, sizeof(val));
     return val;
 }
 
@@ -307,12 +267,12 @@ XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
 ****************************/
 XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dstState, const XXH32_state_t* restrict srcState)
 {
-    memcpy(dstState, srcState, sizeof(*dstState));
+    ZSTD_memcpy(dstState, srcState, sizeof(*dstState));
 }
 
 XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dstState, const XXH64_state_t* restrict srcState)
 {
-    memcpy(dstState, srcState, sizeof(*dstState));
+    ZSTD_memcpy(dstState, srcState, sizeof(*dstState));
 }
 
 
@@ -554,12 +514,12 @@ XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
 XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed)
 {
     XXH32_state_t state;   /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
-    memset(&state, 0, sizeof(state)-4);   /* do not write into reserved, for future removal */
+    ZSTD_memset(&state, 0, sizeof(state)-4);   /* do not write into reserved, for future removal */
     state.v1 = seed + PRIME32_1 + PRIME32_2;
     state.v2 = seed + PRIME32_2;
     state.v3 = seed + 0;
     state.v4 = seed - PRIME32_1;
-    memcpy(statePtr, &state, sizeof(state));
+    ZSTD_memcpy(statePtr, &state, sizeof(state));
     return XXH_OK;
 }
 
@@ -567,12 +527,12 @@ XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int s
 XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed)
 {
     XXH64_state_t state;   /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
-    memset(&state, 0, sizeof(state)-8);   /* do not write into reserved, for future removal */
+    ZSTD_memset(&state, 0, sizeof(state)-8);   /* do not write into reserved, for future removal */
     state.v1 = seed + PRIME64_1 + PRIME64_2;
     state.v2 = seed + PRIME64_2;
     state.v3 = seed + 0;
     state.v4 = seed - PRIME64_1;
-    memcpy(statePtr, &state, sizeof(state));
+    ZSTD_memcpy(statePtr, &state, sizeof(state));
     return XXH_OK;
 }
 
@@ -843,14 +803,14 @@ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t
 {
     XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
     if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
-    memcpy(dst, &hash, sizeof(*dst));
+    ZSTD_memcpy(dst, &hash, sizeof(*dst));
 }
 
 XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
 {
     XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
     if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
-    memcpy(dst, &hash, sizeof(*dst));
+    ZSTD_memcpy(dst, &hash, sizeof(*dst));
 }
 
 XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)

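Note: the canonical-form helpers at the end of this file byte-swap on
little-endian hosts, so a digest serialized through them is byte-identical on
every platform. A small sketch (buf, len, and out are hypothetical):

    XXH32_canonical_t canon;
    XXH32_canonicalFromHash(&canon, XXH32(buf, len, 0 /* seed */));
    fwrite(&canon, sizeof(canon), 1, out);   /* stable on-disk bytes */
    /* XXH32_hashFromCanonical(&canon) recovers the native-endian value */
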
+ 2 - 2
Utilities/cmzstd/lib/common/xxhash.h

@@ -1,7 +1,7 @@
 /*
  * xxHash - Extremely Fast Hash algorithm
  * Header File
- * Copyright (c) 2012-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  * You can contact the author at :
  * - xxHash source repository : https://github.com/Cyan4973/xxHash
@@ -55,7 +55,7 @@ extern "C" {
 /* ****************************
 *  Definitions
 ******************************/
-#include <stddef.h>   /* size_t */
+#include "zstd_deps.h"
 typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
 
 

+ 10 - 10
Utilities/cmzstd/lib/common/zstd_common.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -13,8 +13,8 @@
 /*-*************************************
 *  Dependencies
 ***************************************/
-#include <stdlib.h>      /* malloc, calloc, free */
-#include <string.h>      /* memset */
+#define ZSTD_DEPS_NEED_MALLOC
+#include "zstd_deps.h"   /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */
 #include "error_private.h"
 #include "zstd_internal.h"
 
@@ -53,31 +53,31 @@ const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString
 /*=**************************************************************
 *  Custom allocator
 ****************************************************************/
-void* ZSTD_malloc(size_t size, ZSTD_customMem customMem)
+void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem)
 {
     if (customMem.customAlloc)
         return customMem.customAlloc(customMem.opaque, size);
-    return malloc(size);
+    return ZSTD_malloc(size);
 }
 
-void* ZSTD_calloc(size_t size, ZSTD_customMem customMem)
+void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem)
 {
     if (customMem.customAlloc) {
         /* calloc implemented as malloc+memset;
          * not as efficient as calloc, but next best guess for custom malloc */
         void* const ptr = customMem.customAlloc(customMem.opaque, size);
-        memset(ptr, 0, size);
+        ZSTD_memset(ptr, 0, size);
         return ptr;
     }
-    return calloc(1, size);
+    return ZSTD_calloc(1, size);
 }
 
-void ZSTD_free(void* ptr, ZSTD_customMem customMem)
+void ZSTD_customFree(void* ptr, ZSTD_customMem customMem)
 {
     if (ptr!=NULL) {
         if (customMem.customFree)
             customMem.customFree(customMem.opaque, ptr);
         else
-            free(ptr);
+            ZSTD_free(ptr);
     }
 }
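
Note: the rename separates the two allocation layers: ZSTD_customMalloc/
ZSTD_customCalloc/ZSTD_customFree dispatch on a ZSTD_customMem, and fall back
to the libc-backed ZSTD_malloc/ZSTD_calloc/ZSTD_free macros from zstd_deps.h.
A hedged sketch of supplying callbacks (myAlloc/myFree are hypothetical):

    static void* myAlloc(void* opaque, size_t size) { (void)opaque; return malloc(size); }
    static void  myFree (void* opaque, void* addr)  { (void)opaque; free(addr); }

    static const ZSTD_customMem cmem = { myAlloc, myFree, NULL /* opaque */ };
    /* allocations made with this cmem go through myAlloc/myFree;
     * ZSTD_defaultCMem (all zeroes) selects the libc path instead */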

+ 111 - 0
Utilities/cmzstd/lib/common/zstd_deps.h

@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* This file provides common libc dependencies that zstd requires.
+ * The purpose is to allow replacing this file with a custom implementation
+ * to compile zstd without libc support.
+ */
+
+/* Need:
+ * NULL
+ * INT_MAX
+ * UINT_MAX
+ * ZSTD_memcpy()
+ * ZSTD_memset()
+ * ZSTD_memmove()
+ */
+#ifndef ZSTD_DEPS_COMMON
+#define ZSTD_DEPS_COMMON
+
+#include <limits.h>
+#include <stddef.h>
+#include <string.h>
+
+#if defined(__GNUC__) && __GNUC__ >= 4
+# define ZSTD_memcpy(d,s,l) __builtin_memcpy((d),(s),(l))
+# define ZSTD_memmove(d,s,l) __builtin_memmove((d),(s),(l))
+# define ZSTD_memset(p,v,l) __builtin_memset((p),(v),(l))
+#else
+# define ZSTD_memcpy(d,s,l) memcpy((d),(s),(l))
+# define ZSTD_memmove(d,s,l) memmove((d),(s),(l))
+# define ZSTD_memset(p,v,l) memset((p),(v),(l))
+#endif
+
+#endif /* ZSTD_DEPS_COMMON */
+
+/* Need:
+ * ZSTD_malloc()
+ * ZSTD_free()
+ * ZSTD_calloc()
+ */
+#ifdef ZSTD_DEPS_NEED_MALLOC
+#ifndef ZSTD_DEPS_MALLOC
+#define ZSTD_DEPS_MALLOC
+
+#include <stdlib.h>
+
+#define ZSTD_malloc(s) malloc(s)
+#define ZSTD_calloc(n,s) calloc((n), (s))
+#define ZSTD_free(p) free((p))
+
+#endif /* ZSTD_DEPS_MALLOC */
+#endif /* ZSTD_DEPS_NEED_MALLOC */
+
+/*
+ * Provides 64-bit math support.
+ * Need:
+ * U64 ZSTD_div64(U64 dividend, U32 divisor)
+ */
+#ifdef ZSTD_DEPS_NEED_MATH64
+#ifndef ZSTD_DEPS_MATH64
+#define ZSTD_DEPS_MATH64
+
+#define ZSTD_div64(dividend, divisor) ((dividend) / (divisor))
+
+#endif /* ZSTD_DEPS_MATH64 */
+#endif /* ZSTD_DEPS_NEED_MATH64 */
+
+/* Need:
+ * assert()
+ */
+#ifdef ZSTD_DEPS_NEED_ASSERT
+#ifndef ZSTD_DEPS_ASSERT
+#define ZSTD_DEPS_ASSERT
+
+#include <assert.h>
+
+#endif /* ZSTD_DEPS_ASSERT */
+#endif /* ZSTD_DEPS_NEED_ASSERT */
+
+/* Need:
+ * ZSTD_DEBUG_PRINT()
+ */
+#ifdef ZSTD_DEPS_NEED_IO
+#ifndef ZSTD_DEPS_IO
+#define ZSTD_DEPS_IO
+
+#include <stdio.h>
+#define ZSTD_DEBUG_PRINT(...) fprintf(stderr, __VA_ARGS__)
+
+#endif /* ZSTD_DEPS_IO */
+#endif /* ZSTD_DEPS_NEED_IO */
+
+/* Only requested when <stdint.h> is known to be present.
+ * Need:
+ * intptr_t
+ */
+#ifdef ZSTD_DEPS_NEED_STDINT
+#ifndef ZSTD_DEPS_STDINT
+#define ZSTD_DEPS_STDINT
+
+#include <stdint.h>
+
+#endif /* ZSTD_DEPS_STDINT */
+#endif /* ZSTD_DEPS_NEED_STDINT */
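
Note: zstd_deps.h is opt-in per dependency group: a translation unit defines
the ZSTD_DEPS_NEED_* macros it wants before including the header, and
freestanding builds may swap the whole file out. For example:

    #define ZSTD_DEPS_NEED_MALLOC   /* pulls in ZSTD_malloc/ZSTD_calloc/ZSTD_free */
    #define ZSTD_DEPS_NEED_MATH64   /* pulls in ZSTD_div64 */
    #include "zstd_deps.h"
    /* ZSTD_memcpy/ZSTD_memmove/ZSTD_memset are always defined; a custom
     * zstd_deps.h can remap all of these for a no-libc target */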

+ 105 - 62
Utilities/cmzstd/lib/common/zstd_internal.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -19,7 +19,7 @@
 /*-*************************************
 *  Dependencies
 ***************************************/
-#ifdef __aarch64__
+#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
 #include <arm_neon.h>
 #endif
 #include "compiler.h"
@@ -36,6 +36,11 @@
 #  define XXH_STATIC_LINKING_ONLY  /* XXH64_state_t */
 #endif
 #include "xxhash.h"                /* XXH_reset, update, digest */
+#ifndef ZSTD_NO_TRACE
+#  include "zstd_trace.h"
+#else
+#  define ZSTD_TRACE 0
+#endif
 
 #if defined (__cplusplus)
 extern "C" {
@@ -139,7 +144,7 @@ void _force_has_format_string(const char *format, ...) {
 
 #define ZSTD_REP_NUM      3                 /* number of repcodes */
 #define ZSTD_REP_MOVE     (ZSTD_REP_NUM-1)
-static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
+static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
 
 #define KB *(1 <<10)
 #define MB *(1 <<20)
@@ -153,13 +158,13 @@ static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
 #define BIT0   1
 
 #define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
-static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
-static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
+static UNUSED_ATTR const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
+static UNUSED_ATTR const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
 
 #define ZSTD_FRAMEIDSIZE 4   /* magic number size */
 
 #define ZSTD_BLOCKHEADERSIZE 3   /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
-static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
+static UNUSED_ATTR const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
 typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
 
 #define ZSTD_FRAMECHECKSUMSIZE 4
@@ -186,61 +191,75 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy
 #define OffFSELog   8
 #define MaxFSELog  MAX(MAX(MLFSELog, LLFSELog), OffFSELog)
 
-static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0,
-                                      0, 0, 0, 0, 0, 0, 0, 0,
-                                      1, 1, 1, 1, 2, 2, 3, 3,
-                                      4, 6, 7, 8, 9,10,11,12,
-                                     13,14,15,16 };
-static const S16 LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2,
-                                             2, 2, 2, 2, 2, 1, 1, 1,
-                                             2, 2, 2, 2, 2, 2, 2, 2,
-                                             2, 3, 2, 1, 1, 1, 1, 1,
-                                            -1,-1,-1,-1 };
+#define ZSTD_MAX_HUF_HEADER_SIZE 128 /* header + <= 127 byte tree description */
+/* Each table cannot take more than #symbols * FSELog bits */
+#define ZSTD_MAX_FSE_HEADERS_SIZE (((MaxML + 1) * MLFSELog + (MaxLL + 1) * LLFSELog + (MaxOff + 1) * OffFSELog + 7) / 8)
+
+static UNUSED_ATTR const U32 LL_bits[MaxLL+1] = {
+     0, 0, 0, 0, 0, 0, 0, 0,
+     0, 0, 0, 0, 0, 0, 0, 0,
+     1, 1, 1, 1, 2, 2, 3, 3,
+     4, 6, 7, 8, 9,10,11,12,
+    13,14,15,16
+};
+static UNUSED_ATTR const S16 LL_defaultNorm[MaxLL+1] = {
+     4, 3, 2, 2, 2, 2, 2, 2,
+     2, 2, 2, 2, 2, 1, 1, 1,
+     2, 2, 2, 2, 2, 2, 2, 2,
+     2, 3, 2, 1, 1, 1, 1, 1,
+    -1,-1,-1,-1
+};
 #define LL_DEFAULTNORMLOG 6  /* for static allocation */
-static const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
-
-static const U32 ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0,
-                                      0, 0, 0, 0, 0, 0, 0, 0,
-                                      0, 0, 0, 0, 0, 0, 0, 0,
-                                      0, 0, 0, 0, 0, 0, 0, 0,
-                                      1, 1, 1, 1, 2, 2, 3, 3,
-                                      4, 4, 5, 7, 8, 9,10,11,
-                                     12,13,14,15,16 };
-static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2,
-                                             2, 1, 1, 1, 1, 1, 1, 1,
-                                             1, 1, 1, 1, 1, 1, 1, 1,
-                                             1, 1, 1, 1, 1, 1, 1, 1,
-                                             1, 1, 1, 1, 1, 1, 1, 1,
-                                             1, 1, 1, 1, 1, 1,-1,-1,
-                                            -1,-1,-1,-1,-1 };
+static UNUSED_ATTR const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
+
+static UNUSED_ATTR const U32 ML_bits[MaxML+1] = {
+     0, 0, 0, 0, 0, 0, 0, 0,
+     0, 0, 0, 0, 0, 0, 0, 0,
+     0, 0, 0, 0, 0, 0, 0, 0,
+     0, 0, 0, 0, 0, 0, 0, 0,
+     1, 1, 1, 1, 2, 2, 3, 3,
+     4, 4, 5, 7, 8, 9,10,11,
+    12,13,14,15,16
+};
+static UNUSED_ATTR const S16 ML_defaultNorm[MaxML+1] = {
+     1, 4, 3, 2, 2, 2, 2, 2,
+     2, 1, 1, 1, 1, 1, 1, 1,
+     1, 1, 1, 1, 1, 1, 1, 1,
+     1, 1, 1, 1, 1, 1, 1, 1,
+     1, 1, 1, 1, 1, 1, 1, 1,
+     1, 1, 1, 1, 1, 1,-1,-1,
+    -1,-1,-1,-1,-1
+};
 #define ML_DEFAULTNORMLOG 6  /* for static allocation */
-static const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
-
-static const S16 OF_defaultNorm[DefaultMaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2,
-                                                     2, 1, 1, 1, 1, 1, 1, 1,
-                                                     1, 1, 1, 1, 1, 1, 1, 1,
-                                                    -1,-1,-1,-1,-1 };
+static UNUSED_ATTR const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
+
+static UNUSED_ATTR const S16 OF_defaultNorm[DefaultMaxOff+1] = {
+     1, 1, 1, 1, 1, 1, 2, 2,
+     2, 1, 1, 1, 1, 1, 1, 1,
+     1, 1, 1, 1, 1, 1, 1, 1,
+    -1,-1,-1,-1,-1
+};
 #define OF_DEFAULTNORMLOG 5  /* for static allocation */
-static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
+static UNUSED_ATTR const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
 
 
 /*-*******************************************
 *  Shared functions to include for inlining
 *********************************************/
 static void ZSTD_copy8(void* dst, const void* src) {
-#ifdef __aarch64__
+#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
     vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src));
 #else
-    memcpy(dst, src, 8);
+    ZSTD_memcpy(dst, src, 8);
 #endif
 }
 
 #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
 static void ZSTD_copy16(void* dst, const void* src) {
-#ifdef __aarch64__
+#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
     vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src));
 #else
-    memcpy(dst, src, 16);
+    ZSTD_memcpy(dst, src, 16);
 #endif
 }
 #define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
@@ -255,13 +274,13 @@ typedef enum {
 } ZSTD_overlap_e;
 
 /*! ZSTD_wildcopy() :
- *  Custom version of memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)
+ *  Custom version of ZSTD_memcpy(), which can over-read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)
  *  @param ovtype controls the overlap detection
  *         - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
  *         - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart.
  *           The src buffer must be before the dst buffer.
  */
-MEM_STATIC FORCE_INLINE_ATTR 
+MEM_STATIC FORCE_INLINE_ATTR
 void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype)
 {
     ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
@@ -284,14 +303,16 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
          * one COPY16() in the first call. Then, do two calls per loop since
          * at that point it is more likely to have a high trip count.
          */
-#ifndef __aarch64__
+#ifdef __aarch64__
         do {
             COPY16(op, ip);
         }
         while (op < oend);
 #else
-        COPY16(op, ip);
-        if (op >= oend) return;
+        ZSTD_copy16(op, ip);
+        if (16 >= length) return;
+        op += 16;
+        ip += 16;
         do {
             COPY16(op, ip);
             COPY16(op, ip);
@@ -305,7 +326,7 @@ MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src,
 {
     size_t const length = MIN(dstCapacity, srcSize);
     if (length > 0) {
-        memcpy(dst, src, length);
+        ZSTD_memcpy(dst, src, length);
     }
     return length;
 }
@@ -320,28 +341,46 @@ MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src,
  * In which case, resize it down to free some memory */
 #define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128
 
+/* Controls whether the input/output buffer is buffered or stable. */
+typedef enum {
+    ZSTD_bm_buffered = 0,  /* Buffer the input/output */
+    ZSTD_bm_stable = 1     /* ZSTD_inBuffer/ZSTD_outBuffer is stable */
+} ZSTD_bufferMode_e;
+
 
 /*-*******************************************
 *  Private declarations
 *********************************************/
 typedef struct seqDef_s {
-    U32 offset;
+    U32 offset;         /* offset == rawOffset + ZSTD_REP_NUM, or equivalently, offCode + 1 */
     U16 litLength;
     U16 matchLength;
 } seqDef;
 
+/* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. */
+typedef enum {
+    ZSTD_llt_none = 0,             /* no longLengthType */
+    ZSTD_llt_literalLength = 1,    /* represents a long literal */
+    ZSTD_llt_matchLength = 2       /* represents a long match */
+} ZSTD_longLengthType_e;
+
 typedef struct {
     seqDef* sequencesStart;
-    seqDef* sequences;
+    seqDef* sequences;      /* ptr to end of sequences */
     BYTE* litStart;
-    BYTE* lit;
+    BYTE* lit;              /* ptr to end of literals */
     BYTE* llCode;
     BYTE* mlCode;
     BYTE* ofCode;
     size_t maxNbSeq;
     size_t maxNbLit;
-    U32   longLengthID;   /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
-    U32   longLengthPos;
+
+    /* longLengthPos and longLengthType allow us to represent either a single litLength or matchLength
+     * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment
+     * the existing value of the litLength or matchLength by 0x10000.
+     */
+    ZSTD_longLengthType_e   longLengthType;
+    U32                     longLengthPos;  /* Index of the sequence to apply long length modification to */
 } seqStore_t;
 
 typedef struct {
@@ -351,7 +390,7 @@ typedef struct {
 
 /**
  * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences
- * indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength.
+ * indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength.
  */
 MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq)
 {
@@ -359,10 +398,10 @@ MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore
     seqLen.litLength = seq->litLength;
     seqLen.matchLength = seq->matchLength + MINMATCH;
     if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
-        if (seqStore->longLengthID == 1) {
+        if (seqStore->longLengthType == ZSTD_llt_literalLength) {
             seqLen.litLength += 0xFFFF;
         }
-        if (seqStore->longLengthID == 2) {
+        if (seqStore->longLengthType == ZSTD_llt_matchLength) {
             seqLen.matchLength += 0xFFFF;
         }
     }
@@ -384,9 +423,9 @@ const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);   /* compress & dictBu
 void ZSTD_seqToCodes(const seqStore_t* seqStorePtr);   /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
 
 /* custom memory allocation functions */
-void* ZSTD_malloc(size_t size, ZSTD_customMem customMem);
-void* ZSTD_calloc(size_t size, ZSTD_customMem customMem);
-void ZSTD_free(void* ptr, ZSTD_customMem customMem);
+void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem);
+void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem);
+void ZSTD_customFree(void* ptr, ZSTD_customMem customMem);
 
 
 MEM_STATIC U32 ZSTD_highbit32(U32 val)   /* compress, dictBuilder, decodeCorpus */
@@ -394,8 +433,12 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val)   /* compress, dictBuilder, decodeCorpus
     assert(val != 0);
     {
 #   if defined(_MSC_VER)   /* Visual */
-        unsigned long r=0;
-        return _BitScanReverse(&r, val) ? (unsigned)r : 0;
+#       if STATIC_BMI2 == 1
+            return _lzcnt_u32(val)^31;
+#       else
+            unsigned long r=0;
+            return _BitScanReverse(&r, val) ? (unsigned)r : 0;
+#       endif
 #   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* GCC Intrinsic */
         return __builtin_clz (val) ^ 31;
 #   elif defined(__ICCARM__)    /* IAR Intrinsic */
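
Note on the ZSTD_highbit32 change: for val != 0, highbit32(val) == 31 - clz(val),
and since clz(val) is in [0,31], 31 - n equals n ^ 31 (31 is 0b11111, so the
subtraction never borrows). Hence _lzcnt_u32(val) ^ 31 on BMI2-enabled MSVC
matches the GCC __builtin_clz(val) ^ 31 path. A reference version for intuition:

    static unsigned highbit32_ref(unsigned val)   /* requires val != 0 */
    {
        unsigned r = 0;
        while (val >>= 1) r++;
        return r;                                 /* index of the highest set bit */
    }
    /* highbit32_ref(1) == 0 == 31 ^ 31;  highbit32_ref(0x80000000u) == 31 == 0 ^ 31 */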

+ 154 - 0
Utilities/cmzstd/lib/common/zstd_trace.h

@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_TRACE_H
+#define ZSTD_TRACE_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#include <stddef.h>
+
+/* weak symbol support */
+#if !defined(ZSTD_HAVE_WEAK_SYMBOLS) && defined(__GNUC__) && \
+    !defined(__APPLE__) && !defined(_WIN32) && !defined(__MINGW32__) && \
+    !defined(__CYGWIN__) && !defined(_AIX)
+#  define ZSTD_HAVE_WEAK_SYMBOLS 1
+#else
+#  define ZSTD_HAVE_WEAK_SYMBOLS 0
+#endif
+#if ZSTD_HAVE_WEAK_SYMBOLS
+#  define ZSTD_WEAK_ATTR __attribute__((__weak__))
+#else
+#  define ZSTD_WEAK_ATTR
+#endif
+
+/* Only enable tracing when weak symbols are available. */
+#ifndef ZSTD_TRACE
+#  define ZSTD_TRACE ZSTD_HAVE_WEAK_SYMBOLS
+#endif
+
+#if ZSTD_TRACE
+
+struct ZSTD_CCtx_s;
+struct ZSTD_DCtx_s;
+struct ZSTD_CCtx_params_s;
+
+typedef struct {
+    /**
+     * ZSTD_VERSION_NUMBER
+     *
+     * This is guaranteed to be the first member of ZSTD_trace.
+     * Otherwise, this struct is not stable between versions. If
+     * the version number does not match your expectation, you
+     * should not interpret the rest of the struct.
+     */
+    unsigned version;
+    /**
+     * Non-zero if streaming (de)compression is used.
+     */
+    unsigned streaming;
+    /**
+     * The dictionary ID.
+     */
+    unsigned dictionaryID;
+    /**
+     * Is the dictionary cold?
+     * Only set on decompression.
+     */
+    unsigned dictionaryIsCold;
+    /**
+     * The dictionary size or zero if no dictionary.
+     */
+    size_t dictionarySize;
+    /**
+     * The uncompressed size of the data.
+     */
+    size_t uncompressedSize;
+    /**
+     * The compressed size of the data.
+     */
+    size_t compressedSize;
+    /**
+     * The fully resolved CCtx parameters (NULL on decompression).
+     */
+    struct ZSTD_CCtx_params_s const* params;
+    /**
+     * The ZSTD_CCtx pointer (NULL on decompression).
+     */
+    struct ZSTD_CCtx_s const* cctx;
+    /**
+     * The ZSTD_DCtx pointer (NULL on compression).
+     */
+    struct ZSTD_DCtx_s const* dctx;
+} ZSTD_Trace;
+
+/**
+ * A tracing context. It must be 0 when tracing is disabled.
+ * Otherwise, any non-zero value returned by a tracing begin()
+ * function is presented to any subsequent calls to end().
+ *
+ * Any non-zero value is treated as meaning tracing is enabled, and is
+ * not interpreted by the library.
+ *
+ * Two possible uses are:
+ * * A timestamp for when the begin() function was called.
+ * * A unique key identifying the (de)compression, like the
+ *   address of the [dc]ctx pointer if you need to track
+ *   more information than just a timestamp.
+ */
+typedef unsigned long long ZSTD_TraceCtx;
+
+/**
+ * Trace the beginning of a compression call.
+ * @param cctx The cctx pointer for the compression.
+ *             It can be used as a key to map begin() to end().
+ * @returns Non-zero if tracing is enabled. The return value is
+ *          passed to ZSTD_trace_compress_end().
+ */
+ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_compress_begin(
+    struct ZSTD_CCtx_s const* cctx);
+
+/**
+ * Trace the end of a compression call.
+ * @param ctx The return value of ZSTD_trace_compress_begin().
+ * @param trace The zstd tracing info.
+ */
+ZSTD_WEAK_ATTR void ZSTD_trace_compress_end(
+    ZSTD_TraceCtx ctx,
+    ZSTD_Trace const* trace);
+
+/**
+ * Trace the beginning of a decompression call.
+ * @param dctx The dctx pointer for the decompression.
+ *             It can be used as a key to map begin() to end().
+ * @returns Non-zero if tracing is enabled. The return value is
+ *          passed to ZSTD_trace_decompress_end().
+ */
+ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_decompress_begin(
+    struct ZSTD_DCtx_s const* dctx);
+
+/**
+ * Trace the end of a decompression call.
+ * @param ctx The return value of ZSTD_trace_decompress_begin().
+ * @param trace The zstd tracing info.
+ */
+ZSTD_WEAK_ATTR void ZSTD_trace_decompress_end(
+    ZSTD_TraceCtx ctx,
+    ZSTD_Trace const* trace);
+
+#endif /* ZSTD_TRACE */
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_TRACE_H */
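
Note: the hooks above are weak symbols, so tracing costs nothing unless an
application supplies strong definitions. A hedged sketch of an override
(now_ns() and log_event() are hypothetical helpers):

    ZSTD_TraceCtx ZSTD_trace_compress_begin(struct ZSTD_CCtx_s const* cctx)
    {
        (void)cctx;
        return now_ns();   /* any non-zero value keeps tracing enabled */
    }

    void ZSTD_trace_compress_end(ZSTD_TraceCtx ctx, ZSTD_Trace const* trace)
    {
        if (trace->version != ZSTD_VERSION_NUMBER) return;   /* layout stable per version only */
        log_event(now_ns() - ctx, trace->uncompressedSize, trace->compressedSize);
    }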

+ 31 - 24
Utilities/cmzstd/lib/compress/fse_compress.c

@@ -1,6 +1,6 @@
 /* ******************************************************************
  * FSE : Finite State Entropy encoder
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  *  You can contact the author at :
  *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -15,8 +15,6 @@
 /* **************************************************************
 *  Includes
 ****************************************************************/
-#include <stdlib.h>     /* malloc, free, qsort */
-#include <string.h>     /* memcpy, memset */
 #include "../common/compiler.h"
 #include "../common/mem.h"        /* U32, U16, etc. */
 #include "../common/debug.h"      /* assert, DEBUGLOG */
@@ -25,6 +23,9 @@
 #define FSE_STATIC_LINKING_ONLY
 #include "../common/fse.h"
 #include "../common/error_private.h"
+#define ZSTD_DEPS_NEED_MALLOC
+#define ZSTD_DEPS_NEED_MATH64
+#include "../common/zstd_deps.h"  /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */
 
 
 /* **************************************************************
@@ -74,13 +75,15 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
     void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
     FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
     U32 const step = FSE_TABLESTEP(tableSize);
-    U32 cumul[FSE_MAX_SYMBOL_VALUE+2];
 
-    FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace;
+    U32* cumul = (U32*)workSpace;
+    FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSymbolValue + 2));
+
     U32 highThreshold = tableSize-1;
 
+    if ((size_t)workSpace & 3) return ERROR(GENERIC); /* Must be 4 byte aligned */
+    if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);
     /* CTable header */
-    if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge);
     tableU16[-2] = (U16) tableLog;
     tableU16[-1] = (U16) maxSymbolValue;
     assert(tableLog < 16);   /* required for threshold strategy to work */
@@ -89,7 +92,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
      * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
 
      #ifdef __clang_analyzer__
-     memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize);   /* useless initialization, just to keep scan-build happy */
+     ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize);   /* useless initialization, just to keep scan-build happy */
      #endif
 
     /* symbol start positions */
@@ -168,12 +171,13 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
     return 0;
 }
 
-
+#ifndef ZSTD_NO_UNUSED_FUNCTIONS
 size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
 {
     FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE];   /* memset() is not necessary, even if static analyzer complain about it */
     return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol));
 }
+#endif
 
 
 
@@ -307,10 +311,10 @@ FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
     size_t size;
     if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
     size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
-    return (FSE_CTable*)malloc(size);
+    return (FSE_CTable*)ZSTD_malloc(size);
 }
 
-void FSE_freeCTable (FSE_CTable* ct) { free(ct); }
+void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); }
 
 /* provides the minimum logSize to safely represent a distribution */
 static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
@@ -341,11 +345,10 @@ unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS
     return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
 }
 
-
 /* Secondary normalization method.
    To be used when primary method fails. */
 
-static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue)
+static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue, short lowProbCount)
 {
     short const NOT_YET_ASSIGNED = -2;
     U32 s;
@@ -362,7 +365,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
             continue;
         }
         if (count[s] <= lowThreshold) {
-            norm[s] = -1;
+            norm[s] = lowProbCount;
             distributed++;
             total -= count[s];
             continue;
@@ -414,7 +417,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
 
     {   U64 const vStepLog = 62 - tableLog;
         U64 const mid = (1ULL << (vStepLog-1)) - 1;
-        U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total;   /* scale on remaining */
+        U64 const rStep = ZSTD_div64((((U64)1<<vStepLog) * ToDistribute) + mid, (U32)total);   /* scale on remaining */
         U64 tmpTotal = mid;
         for (s=0; s<=maxSymbolValue; s++) {
             if (norm[s]==NOT_YET_ASSIGNED) {
@@ -431,10 +434,9 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
     return 0;
 }
 
-
 size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
                            const unsigned* count, size_t total,
-                           unsigned maxSymbolValue)
+                           unsigned maxSymbolValue, unsigned useLowProbCount)
 {
     /* Sanity checks */
     if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
@@ -443,8 +445,9 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
     if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC);   /* Too small tableLog, compression potentially impossible */
 
     {   static U32 const rtbTable[] = {     0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
+        short const lowProbCount = useLowProbCount ? -1 : 1;
         U64 const scale = 62 - tableLog;
-        U64 const step = ((U64)1<<62) / total;   /* <== here, one division ! */
+        U64 const step = ZSTD_div64((U64)1<<62, (U32)total);   /* <== here, one division ! */
         U64 const vStep = 1ULL<<(scale-20);
         int stillToDistribute = 1<<tableLog;
         unsigned s;
@@ -456,7 +459,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
             if (count[s] == total) return 0;   /* rle special case */
             if (count[s] == 0) { normalizedCounter[s]=0; continue; }
             if (count[s] <= lowThreshold) {
-                normalizedCounter[s] = -1;
+                normalizedCounter[s] = lowProbCount;
                 stillToDistribute--;
             } else {
                 short proba = (short)((count[s]*step) >> scale);
@@ -470,7 +473,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
         }   }
         if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
             /* corner case, need another normalization method */
-            size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
+            size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue, lowProbCount);
             if (FSE_isError(errorCode)) return errorCode;
         }
         else normalizedCounter[largest] += (short)stillToDistribute;
@@ -625,6 +628,7 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
 
 size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
 
+#ifndef ZSTD_NO_UNUSED_FUNCTIONS
 /* FSE_compress_wksp() :
  * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
  * `wkspSize` size must be `(1<<tableLog)`.
@@ -643,7 +647,7 @@ size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t src
     size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable));
 
     /* init conditions */
-    if (wkspSize < FSE_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge);
+    if (wkspSize < FSE_COMPRESS_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge);
     if (srcSize <= 1) return 0;  /* Not compressible */
     if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
     if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
@@ -656,7 +660,7 @@ size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t src
     }
 
     tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
-    CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue) );
+    CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue, /* useLowProbCount */ srcSize >= 2048) );
 
     /* Write table description header */
     {   CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
@@ -678,13 +682,16 @@ size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t src
 
 typedef struct {
     FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
-    BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];
+    union {
+      U32 hist_wksp[HIST_WKSP_SIZE_U32];
+      BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];
+    } workspace;
 } fseWkspMax_t;
 
 size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
 {
     fseWkspMax_t scratchBuffer;
-    DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE));   /* compilation failures here means scratchBuffer is not large enough */
+    DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_COMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE));   /* a compilation failure here means scratchBuffer is not large enough */
     if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
     return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
 }
@@ -693,6 +700,6 @@ size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcS
 {
     return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
 }
-
+#endif
 
 #endif   /* FSE_COMMONDEFS_ONLY */

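FSE_normalizeCount() now takes a useLowProbCount flag: when set, rare symbols keep the special -1 count (probability below one table cell), and when clear they are rounded up to 1. A hedged caller sketch reusing the srcSize >= 2048 heuristic visible in FSE_compress_wksp above; the function name is illustrative:

    static size_t normalize_example(short* norm, unsigned tableLog,
                                    const unsigned* count, size_t srcSize,
                                    unsigned maxSymbolValue)
    {
        /* assumption: same threshold as the call site in this diff */
        unsigned const useLowProbCount = (srcSize >= 2048);
        return FSE_normalizeCount(norm, tableLog, count, srcSize,
                                  maxSymbolValue, useLowProbCount);
    }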
+ 27 - 29
Utilities/cmzstd/lib/compress/hist.c

@@ -1,7 +1,7 @@
 /* ******************************************************************
  * hist : Histogram functions
  * part of Finite State Entropy project
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  *  You can contact the author at :
  *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -34,7 +34,7 @@ unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
     unsigned maxSymbolValue = *maxSymbolValuePtr;
     unsigned largestCount=0;
 
-    memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
+    ZSTD_memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
     if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
 
     while (ip<end) {
@@ -60,9 +60,9 @@ typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e;
  * this design makes better use of OoO cpus,
  * and is noticeably faster when some values are heavily repeated.
  * But it needs some additional workspace for intermediate tables.
- * `workSpace` size must be a table of size >= HIST_WKSP_SIZE_U32.
+ * `workSpace` must be a U32 table of size >= HIST_WKSP_SIZE_U32.
  * @return : largest histogram frequency,
- *           or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */
+ *           or an error code (notably when histogram's alphabet is larger than *maxSymbolValuePtr) */
 static size_t HIST_count_parallel_wksp(
                                 unsigned* count, unsigned* maxSymbolValuePtr,
                                 const void* source, size_t sourceSize,
@@ -71,22 +71,21 @@ static size_t HIST_count_parallel_wksp(
 {
     const BYTE* ip = (const BYTE*)source;
     const BYTE* const iend = ip+sourceSize;
-    unsigned maxSymbolValue = *maxSymbolValuePtr;
+    size_t const countSize = (*maxSymbolValuePtr + 1) * sizeof(*count);
     unsigned max=0;
     U32* const Counting1 = workSpace;
     U32* const Counting2 = Counting1 + 256;
     U32* const Counting3 = Counting2 + 256;
     U32* const Counting4 = Counting3 + 256;
 
-    memset(workSpace, 0, 4*256*sizeof(unsigned));
-
     /* safety checks */
+    assert(*maxSymbolValuePtr <= 255);
     if (!sourceSize) {
-        memset(count, 0, maxSymbolValue + 1);
+        ZSTD_memset(count, 0, countSize);
         *maxSymbolValuePtr = 0;
         return 0;
     }
-    if (!maxSymbolValue) maxSymbolValue = 255;            /* 0 == default */
+    ZSTD_memset(workSpace, 0, 4*256*sizeof(unsigned));
 
     /* by stripes of 16 bytes */
     {   U32 cached = MEM_read32(ip); ip += 4;
@@ -118,21 +117,18 @@ static size_t HIST_count_parallel_wksp(
     /* finish last symbols */
     while (ip<iend) Counting1[*ip++]++;
 
-    if (check) {   /* verify stats will fit into destination table */
-        U32 s; for (s=255; s>maxSymbolValue; s--) {
-            Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
-            if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
-    }   }
-
     {   U32 s;
-        if (maxSymbolValue > 255) maxSymbolValue = 255;
-        for (s=0; s<=maxSymbolValue; s++) {
-            count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
-            if (count[s] > max) max = count[s];
+        for (s=0; s<256; s++) {
+            Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
+            if (Counting1[s] > max) max = Counting1[s];
     }   }
 
-    while (!count[maxSymbolValue]) maxSymbolValue--;
-    *maxSymbolValuePtr = maxSymbolValue;
+    {   unsigned maxSymbolValue = 255;
+        while (!Counting1[maxSymbolValue]) maxSymbolValue--;
+        if (check && maxSymbolValue > *maxSymbolValuePtr) return ERROR(maxSymbolValue_tooSmall);
+        *maxSymbolValuePtr = maxSymbolValue;
+        ZSTD_memmove(count, Counting1, countSize);   /* in case count & Counting1 are overlapping */
+    }
     return (size_t)max;
 }
 
@@ -152,14 +148,6 @@ size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
     return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace);
 }
 
-/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
-size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
-                     const void* source, size_t sourceSize)
-{
-    unsigned tmpCounters[HIST_WKSP_SIZE_U32];
-    return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters));
-}
-
 /* HIST_count_wksp() :
  * Same as HIST_count(), but using an externally provided scratch buffer.
  * `workSpace` must be a table of >= HIST_WKSP_SIZE_U32 unsigned */
@@ -175,9 +163,19 @@ size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
     return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize);
 }
 
+#ifndef ZSTD_NO_UNUSED_FUNCTIONS
+/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
+size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
+                     const void* source, size_t sourceSize)
+{
+    unsigned tmpCounters[HIST_WKSP_SIZE_U32];
+    return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters));
+}
+
 size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
                  const void* src, size_t srcSize)
 {
     unsigned tmpCounters[HIST_WKSP_SIZE_U32];
     return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters));
 }
+#endif

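With the stack-allocating HIST_count()/HIST_countFast() wrappers now optional, callers supply the scratch table explicitly. A minimal usage sketch, assuming only the HIST_count_wksp() signature shown above:

    static size_t histogram_example(unsigned* count, unsigned* maxSymbolValuePtr,
                                    const void* src, size_t srcSize)
    {
        unsigned wksp[HIST_WKSP_SIZE_U32];   /* caller-provided scratch */
        return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize,
                               wksp, sizeof(wksp));
    }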
+ 2 - 2
Utilities/cmzstd/lib/compress/hist.h

@@ -1,7 +1,7 @@
 /* ******************************************************************
  * hist : Histogram functions
  * part of Finite State Entropy project
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  *  You can contact the author at :
  *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -14,7 +14,7 @@
 ****************************************************************** */
 
 /* --- dependencies --- */
-#include <stddef.h>   /* size_t */
+#include "../common/zstd_deps.h"   /* size_t */
 
 
 /* --- simple histogram functions --- */

+ 265 - 126
Utilities/cmzstd/lib/compress/huf_compress.c

@@ -1,6 +1,6 @@
 /* ******************************************************************
  * Huffman encoder, part of New Generation Entropy library
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  *  You can contact the author at :
  *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -23,8 +23,7 @@
 /* **************************************************************
 *  Includes
 ****************************************************************/
-#include <string.h>     /* memcpy, memset */
-#include <stdio.h>      /* printf (debug) */
+#include "../common/zstd_deps.h"     /* ZSTD_memcpy, ZSTD_memset */
 #include "../common/compiler.h"
 #include "../common/bitstream.h"
 #include "hist.h"
@@ -60,7 +59,15 @@ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS
  * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
  */
 #define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
-static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
+
+typedef struct {
+    FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
+    U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
+    unsigned count[HUF_TABLELOG_MAX+1];
+    S16 norm[HUF_TABLELOG_MAX+1];
+} HUF_CompressWeightsWksp;
+
+static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightTable, size_t wtSize, void* workspace, size_t workspaceSize)
 {
     BYTE* const ostart = (BYTE*) dst;
     BYTE* op = ostart;
@@ -68,33 +75,30 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
 
     unsigned maxSymbolValue = HUF_TABLELOG_MAX;
     U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
+    HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)workspace;
 
-    FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
-    BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER];
-
-    unsigned count[HUF_TABLELOG_MAX+1];
-    S16 norm[HUF_TABLELOG_MAX+1];
+    if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);
 
     /* init conditions */
     if (wtSize <= 1) return 0;  /* Not compressible */
 
     /* Scan input and build symbol stats */
-    {   unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize);   /* never fails */
+    {   unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize);   /* never fails */
         if (maxCount == wtSize) return 1;   /* only a single symbol in src : rle */
         if (maxCount == 1) return 0;        /* each symbol present maximum once => not compressible */
     }
 
     tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
-    CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) );
+    CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );
 
     /* Write table description header */
-    {   CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), norm, maxSymbolValue, tableLog) );
+    {   CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) );
         op += hSize;
     }
 
     /* Compress */
-    CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
-    {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, CTable) );
+    CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) );
+    {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) );
         if (cSize == 0) return 0;   /* not enough space for compressed data */
         op += cSize;
     }
@@ -103,34 +107,33 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
 }
 
 
-struct HUF_CElt_s {
-  U16  val;
-  BYTE nbBits;
-};   /* typedef'd to HUF_CElt within "huf.h" */
-
-/*! HUF_writeCTable() :
-    `CTable` : Huffman tree to save, using huf representation.
-    @return : size of saved CTable */
-size_t HUF_writeCTable (void* dst, size_t maxDstSize,
-                        const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
-{
+typedef struct {
+    HUF_CompressWeightsWksp wksp;
     BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
     BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
+} HUF_WriteCTableWksp;
+
+size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
+                            const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
+                            void* workspace, size_t workspaceSize)
+{
     BYTE* op = (BYTE*)dst;
     U32 n;
+    HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)workspace;
 
-     /* check conditions */
+    /* check conditions */
+    if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
     if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
 
     /* convert to weight */
-    bitsToWeight[0] = 0;
+    wksp->bitsToWeight[0] = 0;
     for (n=1; n<huffLog+1; n++)
-        bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
+        wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
     for (n=0; n<maxSymbolValue; n++)
-        huffWeight[n] = bitsToWeight[CTable[n].nbBits];
+        wksp->huffWeight[n] = wksp->bitsToWeight[CTable[n].nbBits];
 
     /* attempt weights compression by FSE */
-    {   CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) );
+    {   CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
         if ((hSize>1) & (hSize < maxSymbolValue/2)) {   /* FSE compressed */
             op[0] = (BYTE)hSize;
             return hSize+1;
@@ -140,12 +143,22 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize,
     if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen : likely means source cannot be compressed */
     if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
     op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
-    huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination */
+    wksp->huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination */
     for (n=0; n<maxSymbolValue; n+=2)
-        op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
+        op[(n/2)+1] = (BYTE)((wksp->huffWeight[n] << 4) + wksp->huffWeight[n+1]);
     return ((maxSymbolValue+1)/2) + 1;
 }
 
+/*! HUF_writeCTable() :
+    `CTable` : Huffman tree to save, using huf representation.
+    @return : size of saved CTable */
+size_t HUF_writeCTable (void* dst, size_t maxDstSize,
+                        const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
+{
+    HUF_WriteCTableWksp wksp;
+    return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, &wksp, sizeof(wksp));
+}
+
 
 size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
 {
@@ -156,6 +169,7 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
 
     /* get symbol weights */
     CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
+    *hasZeroWeights = (rankVal[0] > 0);
 
     /* check result */
     if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
@@ -164,16 +178,14 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
     /* Prepare base value per rank */
     {   U32 n, nextRankStart = 0;
         for (n=1; n<=tableLog; n++) {
-            U32 current = nextRankStart;
+            U32 curr = nextRankStart;
             nextRankStart += (rankVal[n] << (n-1));
-            rankVal[n] = current;
+            rankVal[n] = curr;
     }   }
 
     /* fill nbBits */
-    *hasZeroWeights = 0;
     {   U32 n; for (n=0; n<nbSymbols; n++) {
             const U32 w = huffWeight[n];
-            *hasZeroWeights |= (w == 0);
             CTable[n].nbBits = (BYTE)(tableLog + 1 - w) & -(w != 0);
     }   }
 
@@ -212,32 +224,63 @@ typedef struct nodeElt_s {
     BYTE nbBits;
 } nodeElt;
 
+/**
+ * HUF_setMaxHeight():
+ * Enforces maxNbBits on the Huffman tree described in huffNode.
+ *
+ * It sets all nodes with nbBits > maxNbBits to be maxNbBits. Then it adjusts
+ * the tree so that it is a valid canonical Huffman tree.
+ *
+ * @pre               The sum of the ranks of each symbol == 2^largestBits,
+ *                    where largestBits == huffNode[lastNonNull].nbBits.
+ * @post              The sum of the ranks of each symbol == 2^largestBits,
+ *                    where largestBits is the return value <= maxNbBits.
+ *
+ * @param huffNode    The Huffman tree modified in place to enforce maxNbBits.
+ * @param lastNonNull The symbol with the lowest count in the Huffman tree.
+ * @param maxNbBits   The maximum allowed number of bits, which the Huffman tree
+ *                    may not respect. After this function the Huffman tree will
+ *                    respect maxNbBits.
+ * @return            The maximum number of bits of the Huffman tree after adjustment,
+ *                    necessarily no more than maxNbBits.
+ */
 static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
 {
     const U32 largestBits = huffNode[lastNonNull].nbBits;
-    if (largestBits <= maxNbBits) return largestBits;   /* early exit : no elt > maxNbBits */
+    /* early exit : no elt > maxNbBits, so the tree is already valid. */
+    if (largestBits <= maxNbBits) return largestBits;
 
     /* there are several too large elements (at least >= 2) */
     {   int totalCost = 0;
         const U32 baseCost = 1 << (largestBits - maxNbBits);
         int n = (int)lastNonNull;
 
+        /* Adjust any ranks > maxNbBits to maxNbBits.
+         * Compute totalCost, which is how far we are over 2^largestBits
+         * after adjusting the offending ranks.
+         */
         while (huffNode[n].nbBits > maxNbBits) {
             totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
             huffNode[n].nbBits = (BYTE)maxNbBits;
-            n --;
-        }  /* n stops at huffNode[n].nbBits <= maxNbBits */
-        while (huffNode[n].nbBits == maxNbBits) n--;   /* n end at index of smallest symbol using < maxNbBits */
+            n--;
+        }
+        /* n stops at huffNode[n].nbBits <= maxNbBits */
+        assert(huffNode[n].nbBits <= maxNbBits);
+        /* n end at index of smallest symbol using < maxNbBits */
+        while (huffNode[n].nbBits == maxNbBits) --n;
 
-        /* renorm totalCost */
-        totalCost >>= (largestBits - maxNbBits);  /* note : totalCost is necessarily a multiple of baseCost */
+        /* renorm totalCost from 2^largestBits to 2^maxNbBits
+         * note : totalCost is necessarily a multiple of baseCost */
+        assert((totalCost & (baseCost - 1)) == 0);
+        totalCost >>= (largestBits - maxNbBits);
+        assert(totalCost > 0);
 
         /* repay normalized cost */
         {   U32 const noSymbol = 0xF0F0F0F0;
             U32 rankLast[HUF_TABLELOG_MAX+2];
 
-            /* Get pos of last (smallest) symbol per rank */
-            memset(rankLast, 0xF0, sizeof(rankLast));
+            /* Get pos of last (smallest = lowest cum. count) symbol per rank */
+            ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
             {   U32 currentNbBits = maxNbBits;
                 int pos;
                 for (pos=n ; pos >= 0; pos--) {
@@ -247,34 +290,65 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
             }   }
 
             while (totalCost > 0) {
+                /* Try to reduce the next power of 2 above totalCost because we
+                 * gain back half the rank.
+                 */
                 U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1;
                 for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
                     U32 const highPos = rankLast[nBitsToDecrease];
                     U32 const lowPos = rankLast[nBitsToDecrease-1];
                     if (highPos == noSymbol) continue;
+                    /* Decrease highPos if lowPos has no symbols, or if
+                     * removing one highPos is no costlier than removing
+                     * two lowPos.
+                     */
                     if (lowPos == noSymbol) break;
                     {   U32 const highTotal = huffNode[highPos].count;
                         U32 const lowTotal = 2 * huffNode[lowPos].count;
                         if (highTotal <= lowTotal) break;
                 }   }
                 /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
+                assert(rankLast[nBitsToDecrease] != noSymbol || nBitsToDecrease == 1);
                 /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
                 while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
-                    nBitsToDecrease ++;
+                    nBitsToDecrease++;
+                assert(rankLast[nBitsToDecrease] != noSymbol);
+                /* Increase the number of bits to gain back half the rank cost. */
                 totalCost -= 1 << (nBitsToDecrease-1);
+                huffNode[rankLast[nBitsToDecrease]].nbBits++;
+
+                /* Fix up the new rank.
+                 * If the new rank was empty, this symbol is now its smallest.
+                 * Otherwise, this symbol will be the largest in the new rank so no adjustment.
+                 */
                 if (rankLast[nBitsToDecrease-1] == noSymbol)
-                    rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];   /* this rank is no longer empty */
-                huffNode[rankLast[nBitsToDecrease]].nbBits ++;
+                    rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];
+                /* Fix up the old rank.
+                 * If the symbol was at position 0, meaning it was the highest weight symbol in the tree,
+                 * it must be the only symbol in its rank, so the old rank now has no symbols.
+                 * Otherwise, since the Huffman nodes are sorted by count, the previous position is now
+                 * the smallest node in the rank. If the previous position belongs to a different rank,
+                 * then the rank is now empty.
+                 */
                 if (rankLast[nBitsToDecrease] == 0)    /* special case, reached largest symbol */
                     rankLast[nBitsToDecrease] = noSymbol;
                 else {
                     rankLast[nBitsToDecrease]--;
                     if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
                         rankLast[nBitsToDecrease] = noSymbol;   /* this rank is now empty */
-            }   }   /* while (totalCost > 0) */
-
+                }
+            }   /* while (totalCost > 0) */
+
+            /* If we've removed too much weight, then we have to add it back.
+             * To avoid overshooting again, we only adjust the smallest rank.
+             * We take the largest nodes from the lowest rank 0 and move them
+             * to rank 1. There are guaranteed to be enough rank 0 symbols because
+             * TODO.
+             */
             while (totalCost < 0) {  /* Sometimes, cost correction overshoot */
-                if (rankLast[1] == noSymbol) {  /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
+                /* special case : no rank 1 symbol (using maxNbBits-1);
+                 * let's create one from largest rank 0 (using maxNbBits).
+                 */
+                if (rankLast[1] == noSymbol) {
                     while (huffNode[n].nbBits == maxNbBits) n--;
                     huffNode[n+1].nbBits--;
                     assert(n >= 0);
@@ -285,14 +359,16 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
                 huffNode[ rankLast[1] + 1 ].nbBits--;
                 rankLast[1]++;
                 totalCost ++;
-    }   }   }   /* there are several too large elements (at least >= 2) */
+            }
+        }   /* repay normalized cost */
+    }   /* there are several too large elements (at least >= 2) */
 
     return maxNbBits;
 }
 
 typedef struct {
     U32 base;
-    U32 current;
+    U32 curr;
 } rankPos;
 
 typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
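The @pre/@post contract documented on HUF_setMaxHeight() is a Kraft-sum invariant: giving each symbol the weight 2^(largestBits - nbBits), the weights must total exactly 2^largestBits both before and after the depth cap. A standalone sketch of that check; the helper is illustrative, not library code:

    static unsigned long long kraft_sum(const unsigned char* nbBits, int nbSymbols,
                                        unsigned largestBits)
    {
        unsigned long long sum = 0;
        int i;
        for (i = 0; i < nbSymbols; i++)
            sum += 1ULL << (largestBits - nbBits[i]);   /* weight of symbol i */
        return sum;   /* a valid canonical tree has sum == 1ULL << largestBits */
    }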
@@ -304,21 +380,45 @@ typedef struct {
   rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
 } HUF_buildCTable_wksp_tables;
 
+/**
+ * HUF_sort():
+ * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.
+ *
+ * @param[out] huffNode       Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.
+ *                            Must have (maxSymbolValue + 1) entries.
+ * @param[in]  count          Histogram of the symbols.
+ * @param[in]  maxSymbolValue Maximum symbol value.
+ * @param      rankPosition   This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.
+ */
 static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition)
 {
-    U32 n;
-
-    memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
-    for (n=0; n<=maxSymbolValue; n++) {
-        U32 r = BIT_highbit32(count[n] + 1);
-        rankPosition[r].base ++;
+    int n;
+    int const maxSymbolValue1 = (int)maxSymbolValue + 1;
+
+    /* Compute base and set curr to base.
+     * For symbol s let lowerRank = BIT_highbit32(count[s]+1) and rank = lowerRank + 1.
+     * Then 2^lowerRank <= count[s]+1 < 2^rank.
+     * We attribute each symbol to lowerRank's base value, because we want to know where
+     * each rank begins in the output, so for rank R we want to count ranks R+1 and above.
+     */
+    ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
+    for (n = 0; n < maxSymbolValue1; ++n) {
+        U32 lowerRank = BIT_highbit32(count[n] + 1);
+        rankPosition[lowerRank].base++;
     }
-    for (n=30; n>0; n--) rankPosition[n-1].base += rankPosition[n].base;
-    for (n=0; n<32; n++) rankPosition[n].current = rankPosition[n].base;
-    for (n=0; n<=maxSymbolValue; n++) {
+    assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0);
+    for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) {
+        rankPosition[n-1].base += rankPosition[n].base;
+        rankPosition[n-1].curr = rankPosition[n-1].base;
+    }
+    /* Sort */
+    for (n = 0; n < maxSymbolValue1; ++n) {
         U32 const c = count[n];
         U32 const r = BIT_highbit32(c+1) + 1;
-        U32 pos = rankPosition[r].current++;
+        U32 pos = rankPosition[r].curr++;
+        /* Insert into the correct position in the rank.
+         * We have at most 256 symbols, so this insertion should be fine.
+         */
         while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) {
             huffNode[pos] = huffNode[pos-1];
             pos--;
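The rank HUF_sort() assigns to a symbol with count c is BIT_highbit32(c+1) + 1, i.e. 1 + floor(log2(c+1)), so symbols of similar magnitude share a bucket before the short insertion sort. A portable stand-in for the rank computation; the helper name is hypothetical:

    static unsigned rank_of(unsigned count)
    {
        unsigned r = 0;
        unsigned v = count + 1;
        while (v >>= 1) r++;   /* r == floor(log2(count+1)) == BIT_highbit32(count+1) */
        return r + 1;
    }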
@@ -335,28 +435,20 @@ static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValu
  */
 #define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
 
-size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
+/* HUF_buildTree():
+ * Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree.
+ *
+ * @param huffNode        The array sorted by HUF_sort(). Builds the Huffman tree in this array.
+ * @param maxSymbolValue  The maximum symbol value.
+ * @return                The smallest node in the Huffman tree (by count).
+ */
+static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
 {
-    HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)workSpace;
-    nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
-    nodeElt* const huffNode = huffNode0+1;
+    nodeElt* const huffNode0 = huffNode - 1;
     int nonNullRank;
     int lowS, lowN;
     int nodeNb = STARTNODE;
     int n, nodeRoot;
-
-    /* safety checks */
-    if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
-    if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
-      return ERROR(workSpace_tooSmall);
-    if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
-    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
-      return ERROR(maxSymbolValue_tooLarge);
-    memset(huffNode0, 0, sizeof(huffNodeTable));
-
-    /* sort, decreasing order */
-    HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
-
     /* init for parents */
     nonNullRank = (int)maxSymbolValue;
     while(huffNode[nonNullRank].count == 0) nonNullRank--;
@@ -383,42 +475,72 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbo
     for (n=0; n<=nonNullRank; n++)
         huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
 
+    return nonNullRank;
+}
+
+/**
+ * HUF_buildCTableFromTree():
+ * Build the CTable given the Huffman tree in huffNode.
+ *
+ * @param[out] CTable         The output Huffman CTable.
+ * @param      huffNode       The Huffman tree.
+ * @param      nonNullRank    The last and smallest node in the Huffman tree.
+ * @param      maxSymbolValue The maximum symbol value.
+ * @param      maxNbBits      The exact maximum number of bits used in the Huffman tree.
+ */
+static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
+{
+    /* fill result into ctable (val, nbBits) */
+    int n;
+    U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
+    U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
+    int const alphabetSize = (int)(maxSymbolValue + 1);
+    for (n=0; n<=nonNullRank; n++)
+        nbPerRank[huffNode[n].nbBits]++;
+    /* determine starting value per rank */
+    {   U16 min = 0;
+        for (n=(int)maxNbBits; n>0; n--) {
+            valPerRank[n] = min;      /* get starting value within each rank */
+            min += nbPerRank[n];
+            min >>= 1;
+    }   }
+    for (n=0; n<alphabetSize; n++)
+        CTable[huffNode[n].byte].nbBits = huffNode[n].nbBits;   /* push nbBits per symbol, symbol order */
+    for (n=0; n<alphabetSize; n++)
+        CTable[n].val = valPerRank[CTable[n].nbBits]++;   /* assign value within rank, symbol order */
+}
+
+size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
+{
+    HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)workSpace;
+    nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
+    nodeElt* const huffNode = huffNode0+1;
+    int nonNullRank;
+
+    /* safety checks */
+    if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-byte boundaries */
+    if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
+      return ERROR(workSpace_tooSmall);
+    if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
+    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
+      return ERROR(maxSymbolValue_tooLarge);
+    ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));
+
+    /* sort, decreasing order */
+    HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
+
+    /* build tree */
+    nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);
+
     /* enforce maxTableLog */
     maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
+    if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */
 
-    /* fill result into tree (val, nbBits) */
-    {   U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
-        U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
-        int const alphabetSize = (int)(maxSymbolValue + 1);
-        if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */
-        for (n=0; n<=nonNullRank; n++)
-            nbPerRank[huffNode[n].nbBits]++;
-        /* determine stating value per rank */
-        {   U16 min = 0;
-            for (n=(int)maxNbBits; n>0; n--) {
-                valPerRank[n] = min;      /* get starting value within each rank */
-                min += nbPerRank[n];
-                min >>= 1;
-        }   }
-        for (n=0; n<alphabetSize; n++)
-            tree[huffNode[n].byte].nbBits = huffNode[n].nbBits;   /* push nbBits per symbol, symbol order */
-        for (n=0; n<alphabetSize; n++)
-            tree[n].val = valPerRank[tree[n].nbBits]++;   /* assign value within rank, symbol order */
-    }
+    HUF_buildCTableFromTree(tree, huffNode, nonNullRank, maxSymbolValue, maxNbBits);
 
     return maxNbBits;
 }
 
-/** HUF_buildCTable() :
- * @return : maxNbBits
- *  Note : count is used before tree is written, so they can safely overlap
- */
-size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits)
-{
-    HUF_buildCTable_wksp_tables workspace;
-    return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, &workspace, sizeof(workspace));
-}
-
 size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
 {
     size_t nbBits = 0;
@@ -629,29 +751,33 @@ static size_t HUF_compressCTable_internal(
 typedef struct {
     unsigned count[HUF_SYMBOLVALUE_MAX + 1];
     HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
-    HUF_buildCTable_wksp_tables buildCTable_wksp;
+    union {
+        HUF_buildCTable_wksp_tables buildCTable_wksp;
+        HUF_WriteCTableWksp writeCTable_wksp;
+    } wksps;
 } HUF_compress_tables_t;
 
 /* HUF_compress_internal() :
- * `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
+ * `workSpace_align4` must be aligned on 4-byte boundaries,
+ * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U32 unsigned */
 static size_t
 HUF_compress_internal (void* dst, size_t dstSize,
                  const void* src, size_t srcSize,
                        unsigned maxSymbolValue, unsigned huffLog,
                        HUF_nbStreams_e nbStreams,
-                       void* workSpace, size_t wkspSize,
+                       void* workSpace_align4, size_t wkspSize,
                        HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
                  const int bmi2)
 {
-    HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
+    HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace_align4;
     BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + dstSize;
     BYTE* op = ostart;
 
     HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE);
+    assert(((size_t)workSpace_align4 & 3) == 0);   /* must be aligned on 4-byte boundaries */
 
     /* checks & inits */
-    if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
     if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
     if (!srcSize) return 0;  /* Uncompressed */
     if (!dstSize) return 0;  /* cannot fit anything within dst budget */
@@ -669,7 +795,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
     }
 
     /* Scan input and build symbol stats */
-    {   CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace, wkspSize) );
+    {   CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace_align4, wkspSize) );
         if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
         if (largest <= (srcSize >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
     }
@@ -691,16 +817,17 @@ HUF_compress_internal (void* dst, size_t dstSize,
     huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
     {   size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
                                             maxSymbolValue, huffLog,
-                                            &table->buildCTable_wksp, sizeof(table->buildCTable_wksp));
+                                            &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
         CHECK_F(maxBits);
         huffLog = (U32)maxBits;
         /* Zero unused symbols in CTable, so we can check it for validity */
-        memset(table->CTable + (maxSymbolValue + 1), 0,
+        ZSTD_memset(table->CTable + (maxSymbolValue + 1), 0,
                sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
     }
 
     /* Write table description header */
-    {   CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) );
+    {   CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,
+                                              &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) );
         /* Check if using previous huffman table is beneficial */
         if (repeat && *repeat != HUF_repeat_none) {
             size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
@@ -716,7 +843,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
         op += hSize;
         if (repeat) { *repeat = HUF_repeat_none; }
         if (oldHufTable)
-            memcpy(oldHufTable, table->CTable, sizeof(table->CTable));  /* Save new table */
+            ZSTD_memcpy(oldHufTable, table->CTable, sizeof(table->CTable));  /* Save new table */
     }
     return HUF_compressCTable_internal(ostart, op, oend,
                                        src, srcSize,
@@ -747,14 +874,6 @@ size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
                                  repeat, preferRepeat, bmi2);
 }
 
-size_t HUF_compress1X (void* dst, size_t dstSize,
-                 const void* src, size_t srcSize,
-                 unsigned maxSymbolValue, unsigned huffLog)
-{
-    unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
-    return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
-}
-
 /* HUF_compress4X_repeat():
  * compress input using 4 streams.
  * provide workspace to generate compression tables */
@@ -784,6 +903,25 @@ size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
                                  hufTable, repeat, preferRepeat, bmi2);
 }
 
+#ifndef ZSTD_NO_UNUSED_FUNCTIONS
+/** HUF_buildCTable() :
+ * @return : maxNbBits
+ *  Note : count is used before tree is written, so they can safely overlap
+ */
+size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits)
+{
+    HUF_buildCTable_wksp_tables workspace;
+    return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, &workspace, sizeof(workspace));
+}
+
+size_t HUF_compress1X (void* dst, size_t dstSize,
+                 const void* src, size_t srcSize,
+                 unsigned maxSymbolValue, unsigned huffLog)
+{
+    unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
+    return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
+}
+
 size_t HUF_compress2 (void* dst, size_t dstSize,
                 const void* src, size_t srcSize,
                 unsigned maxSymbolValue, unsigned huffLog)
@@ -796,3 +934,4 @@ size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSi
 {
     return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
 }
+#endif

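A note on the union introduced in HUF_compress_tables_t above: the buildCTable and writeCTable scratch areas are never live at the same time, so overlapping them caps the workspace at the larger of the two sizes instead of their sum. A generic sketch of the idea, with purely illustrative types and sizes:

    typedef struct { unsigned tbl[1024]; } build_scratch;      /* hypothetical */
    typedef struct { unsigned char buf[512]; } write_scratch;  /* hypothetical */

    typedef struct {
        union {
            build_scratch b;   /* live while the table is being built */
            write_scratch w;   /* reused afterwards to serialize the header */
        } wksps;
    } shared_scratch;   /* size is driven by the larger member, not the sum */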
File changes will not be shown because they are too large
+ 508 - 131
Utilities/cmzstd/lib/compress/zstd_compress.c


+ 305 - 63
Utilities/cmzstd/lib/compress/zstd_compress_internal.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -28,7 +28,6 @@
 extern "C" {
 #endif
 
-
 /*-*************************************
 *  Constants
 ***************************************/
@@ -64,7 +63,7 @@ typedef struct {
 } ZSTD_localDict;
 
 typedef struct {
-    U32 CTable[HUF_CTABLE_SIZE_U32(255)];
+    HUF_CElt CTable[HUF_CTABLE_SIZE_U32(255)];
     HUF_repeat repeatMode;
 } ZSTD_hufCTables_t;
 
@@ -82,11 +81,75 @@ typedef struct {
     ZSTD_fseCTables_t fse;
 } ZSTD_entropyCTables_t;
 
+/***********************************************
+*  Entropy buffer statistics structs and funcs *
+***********************************************/
+/** ZSTD_hufCTablesMetadata_t :
+ *  Stores Literals Block Type for a super-block in hType, and
+ *  huffman tree description in hufDesBuffer.
+ *  hufDesSize refers to the size of huffman tree description in bytes.
+ *  This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */
 typedef struct {
-    U32 off;
-    U32 len;
+    symbolEncodingType_e hType;
+    BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
+    size_t hufDesSize;
+} ZSTD_hufCTablesMetadata_t;
+
+/** ZSTD_fseCTablesMetadata_t :
+ *  Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
+ *  fse tables in fseTablesBuffer.
+ *  fseTablesSize refers to the size of fse tables in bytes.
+ *  This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */
+typedef struct {
+    symbolEncodingType_e llType;
+    symbolEncodingType_e ofType;
+    symbolEncodingType_e mlType;
+    BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
+    size_t fseTablesSize;
+    size_t lastCountSize; /* This is to account for a bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
+} ZSTD_fseCTablesMetadata_t;
+
+typedef struct {
+    ZSTD_hufCTablesMetadata_t hufMetadata;
+    ZSTD_fseCTablesMetadata_t fseMetadata;
+} ZSTD_entropyCTablesMetadata_t;
+
+/** ZSTD_buildBlockEntropyStats() :
+ *  Builds entropy for the block.
+ *  @return : 0 on success or error code */
+size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
+                             const ZSTD_entropyCTables_t* prevEntropy,
+                                   ZSTD_entropyCTables_t* nextEntropy,
+                             const ZSTD_CCtx_params* cctxParams,
+                                   ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+                                   void* workspace, size_t wkspSize);
+
+/*********************************
+*  Compression internals structs *
+*********************************/
+
+typedef struct {
+    U32 off;            /* Offset code (offset + ZSTD_REP_MOVE) for the match */
+    U32 len;            /* Raw length of match */
 } ZSTD_match_t;
 
+typedef struct {
+    U32 offset;         /* Offset of sequence */
+    U32 litLength;      /* Length of literals prior to match */
+    U32 matchLength;    /* Raw length of match */
+} rawSeq;
+
+typedef struct {
+  rawSeq* seq;          /* The start of the sequences */
+  size_t pos;           /* The index in seq where reading stopped. pos <= size. */
+  size_t posInSequence; /* The position within the sequence at seq[pos] where reading
+                           stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */
+  size_t size;          /* The number of sequences. <= capacity. */
+  size_t capacity;      /* The capacity starting from `seq` pointer */
+} rawSeqStore_t;
+
+UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
+
 typedef struct {
     int price;
     U32 off;
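The new pos/posInSequence fields of rawSeqStore_t above carry explicit invariants: pos <= size <= capacity, and posInSequence is bounded by the total length of the sequence being read. A hedged helper illustrating them; it is not part of the library:

    static int rawSeqStore_valid(const rawSeqStore_t* rss)
    {
        if (rss->size > rss->capacity || rss->pos > rss->size) return 0;
        if (rss->pos < rss->size) {   /* reading stopped inside seq[pos] */
            const rawSeq* r = &rss->seq[rss->pos];
            return rss->posInSequence <= (size_t)r->litLength + r->matchLength;
        }
        return 1;   /* fully consumed; no claim about posInSequence here */
    }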
@@ -125,14 +188,21 @@ typedef struct {
 } ZSTD_compressedBlockState_t;
 
 typedef struct {
-    BYTE const* nextSrc;    /* next block here to continue on current prefix */
-    BYTE const* base;       /* All regular indexes relative to this position */
-    BYTE const* dictBase;   /* extDict indexes relative to this position */
-    U32 dictLimit;          /* below that point, need extDict */
-    U32 lowLimit;           /* below that point, no more valid data */
+    BYTE const* nextSrc;       /* next block here to continue on current prefix */
+    BYTE const* base;          /* All regular indexes relative to this position */
+    BYTE const* dictBase;      /* extDict indexes relative to this position */
+    U32 dictLimit;             /* below that point, need extDict */
+    U32 lowLimit;              /* below that point, no more valid data */
+    U32 nbOverflowCorrections; /* Number of times overflow correction has run since
+                                * ZSTD_window_init(). Useful for debugging coredumps
+                                * and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY.
+                                */
 } ZSTD_window_t;
 
 typedef struct ZSTD_matchState_t ZSTD_matchState_t;
+
+#define ZSTD_ROW_HASH_CACHE_SIZE 8       /* Size of prefetching hash cache for row-based matchfinder */
+
 struct ZSTD_matchState_t {
     ZSTD_window_t window;   /* State for window round buffer management */
     U32 loadedDictEnd;      /* index of end of dictionary, within context's referential.
@@ -144,12 +214,24 @@ struct ZSTD_matchState_t {
                              */
     U32 nextToUpdate;       /* index from which to continue table update */
     U32 hashLog3;           /* dispatch table for matches of len==3 : larger == faster, more memory */
+
+    U32 rowHashLog;                          /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
+    U16* tagTable;                           /* For row-based matchFinder: A row-based table containing the hashes and head index. */
+    U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
+
     U32* hashTable;
     U32* hashTable3;
     U32* chainTable;
+
+    U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */
+
+    int dedicatedDictSearch;  /* Indicates whether this matchState is using the
+                               * dedicated dictionary search structure.
+                               */
     optState_t opt;         /* optimal parser state */
     const ZSTD_matchState_t* dictMatchState;
     ZSTD_compressionParameters cParams;
+    const rawSeqStore_t* ldmSeqStore;
 };
 
 typedef struct {
@@ -163,13 +245,22 @@ typedef struct {
     U32 checksum;
 } ldmEntry_t;
 
+typedef struct {
+    BYTE const* split;
+    U32 hash;
+    U32 checksum;
+    ldmEntry_t* bucket;
+} ldmMatchCandidate_t;
+
+#define LDM_BATCH_SIZE 64
+
 typedef struct {
     ZSTD_window_t window;   /* State for the window round buffer management */
     ldmEntry_t* hashTable;
     U32 loadedDictEnd;
     BYTE* bucketOffsets;    /* Next position in bucket to insert entry */
-    U64 hashPower;          /* Used to compute the rolling hash.
-                             * Depends on ldmParams.minMatchLength */
+    size_t splitIndices[LDM_BATCH_SIZE];
+    ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE];
 } ldmState_t;
 
 typedef struct {
@@ -181,19 +272,6 @@ typedef struct {
     U32 windowLog;          /* Window log for the LDM */
 } ldmParams_t;
 
-typedef struct {
-    U32 offset;
-    U32 litLength;
-    U32 matchLength;
-} rawSeq;
-
-typedef struct {
-  rawSeq* seq;     /* The start of the sequences */
-  size_t pos;      /* The position where reading stopped. <= size. */
-  size_t size;     /* The number of sequences. <= capacity. */
-  size_t capacity; /* The capacity starting from `seq` pointer */
-} rawSeqStore_t;
-
 typedef struct {
     int collectSequences;
     ZSTD_Sequence* seqStart;
@@ -228,17 +306,52 @@ struct ZSTD_CCtx_params_s {
     /* Long distance matching parameters */
     ldmParams_t ldmParams;
 
+    /* Dedicated dict search algorithm trigger */
+    int enableDedicatedDictSearch;
+
+    /* Input/output buffer modes */
+    ZSTD_bufferMode_e inBufferMode;
+    ZSTD_bufferMode_e outBufferMode;
+
+    /* Sequence compression API */
+    ZSTD_sequenceFormat_e blockDelimiters;
+    int validateSequences;
+
+    /* Block splitting */
+    int splitBlocks;
+
+    /* Param for deciding whether to use row-based matchfinder */
+    ZSTD_useRowMatchFinderMode_e useRowMatchFinder;
+
+    /* Always load a dictionary in ext-dict mode (not prefix mode)? */
+    int deterministicRefPrefix;
+
     /* Internal use, for createCCtxParams() and freeCCtxParams() only */
     ZSTD_customMem customMem;
 };  /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
 
+#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
+#define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE)
+
+/**
+ * Indicates whether this compression proceeds directly from user-provided
+ * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
+ * whether the context needs to buffer the input/output (ZSTDb_buffered).
+ */
+typedef enum {
+    ZSTDb_not_buffered,
+    ZSTDb_buffered
+} ZSTD_buffered_policy_e;
+
 struct ZSTD_CCtx_s {
     ZSTD_compressionStage_e stage;
     int cParamsChanged;                  /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
     int bmi2;                            /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
     ZSTD_CCtx_params requestedParams;
     ZSTD_CCtx_params appliedParams;
+    ZSTD_CCtx_params simpleApiParams;    /* Param storage used by the simple API - not sticky. Must only be used in top-level simple API functions for storage. */
     U32   dictID;
+    size_t dictContentSize;
 
     ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
     size_t blockSize;
@@ -247,6 +360,7 @@ struct ZSTD_CCtx_s {
     unsigned long long producedCSize;
     XXH64_state_t xxhState;
     ZSTD_customMem customMem;
+    ZSTD_threadPool* pool;
     size_t staticSize;
     SeqCollector seqCollector;
     int isFirstBlock;
@@ -258,7 +372,10 @@ struct ZSTD_CCtx_s {
     size_t maxNbLdmSequences;
     rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
     ZSTD_blockState_t blockState;
-    U32* entropyWorkspace;  /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
+    U32* entropyWorkspace;  /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */
+
+    /* Whether we are streaming or not */
+    ZSTD_buffered_policy_e bufferedPolicy;
 
     /* streaming */
     char*  inBuff;
@@ -273,6 +390,10 @@ struct ZSTD_CCtx_s {
     ZSTD_cStreamStage streamStage;
     U32    frameEnded;
 
+    /* Stable in/out buffer verification */
+    ZSTD_inBuffer expectedInBuffer;
+    size_t expectedOutBufferSize;
+
     /* Dictionary */
     ZSTD_localDict localDict;
     const ZSTD_CDict* cdict;
@@ -282,17 +403,46 @@ struct ZSTD_CCtx_s {
 #ifdef ZSTD_MULTITHREAD
     ZSTDMT_CCtx* mtctx;
 #endif
+
+    /* Tracing */
+#if ZSTD_TRACE
+    ZSTD_TraceCtx traceCtx;
+#endif
 };
 
 typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
 
-typedef enum { ZSTD_noDict = 0, ZSTD_extDict = 1, ZSTD_dictMatchState = 2 } ZSTD_dictMode_e;
-
+typedef enum {
+    ZSTD_noDict = 0,
+    ZSTD_extDict = 1,
+    ZSTD_dictMatchState = 2,
+    ZSTD_dedicatedDictSearch = 3
+} ZSTD_dictMode_e;
+
+typedef enum {
+    ZSTD_cpm_noAttachDict = 0,  /* Compression with ZSTD_noDict or ZSTD_extDict.
+                                 * In this mode we use both the srcSize and the dictSize
+                                 * when selecting and adjusting parameters.
+                                 */
+    ZSTD_cpm_attachDict = 1,    /* Compression with ZSTD_dictMatchState or ZSTD_dedicatedDictSearch.
+                                 * In this mode we only take the srcSize into account when selecting
+                                 * and adjusting parameters.
+                                 */
+    ZSTD_cpm_createCDict = 2,   /* Creating a CDict.
+                                 * In this mode we take both the source size and the dictionary size
+                                 * into account when selecting and adjusting the parameters.
+                                 */
+    ZSTD_cpm_unknown = 3,       /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
+                                 * We don't know what these parameters are for. We default to the legacy
+                                 * behavior of taking both the source size and the dict size into account
+                                 * when selecting and adjusting parameters.
+                                 */
+} ZSTD_cParamMode_e;
 
 typedef size_t (*ZSTD_blockCompressor) (
         ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
-ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode);
+ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_useRowMatchFinderMode_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);
 
 
 MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
@@ -345,7 +495,7 @@ MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 con
             newReps.rep[1] = rep[0];
             newReps.rep[0] = currentOffset;
         } else {   /* repCode == 0 */
-            memcpy(&newReps, rep, sizeof(newReps));
+            ZSTD_memcpy(&newReps, rep, sizeof(newReps));
         }
     }
     return newReps;
@@ -372,7 +522,7 @@ MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const voi
     RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
                     dstSize_tooSmall, "dst buf too small for uncompressed block");
     MEM_writeLE24(dst, cBlockHeader24);
-    memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
+    ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
     return ZSTD_blockHeaderSize + srcSize;
 }
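
The 24-bit header written above packs lastBlock, block type, and size into the low bits; this layout comes from the zstd frame format, where bt_raw is type 0. A tiny worked example:

    #include <stdint.h>
    #include <stdio.h>

    /* bit 0: lastBlock, bits 1-2: blockType (0=raw, 1=rle, 2=compressed),
     * bits 3-23: block size */
    uint32_t blockHeader24(uint32_t lastBlock, uint32_t blockType, uint32_t size)
    {
        return lastBlock + (blockType << 1) + (size << 3);
    }

    int main(void)
    {
        printf("0x%06x\n", (unsigned)blockHeader24(1, 0, 100));  /* 0x000321 */
        return 0;
    }
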
 
@@ -469,8 +619,8 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera
 
     /* literal Length */
     if (litLength>0xFFFF) {
-        assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
-        seqStorePtr->longLengthID = 1;
+        assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
+        seqStorePtr->longLengthType = ZSTD_llt_literalLength;
         seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
     }
     seqStorePtr->sequences[0].litLength = (U16)litLength;
@@ -480,8 +630,8 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera
 
     /* match Length */
     if (mlBase>0xFFFF) {
-        assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
-        seqStorePtr->longLengthID = 2;
+        assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
+        seqStorePtr->longLengthType = ZSTD_llt_matchLength;
         seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
     }
     seqStorePtr->sequences[0].matchLength = (U16)mlBase;
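
The longLengthType/longLengthPos pair replaces the old numeric longLengthID. Lengths are stored in 16 bits, and the single oversized length allowed per block gets 0x10000 added back when the sequence is read. A hedged sketch of the read side, with the seqStore fields flattened into parameters:

    #include <stdint.h>

    typedef enum { llt_none, llt_literalLength, llt_matchLength } longLengthType_e;

    /* Recover a full literal length from its 16-bit stored form. Only the
     * sequence flagged by longLengthPos may exceed 0xFFFF. */
    uint32_t fullLitLength(uint16_t storedLL, longLengthType_e type,
                           uint32_t longLengthPos, uint32_t seqIndex)
    {
        uint32_t ll = storedLL;
        if (type == llt_literalLength && longLengthPos == seqIndex)
            ll += 0x10000;
        return ll;
    }
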
@@ -498,8 +648,12 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
     if (MEM_isLittleEndian()) {
         if (MEM_64bits()) {
 #       if defined(_MSC_VER) && defined(_WIN64)
-            unsigned long r = 0;
-            return _BitScanForward64( &r, (U64)val ) ? (unsigned)(r >> 3) : 0;
+#           if STATIC_BMI2
+                return _tzcnt_u64(val) >> 3;
+#           else
+                unsigned long r = 0;
+                return _BitScanForward64( &r, (U64)val ) ? (unsigned)(r >> 3) : 0;
+#           endif
 #       elif defined(__GNUC__) && (__GNUC__ >= 4)
             return (__builtin_ctzll((U64)val) >> 3);
 #       else
@@ -530,8 +684,12 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
     } else {  /* Big Endian CPU */
         if (MEM_64bits()) {
 #       if defined(_MSC_VER) && defined(_WIN64)
-            unsigned long r = 0;
-            return _BitScanReverse64( &r, val ) ? (unsigned)(r >> 3) : 0;
+#           if STATIC_BMI2
+                return _lzcnt_u64(val) >> 3;
+#           else
+                unsigned long r = 0;
+                return _BitScanReverse64(&r, (U64)val) ? (unsigned)(r >> 3) : 0;
+#           endif
 #       elif defined(__GNUC__) && (__GNUC__ >= 4)
             return (__builtin_clzll(val) >> 3);
 #       else
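
The hunk above adds BMI2 _tzcnt_u64/_lzcnt_u64 fast paths for MSVC. The underlying trick, common to every branch, is that on a little-endian machine the number of equal leading bytes between two words is ctz(x ^ y) / 8. A portable sketch using the GCC/Clang builtin (illustrative, not the library's exact fallback):

    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    /* The first differing byte is the lowest set byte of x^y on
     * little-endian hosts. */
    unsigned nbCommonBytes64(uint64_t x, uint64_t y)
    {
        uint64_t const diff = x ^ y;
        if (diff == 0) return 8;
        return (unsigned)__builtin_ctzll(diff) >> 3;
    }

    int main(void)
    {
        uint64_t a, b;
        memcpy(&a, "abcdefgh", 8);
        memcpy(&b, "abcdXfgh", 8);
        printf("%u\n", nbCommonBytes64(a, b));  /* 4 on little-endian hosts */
        return 0;
    }
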
@@ -626,7 +784,8 @@ static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
 static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
 static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
 
-MEM_STATIC size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
+MEM_STATIC FORCE_INLINE_ATTR
+size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
 {
     switch(mls)
     {
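
These hash functions are classic multiplicative hashes: multiply the input by a large odd constant and keep the top hBits bits, which mix all input bytes. A standalone version of the 8-byte variant, using the constant from the hunk above:

    #include <stdint.h>
    #include <stddef.h>
    #include <stdio.h>

    static const uint64_t prime8bytes = 0xCF1BBCDCB7A56463ULL;

    /* The multiply spreads entropy into the high bits; the shift keeps
     * hBits of them as the table index. */
    size_t hash8(uint64_t u, unsigned hBits)
    {
        return (size_t)((u * prime8bytes) >> (64 - hBits));
    }

    int main(void)
    {
        printf("%zu\n", hash8(0x0123456789ABCDEFULL, 20));
        return 0;
    }
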
@@ -723,6 +882,13 @@ MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
     window->dictLimit = end;
 }
 
+MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window)
+{
+    return window.dictLimit == 1 &&
+           window.lowLimit == 1 &&
+           (window.nextSrc - window.base) == 1;
+}
+
 /**
  * ZSTD_window_hasExtDict():
  * Returns non-zero if the window has a non-empty extDict.
@@ -742,20 +908,74 @@ MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
     return ZSTD_window_hasExtDict(ms->window) ?
         ZSTD_extDict :
         ms->dictMatchState != NULL ?
-            ZSTD_dictMatchState :
+            (ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) :
             ZSTD_noDict;
 }
 
+/* Defining this macro to non-zero tells zstd to run the overflow correction
+ * code much more frequently. This is very inefficient, and should only be
+ * used for tests and fuzzers.
+ */
+#ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY
+#  ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+#    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1
+#  else
+#    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0
+#  endif
+#endif
+
+/**
+ * ZSTD_window_canOverflowCorrect():
+ * Returns non-zero if the indices are large enough for overflow correction
+ * to work correctly without impacting compression ratio.
+ */
+MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,
+                                              U32 cycleLog,
+                                              U32 maxDist,
+                                              U32 loadedDictEnd,
+                                              void const* src)
+{
+    U32 const cycleSize = 1u << cycleLog;
+    U32 const curr = (U32)((BYTE const*)src - window.base);
+    U32 const minIndexToOverflowCorrect = cycleSize + MAX(maxDist, cycleSize);
+
+    /* Adjust the min index to backoff the overflow correction frequency,
+     * so we don't waste too much CPU in overflow correction. If this
+     * computation overflows we don't really care, we just need to make
+     * sure it is at least minIndexToOverflowCorrect.
+     */
+    U32 const adjustment = window.nbOverflowCorrections + 1;
+    U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment,
+                                  minIndexToOverflowCorrect);
+    U32 const indexLargeEnough = curr > adjustedIndex;
+
+    /* Only overflow correct early if the dictionary is invalidated already,
+     * so we don't hurt compression ratio.
+     */
+    U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd;
+
+    return indexLargeEnough && dictionaryInvalidated;
+}
+
 /**
  * ZSTD_window_needOverflowCorrection():
  * Returns non-zero if the indices are getting too large and need overflow
  * protection.
  */
 MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
+                                                  U32 cycleLog,
+                                                  U32 maxDist,
+                                                  U32 loadedDictEnd,
+                                                  void const* src,
                                                   void const* srcEnd)
 {
-    U32 const current = (U32)((BYTE const*)srcEnd - window.base);
-    return current > ZSTD_CURRENT_MAX;
+    U32 const curr = (U32)((BYTE const*)srcEnd - window.base);
+    if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
+        if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) {
+            return 1;
+        }
+    }
+    return curr > ZSTD_CURRENT_MAX;
 }
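
The early-correction gate above only fires when both conditions hold: indices are comfortably past the adjusted threshold, and the dictionary is already out of range. A small sketch of the threshold arithmetic (the values in main() are hypothetical):

    #include <stdint.h>
    #include <stdio.h>

    #define MAX(a, b) ((a) > (b) ? (a) : (b))

    /* Each prior correction scales the minimum index, so corrections get
     * progressively rarer over the life of a window. */
    uint32_t minIndexToCorrect(uint32_t cycleLog, uint32_t maxDist,
                               uint32_t nbOverflowCorrections)
    {
        uint32_t const cycleSize = 1u << cycleLog;
        uint32_t const minIndex  = cycleSize + MAX(maxDist, cycleSize);
        return MAX(minIndex * (nbOverflowCorrections + 1), minIndex);
    }

    int main(void)
    {
        /* after 3 prior corrections the threshold has grown 4x */
        printf("%u\n", (unsigned)minIndexToCorrect(16, 1u << 23, 3));
        return 0;
    }
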
 
 /**
@@ -766,7 +986,6 @@ MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
  *
  * The least significant cycleLog bits of the indices must remain the same,
  * which may be 0. Every index up to maxDist in the past must be valid.
- * NOTE: (maxDist & cycleMask) must be zero.
  */
 MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
                                            U32 maxDist, void const* src)
@@ -790,17 +1009,25 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
      * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
      *    windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
      */
-    U32 const cycleMask = (1U << cycleLog) - 1;
-    U32 const current = (U32)((BYTE const*)src - window->base);
-    U32 const currentCycle0 = current & cycleMask;
+    U32 const cycleSize = 1u << cycleLog;
+    U32 const cycleMask = cycleSize - 1;
+    U32 const curr = (U32)((BYTE const*)src - window->base);
+    U32 const currentCycle0 = curr & cycleMask;
     /* Exclude zero so that newCurrent - maxDist >= 1. */
-    U32 const currentCycle1 = currentCycle0 == 0 ? (1U << cycleLog) : currentCycle0;
-    U32 const newCurrent = currentCycle1 + maxDist;
-    U32 const correction = current - newCurrent;
-    assert((maxDist & cycleMask) == 0);
-    assert(current > newCurrent);
-    /* Loose bound, should be around 1<<29 (see above) */
-    assert(correction > 1<<28);
+    U32 const currentCycle1 = currentCycle0 == 0 ? cycleSize : currentCycle0;
+    U32 const newCurrent = currentCycle1 + MAX(maxDist, cycleSize);
+    U32 const correction = curr - newCurrent;
+    /* maxDist must be a power of two so that:
+     *   (newCurrent & cycleMask) == (curr & cycleMask)
+     * This is required to not corrupt the chains / binary tree.
+     */
+    assert((maxDist & (maxDist - 1)) == 0);
+    assert((curr & cycleMask) == (newCurrent & cycleMask));
+    assert(curr > newCurrent);
+    if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
+        /* Loose bound, should be around 1<<29 (see above) */
+        assert(correction > 1<<28);
+    }
 
     window->base += correction;
     window->dictBase += correction;
@@ -816,6 +1043,8 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
     assert(window->lowLimit <= newCurrent);
     assert(window->dictLimit <= newCurrent);
 
+    ++window->nbOverflowCorrections;
+
     DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
              window->lowLimit);
     return correction;
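
A worked instance of the rebase arithmetic above, with hypothetical values, showing that the low cycleLog bits survive the subtraction, which is what keeps hash-chain and binary-tree links valid:

    #include <stdint.h>
    #include <stdio.h>

    #define MAX(a, b) ((a) > (b) ? (a) : (b))

    int main(void)
    {
        uint32_t const cycleLog   = 20;             /* hypothetical */
        uint32_t const cycleSize  = 1u << cycleLog;
        uint32_t const cycleMask  = cycleSize - 1;
        uint32_t const maxDist    = 1u << 20;
        uint32_t const curr       = 0x3B000123u;

        uint32_t const cycle0     = curr & cycleMask;            /* 0x123 */
        uint32_t const cycle1     = cycle0 ? cycle0 : cycleSize;
        uint32_t const newCurrent = cycle1 + MAX(maxDist, cycleSize);
        uint32_t const correction = curr - newCurrent;           /* 0x3AF00000 */

        printf("correction=0x%x, low bits preserved=%d\n",
               (unsigned)correction,
               (curr & cycleMask) == (newCurrent & cycleMask));
        return 0;
    }
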
@@ -919,12 +1148,13 @@ ZSTD_checkDictValidity(const ZSTD_window_t* window,
 }
 
 MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
-    memset(window, 0, sizeof(*window));
+    ZSTD_memset(window, 0, sizeof(*window));
     window->base = (BYTE const*)"";
     window->dictBase = (BYTE const*)"";
     window->dictLimit = 1;    /* start from 1, so that 1st position is valid */
     window->lowLimit = 1;     /* it ensures first and later CCtx usages compress the same */
     window->nextSrc = window->base + 1;   /* see issue #1241 */
+    window->nbOverflowCorrections = 0;
 }
 
 /**
@@ -935,7 +1165,8 @@ MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
  * Returns non-zero if the segment is contiguous.
  */
 MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
-                                  void const* src, size_t srcSize)
+                                  void const* src, size_t srcSize,
+                                  int forceNonContiguous)
 {
     BYTE const* const ip = (BYTE const*)src;
     U32 contiguous = 1;
@@ -945,7 +1176,7 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
     assert(window->base != NULL);
     assert(window->dictBase != NULL);
     /* Check if blocks follow each other */
-    if (src != window->nextSrc) {
+    if (src != window->nextSrc || forceNonContiguous) {
         /* not contiguous */
         size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
         DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
@@ -973,12 +1204,16 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
 /**
  * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
  */
-MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog)
+MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
 {
     U32    const maxDistance = 1U << windowLog;
     U32    const lowestValid = ms->window.lowLimit;
-    U32    const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
+    U32    const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
     U32    const isDictionary = (ms->loadedDictEnd != 0);
+    /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary
+     * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't
+     * valid for the entire block. So this check is sufficient to find the lowest valid match index.
+     */
     U32    const matchLowest = isDictionary ? lowestValid : withinWindow;
     return matchLowest;
 }
@@ -986,12 +1221,15 @@ MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 current
 /**
  * Returns the lowest allowed match index in the prefix.
  */
-MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog)
+MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
 {
     U32    const maxDistance = 1U << windowLog;
     U32    const lowestValid = ms->window.dictLimit;
-    U32    const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
+    U32    const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
     U32    const isDictionary = (ms->loadedDictEnd != 0);
+    /* When computing the lowest prefix index we need to take the dictionary into account to handle
+     * the edge case where the dictionary and the source are contiguous in memory.
+     */
     U32    const matchLowest = isDictionary ? lowestValid : withinWindow;
     return matchLowest;
 }
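
Both helpers share one clamp; a condensed sketch with the match-state field accesses flattened into parameters:

    #include <stdint.h>

    /* Clamp match indices to the window, except that a loaded dictionary
     * keeps every dictionary index valid for the whole block (see the
     * comments in the hunks above). */
    uint32_t lowestMatchIndex(uint32_t curr, uint32_t lowestValid,
                              unsigned windowLog, uint32_t loadedDictEnd)
    {
        uint32_t const maxDistance  = 1u << windowLog;
        uint32_t const withinWindow =
            (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
        return (loadedDictEnd != 0) ? lowestValid : withinWindow;
    }
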
@@ -1045,7 +1283,6 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
  * assumptions : magic number supposed already checked
  *               and dictSize >= 8 */
 size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
-                         short* offcodeNCount, unsigned* offcodeMaxValue,
                          const void* const dict, size_t dictSize);
 
 void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);
@@ -1061,7 +1298,7 @@ void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);
  * Note: srcSizeHint == 0 means 0!
  */
 ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
-        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize);
+        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
 
 /*! ZSTD_initCStream_internal() :
  *  Private use only. Init streaming operation.
@@ -1122,4 +1359,9 @@ size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSe
  *  condition for correct operation : hashLog > 1 */
 U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
 
+/** ZSTD_CCtx_trace() :
+ *  Trace the end of a compression call.
+ */
+void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);
+
 #endif /* ZSTD_COMPRESS_H */

+ 8 - 8
Utilities/cmzstd/lib/compress/zstd_compress_literals.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,7 +15,7 @@
 
 size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
 {
-    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* const ostart = (BYTE*)dst;
     U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
 
     RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
@@ -35,14 +35,14 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src,
             assert(0);
     }
 
-    memcpy(ostart + flSize, src, srcSize);
+    ZSTD_memcpy(ostart + flSize, src, srcSize);
     DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
     return srcSize + flSize;
 }
 
 size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
 {
-    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* const ostart = (BYTE*)dst;
     U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
 
     (void)dstCapacity;  /* dstCapacity already guaranteed to be >=4, hence large enough */
@@ -86,7 +86,7 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
                 disableLiteralCompression, (U32)srcSize);
 
     /* Prepare nextEntropy assuming reusing the existing table */
-    memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
 
     if (disableLiteralCompression)
         return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
@@ -117,12 +117,12 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
         }
     }
 
-    if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
-        memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+    if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
+        ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
         return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
     }
     if (cLitSize==1) {
-        memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+        ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
         return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
     }
 

+ 1 - 1
Utilities/cmzstd/lib/compress/zstd_compress_literals.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the

+ 29 - 7
Utilities/cmzstd/lib/compress/zstd_compress_sequences.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -50,6 +50,19 @@ static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) {
   return maxSymbolValue;
 }
 
+/**
+ * Returns true if we should use ncount=-1; otherwise we should
+ * use ncount=1 for low probability symbols instead.
+ */
+static unsigned ZSTD_useLowProbCount(size_t const nbSeq)
+{
+    /* Heuristic: This should cover most blocks <= 16K and
+     * start to fade out after 16K to about 32K depending on
+     * comprssibility.
+     */
+    return nbSeq >= 2048;
+}
+
 /**
  * Returns the cost in bytes of encoding the normalized count header.
  * Returns an error if any of the helper functions return an error.
@@ -60,7 +73,7 @@ static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max,
     BYTE wksp[FSE_NCOUNTBOUND];
     S16 norm[MaxSeq + 1];
     const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
-    FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max), "");
+    FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max, ZSTD_useLowProbCount(nbSeq)), "");
     return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog);
 }
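
In FSE normalized counts, -1 marks a symbol whose true probability is below one table slot; rounding such symbols up to a full slot (ncount=1) steals probability mass from frequent symbols. The precision only pays off when a block carries many sequences, which the threshold above approximates. A sketch of the gate itself:

    #include <stddef.h>

    /* Heuristic from the hunk above: enable ncount=-1 ("low probability")
     * symbols once nbSeq is large enough to amortize the extra precision. */
    unsigned useLowProbCount(size_t nbSeq)
    {
        return nbSeq >= 2048;
    }
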
 
@@ -72,6 +85,8 @@ static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t
 {
     unsigned cost = 0;
     unsigned s;
+
+    assert(total > 0);
     for (s = 0; s <= max; ++s) {
         unsigned norm = (unsigned)((256 * count[s]) / total);
         if (count[s] != 0 && norm == 0)
@@ -219,6 +234,11 @@ ZSTD_selectEncodingType(
     return set_compressed;
 }
 
+typedef struct {
+    S16 norm[MaxSeq + 1];
+    U32 wksp[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(MaxSeq, MaxFSELog)];
+} ZSTD_BuildCTableWksp;
+
 size_t
 ZSTD_buildCTable(void* dst, size_t dstCapacity,
                 FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
@@ -239,13 +259,13 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
         *op = codeTable[0];
         return 1;
     case set_repeat:
-        memcpy(nextCTable, prevCTable, prevCTableSize);
+        ZSTD_memcpy(nextCTable, prevCTable, prevCTableSize);
         return 0;
     case set_basic:
         FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), "");  /* note : could be pre-calculated */
         return 0;
     case set_compressed: {
-        S16 norm[MaxSeq + 1];
+        ZSTD_BuildCTableWksp* wksp = (ZSTD_BuildCTableWksp*)entropyWorkspace;
         size_t nbSeq_1 = nbSeq;
         const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
         if (count[codeTable[nbSeq-1]] > 1) {
@@ -253,10 +273,12 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
             nbSeq_1--;
         }
         assert(nbSeq_1 > 1);
-        FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max), "");
-        {   size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog);   /* overflow protected */
+        assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp));
+        (void)entropyWorkspaceSize;
+        FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "");
+        {   size_t const NCountSize = FSE_writeNCount(op, oend - op, wksp->norm, max, tableLog);   /* overflow protected */
             FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
-            FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize), "");
+            FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "");
             return NCountSize;
         }
     }
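
The ZSTD_BuildCTableWksp change moves two large arrays off the stack and overlays them on the caller's entropy workspace. The pattern, as a hedged standalone sketch (the sizes are illustrative, not the real MaxSeq/MaxFSELog bounds):

    #include <assert.h>
    #include <stddef.h>

    typedef struct {
        short    norm[64];    /* illustrative sizes */
        unsigned wksp[512];
    } BuildCTableWksp;

    /* Reinterpret caller-provided scratch as a typed workspace; the caller
     * must guarantee sufficient size and alignment (here simply asserted). */
    BuildCTableWksp* asBuildWksp(void* scratch, size_t scratchSize)
    {
        assert(scratchSize >= sizeof(BuildCTableWksp));
        (void)scratchSize;
        return (BuildCTableWksp*)scratch;
    }
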

+ 1 - 1
Utilities/cmzstd/lib/compress/zstd_compress_sequences.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the

+ 22 - 295
Utilities/cmzstd/lib/compress/zstd_compress_superblock.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,288 +15,10 @@
 
 #include "../common/zstd_internal.h"  /* ZSTD_getSequenceLength */
 #include "hist.h"                     /* HIST_countFast_wksp */
-#include "zstd_compress_internal.h"
+#include "zstd_compress_internal.h"   /* ZSTD_[huf|fse|entropy]CTablesMetadata_t */
 #include "zstd_compress_sequences.h"
 #include "zstd_compress_literals.h"
 
-/*-*************************************
-*  Superblock entropy buffer structs
-***************************************/
-/** ZSTD_hufCTablesMetadata_t :
- *  Stores Literals Block Type for a super-block in hType, and
- *  huffman tree description in hufDesBuffer.
- *  hufDesSize refers to the size of huffman tree description in bytes.
- *  This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */
-typedef struct {
-    symbolEncodingType_e hType;
-    BYTE hufDesBuffer[500]; /* TODO give name to this value */
-    size_t hufDesSize;
-} ZSTD_hufCTablesMetadata_t;
-
-/** ZSTD_fseCTablesMetadata_t :
- *  Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
- *  fse tables in fseTablesBuffer.
- *  fseTablesSize refers to the size of fse tables in bytes.
- *  This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */
-typedef struct {
-    symbolEncodingType_e llType;
-    symbolEncodingType_e ofType;
-    symbolEncodingType_e mlType;
-    BYTE fseTablesBuffer[500]; /* TODO give name to this value */
-    size_t fseTablesSize;
-    size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */
-} ZSTD_fseCTablesMetadata_t;
-
-typedef struct {
-    ZSTD_hufCTablesMetadata_t hufMetadata;
-    ZSTD_fseCTablesMetadata_t fseMetadata;
-} ZSTD_entropyCTablesMetadata_t;
-
-
-/** ZSTD_buildSuperBlockEntropy_literal() :
- *  Builds entropy for the super-block literals.
- *  Stores literals block type (raw, rle, compressed, repeat) and
- *  huffman description table to hufMetadata.
- *  @return : size of huffman description table or error code */
-static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize,
-                                            const ZSTD_hufCTables_t* prevHuf,
-                                                  ZSTD_hufCTables_t* nextHuf,
-                                                  ZSTD_hufCTablesMetadata_t* hufMetadata,
-                                                  const int disableLiteralsCompression,
-                                                  void* workspace, size_t wkspSize)
-{
-    BYTE* const wkspStart = (BYTE*)workspace;
-    BYTE* const wkspEnd = wkspStart + wkspSize;
-    BYTE* const countWkspStart = wkspStart;
-    unsigned* const countWksp = (unsigned*)workspace;
-    const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
-    BYTE* const nodeWksp = countWkspStart + countWkspSize;
-    const size_t nodeWkspSize = wkspEnd-nodeWksp;
-    unsigned maxSymbolValue = 255;
-    unsigned huffLog = HUF_TABLELOG_DEFAULT;
-    HUF_repeat repeat = prevHuf->repeatMode;
-
-    DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize);
-
-    /* Prepare nextEntropy assuming reusing the existing table */
-    memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
-
-    if (disableLiteralsCompression) {
-        DEBUGLOG(5, "set_basic - disabled");
-        hufMetadata->hType = set_basic;
-        return 0;
-    }
-
-    /* small ? don't even attempt compression (speed opt) */
-#   define COMPRESS_LITERALS_SIZE_MIN 63
-    {   size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
-        if (srcSize <= minLitSize) {
-            DEBUGLOG(5, "set_basic - too small");
-            hufMetadata->hType = set_basic;
-            return 0;
-        }
-    }
-
-    /* Scan input and build symbol stats */
-    {   size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
-        FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
-        if (largest == srcSize) {
-            DEBUGLOG(5, "set_rle");
-            hufMetadata->hType = set_rle;
-            return 0;
-        }
-        if (largest <= (srcSize >> 7)+4) {
-            DEBUGLOG(5, "set_basic - no gain");
-            hufMetadata->hType = set_basic;
-            return 0;
-        }
-    }
-
-    /* Validate the previous Huffman table */
-    if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
-        repeat = HUF_repeat_none;
-    }
-
-    /* Build Huffman Tree */
-    memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
-    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
-    {   size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
-                                                    maxSymbolValue, huffLog,
-                                                    nodeWksp, nodeWkspSize);
-        FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
-        huffLog = (U32)maxBits;
-        {   /* Build and write the CTable */
-            size_t const newCSize = HUF_estimateCompressedSize(
-                    (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
-            size_t const hSize = HUF_writeCTable(
-                    hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
-                    (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog);
-            /* Check against repeating the previous CTable */
-            if (repeat != HUF_repeat_none) {
-                size_t const oldCSize = HUF_estimateCompressedSize(
-                        (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
-                if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
-                    DEBUGLOG(5, "set_repeat - smaller");
-                    memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
-                    hufMetadata->hType = set_repeat;
-                    return 0;
-                }
-            }
-            if (newCSize + hSize >= srcSize) {
-                DEBUGLOG(5, "set_basic - no gains");
-                memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
-                hufMetadata->hType = set_basic;
-                return 0;
-            }
-            DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
-            hufMetadata->hType = set_compressed;
-            nextHuf->repeatMode = HUF_repeat_check;
-            return hSize;
-        }
-    }
-}
-
-/** ZSTD_buildSuperBlockEntropy_sequences() :
- *  Builds entropy for the super-block sequences.
- *  Stores symbol compression modes and fse table to fseMetadata.
- *  @return : size of fse tables or error code */
-static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr,
-                                              const ZSTD_fseCTables_t* prevEntropy,
-                                                    ZSTD_fseCTables_t* nextEntropy,
-                                              const ZSTD_CCtx_params* cctxParams,
-                                                    ZSTD_fseCTablesMetadata_t* fseMetadata,
-                                                    void* workspace, size_t wkspSize)
-{
-    BYTE* const wkspStart = (BYTE*)workspace;
-    BYTE* const wkspEnd = wkspStart + wkspSize;
-    BYTE* const countWkspStart = wkspStart;
-    unsigned* const countWksp = (unsigned*)workspace;
-    const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned);
-    BYTE* const cTableWksp = countWkspStart + countWkspSize;
-    const size_t cTableWkspSize = wkspEnd-cTableWksp;
-    ZSTD_strategy const strategy = cctxParams->cParams.strategy;
-    FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
-    FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
-    FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
-    const BYTE* const ofCodeTable = seqStorePtr->ofCode;
-    const BYTE* const llCodeTable = seqStorePtr->llCode;
-    const BYTE* const mlCodeTable = seqStorePtr->mlCode;
-    size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
-    BYTE* const ostart = fseMetadata->fseTablesBuffer;
-    BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
-    BYTE* op = ostart;
-
-    assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE));
-    DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq);
-    memset(workspace, 0, wkspSize);
-
-    fseMetadata->lastCountSize = 0;
-    /* convert length/distances into codes */
-    ZSTD_seqToCodes(seqStorePtr);
-    /* build CTable for Literal Lengths */
-    {   U32 LLtype;
-        unsigned max = MaxLL;
-        size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize);  /* can't fail */
-        DEBUGLOG(5, "Building LL table");
-        nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
-        LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,
-                                        countWksp, max, mostFrequent, nbSeq,
-                                        LLFSELog, prevEntropy->litlengthCTable,
-                                        LL_defaultNorm, LL_defaultNormLog,
-                                        ZSTD_defaultAllowed, strategy);
-        assert(set_basic < set_compressed && set_rle < set_compressed);
-        assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
-        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
-                                                    countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
-                                                    prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable),
-                                                    cTableWksp, cTableWkspSize);
-            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
-            if (LLtype == set_compressed)
-                fseMetadata->lastCountSize = countSize;
-            op += countSize;
-            fseMetadata->llType = (symbolEncodingType_e) LLtype;
-    }   }
-    /* build CTable for Offsets */
-    {   U32 Offtype;
-        unsigned max = MaxOff;
-        size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize);  /* can't fail */
-        /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
-        ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
-        DEBUGLOG(5, "Building OF table");
-        nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
-        Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,
-                                        countWksp, max, mostFrequent, nbSeq,
-                                        OffFSELog, prevEntropy->offcodeCTable,
-                                        OF_defaultNorm, OF_defaultNormLog,
-                                        defaultPolicy, strategy);
-        assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
-        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
-                                                    countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
-                                                    prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable),
-                                                    cTableWksp, cTableWkspSize);
-            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
-            if (Offtype == set_compressed)
-                fseMetadata->lastCountSize = countSize;
-            op += countSize;
-            fseMetadata->ofType = (symbolEncodingType_e) Offtype;
-    }   }
-    /* build CTable for MatchLengths */
-    {   U32 MLtype;
-        unsigned max = MaxML;
-        size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize);   /* can't fail */
-        DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
-        nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
-        MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,
-                                        countWksp, max, mostFrequent, nbSeq,
-                                        MLFSELog, prevEntropy->matchlengthCTable,
-                                        ML_defaultNorm, ML_defaultNormLog,
-                                        ZSTD_defaultAllowed, strategy);
-        assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
-        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
-                                                    countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
-                                                    prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable),
-                                                    cTableWksp, cTableWkspSize);
-            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
-            if (MLtype == set_compressed)
-                fseMetadata->lastCountSize = countSize;
-            op += countSize;
-            fseMetadata->mlType = (symbolEncodingType_e) MLtype;
-    }   }
-    assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer));
-    return op-ostart;
-}
-
-
-/** ZSTD_buildSuperBlockEntropy() :
- *  Builds entropy for the super-block.
- *  @return : 0 on success or error code */
-static size_t
-ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
-                      const ZSTD_entropyCTables_t* prevEntropy,
-                            ZSTD_entropyCTables_t* nextEntropy,
-                      const ZSTD_CCtx_params* cctxParams,
-                            ZSTD_entropyCTablesMetadata_t* entropyMetadata,
-                            void* workspace, size_t wkspSize)
-{
-    size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
-    DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy");
-    entropyMetadata->hufMetadata.hufDesSize =
-        ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize,
-                                            &prevEntropy->huf, &nextEntropy->huf,
-                                            &entropyMetadata->hufMetadata,
-                                            ZSTD_disableLiteralsCompression(cctxParams),
-                                            workspace, wkspSize);
-    FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed");
-    entropyMetadata->fseMetadata.fseTablesSize =
-        ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr,
-                                              &prevEntropy->fse, &nextEntropy->fse,
-                                              cctxParams,
-                                              &entropyMetadata->fseMetadata,
-                                              workspace, wkspSize);
-    FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed");
-    return 0;
-}
-
 /** ZSTD_compressSubBlock_literal() :
  *  Compresses literals section for a sub-block.
  *  When we have to write the Huffman table we will sometimes choose a header
@@ -304,7 +26,7 @@ ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
  *  before we know the table size + compressed size, so we have a bound on the
  *  table size. If we guessed incorrectly, we fall back to uncompressed literals.
  *
- *  We write the header when writeEntropy=1 and set entropyWrriten=1 when we succeeded
+ *  We write the header when writeEntropy=1 and set entropyWritten=1 when we succeeded
  *  in writing the header, otherwise it is set to 0.
  *
  *  hufMetadata->hType has literals block type info.
@@ -348,7 +70,7 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
     assert(hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat);
 
     if (writeEntropy && hufMetadata->hType == set_compressed) {
-        memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize);
+        ZSTD_memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize);
         op += hufMetadata->hufDesSize;
         cLitSize += hufMetadata->hufDesSize;
         DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize);
@@ -474,7 +196,7 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
         const U32 MLtype = fseMetadata->mlType;
         DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (fseTablesSize=%zu)", fseMetadata->fseTablesSize);
         *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
-        memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize);
+        ZSTD_memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize);
         op += fseMetadata->fseTablesSize;
     } else {
         const U32 repeat = set_repeat;
@@ -603,7 +325,7 @@ static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
                         const BYTE* codeTable, unsigned maxCode,
                         size_t nbSeq, const FSE_CTable* fseCTable,
                         const U32* additionalBits,
-                        short const* defaultNorm, U32 defaultNormLog,
+                        short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
                         void* workspace, size_t wkspSize)
 {
     unsigned* const countWksp = (unsigned*)workspace;
@@ -615,7 +337,11 @@ static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
 
     HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize);  /* can't fail */
     if (type == set_basic) {
-        cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max);
+        /* We selected this encoding type, so it must be valid. */
+        assert(max <= defaultMax);
+        cSymbolTypeSizeEstimateInBits = max <= defaultMax
+                ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max)
+                : ERROR(GENERIC);
     } else if (type == set_rle) {
         cSymbolTypeSizeEstimateInBits = 0;
     } else if (type == set_compressed || type == set_repeat) {
@@ -639,19 +365,20 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
                                                   void* workspace, size_t wkspSize,
                                                   int writeEntropy)
 {
-    size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
+    size_t const sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
     size_t cSeqSizeEstimate = 0;
+    if (nbSeq == 0) return sequencesSectionHeaderSize;
     cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff,
                                          nbSeq, fseTables->offcodeCTable, NULL,
-                                         OF_defaultNorm, OF_defaultNormLog,
+                                         OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
                                          workspace, wkspSize);
     cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->llType, llCodeTable, MaxLL,
                                          nbSeq, fseTables->litlengthCTable, LL_bits,
-                                         LL_defaultNorm, LL_defaultNormLog,
+                                         LL_defaultNorm, LL_defaultNormLog, MaxLL,
                                          workspace, wkspSize);
     cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, MaxML,
                                          nbSeq, fseTables->matchlengthCTable, ML_bits,
-                                         ML_defaultNorm, ML_defaultNormLog,
+                                         ML_defaultNorm, ML_defaultNormLog, MaxML,
                                          workspace, wkspSize);
     if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize;
     return cSeqSizeEstimate + sequencesSectionHeaderSize;
@@ -790,7 +517,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
     } while (!lastSequence);
     if (writeLitEntropy) {
         DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten");
-        memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
+        ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
     }
     if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) {
         /* If we haven't written our entropy tables, then we've violated our contract and
@@ -809,11 +536,11 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
         if (sp < send) {
             seqDef const* seq;
             repcodes_t rep;
-            memcpy(&rep, prevCBlock->rep, sizeof(rep)); 
+            ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
             for (seq = sstart; seq < sp; ++seq) {
                 rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
             }
-            memcpy(nextCBlock->rep, &rep, sizeof(rep));
+            ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
         }
     }
     DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed");
@@ -826,12 +553,12 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
                                unsigned lastBlock) {
     ZSTD_entropyCTablesMetadata_t entropyMetadata;
 
-    FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore,
+    FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore,
           &zc->blockState.prevCBlock->entropy,
           &zc->blockState.nextCBlock->entropy,
           &zc->appliedParams,
           &entropyMetadata,
-          zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
+          zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
 
     return ZSTD_compressSubBlock_multi(&zc->seqStore,
             zc->blockState.prevCBlock,
@@ -841,5 +568,5 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
             dst, dstCapacity,
             src, srcSize,
             zc->bmi2, lastBlock,
-            zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */);
+            zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */);
 }

+ 1 - 1
Utilities/cmzstd/lib/compress/zstd_compress_superblock.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the

+ 204 - 67
Utilities/cmzstd/lib/compress/zstd_cwksp.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -35,6 +35,10 @@ extern "C" {
 #define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128
 #endif
 
+
+/* Set our tables and aligneds to align to 64 bytes */
+#define ZSTD_CWKSP_ALIGNMENT_BYTES 64
+
 /*-*************************************
 *  Structures
 ***************************************/
@@ -44,6 +48,16 @@ typedef enum {
     ZSTD_cwksp_alloc_aligned
 } ZSTD_cwksp_alloc_phase_e;
 
+/**
+ * Used to describe whether the workspace is statically allocated (and will not
+ * necessarily ever be freed), or if it's dynamically allocated and we can
+ * expect a well-formed caller to free this.
+ */
+typedef enum {
+    ZSTD_cwksp_dynamic_alloc,
+    ZSTD_cwksp_static_alloc
+} ZSTD_cwksp_static_alloc_e;
+
 /**
  * Zstd fits all its internal datastructures into a single continuous buffer,
  * so that it only needs to perform a single OS allocation (or so that a buffer
@@ -92,7 +106,7 @@ typedef enum {
  *
  * - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict,
  *   so that literally everything fits in a single buffer. Note: if present,
- *   this must be the first object in the workspace, since ZSTD_free{CCtx,
+ *   this must be the first object in the workspace, since ZSTD_customFree{CCtx,
  *   CDict}() rely on a pointer comparison to see whether one or two frees are
  *   required.
  *
@@ -107,10 +121,11 @@ typedef enum {
  * - Tables: these are any of several different datastructures (hash tables,
  *   chain tables, binary trees) that all respect a common format: they are
  *   uint32_t arrays, all of whose values are between 0 and (nextSrc - base).
- *   Their sizes depend on the cparams.
+ *   Their sizes depend on the cparams. These tables are 64-byte aligned.
  *
  * - Aligned: these buffers are used for various purposes that require 4 byte
- *   alignment, but don't require any initialization before they're used.
+ *   alignment, but don't require any initialization before they're used. These
+ *   buffers are each aligned to 64 bytes.
  *
  * - Buffers: these buffers are used for various purposes that don't require
  *   any alignment or initialization before they're used. This means they can
@@ -123,8 +138,7 @@ typedef enum {
  *
  * 1. Objects
  * 2. Buffers
- * 3. Aligned
- * 4. Tables
+ * 3. Aligned/Tables
  *
  * Attempts to reserve objects of different types out of order will fail.
  */
@@ -137,9 +151,10 @@ typedef struct {
     void* tableValidEnd;
     void* allocStart;
 
-    int allocFailed;
+    BYTE allocFailed;
     int workspaceOversizedDuration;
     ZSTD_cwksp_alloc_phase_e phase;
+    ZSTD_cwksp_static_alloc_e isStatic;
 } ZSTD_cwksp;
 
 /*-*************************************
@@ -176,39 +191,123 @@ MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) {
  * Since tables aren't currently redzoned, you don't need to call through this
  * to figure out how much space you need for the matchState tables. Everything
  * else is though.
+ *
+ * Do not use for sizing aligned buffers. Instead, use ZSTD_cwksp_aligned_alloc_size().
  */
 MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
-#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+    if (size == 0)
+        return 0;
+#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
     return size + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
 #else
     return size;
 #endif
 }
 
-MEM_STATIC void ZSTD_cwksp_internal_advance_phase(
+/**
+ * Returns an adjusted alloc size that is the nearest larger multiple of 64 bytes.
+ * Used to determine the number of bytes required for a given "aligned".
+ */
+MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) {
+    return ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(size, ZSTD_CWKSP_ALIGNMENT_BYTES));
+}
+
+/**
+ * Returns the amount of additional space the cwksp must allocate
+ * for internal purposes (currently only alignment).
+ */
+MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) {
+    /* For alignment, the wksp will always allocate an additional n_1=[1, 64] bytes
+     * to align the beginning of tables section, as well as another n_2=[0, 63] bytes
+     * to align the beginning of the aligned section.
+     *
+     * n_1 + n_2 == 64 bytes if the cwksp is freshly allocated, due to tables and
+     * aligneds being sized in multiples of 64 bytes.
+     */
+    size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES;
+    return slackSpace;
+}
+
+
+/**
+ * Return the number of additional bytes required to align a pointer to the given number of bytes.
+ * alignBytes must be a power of two.
+ */
+MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignBytes) {
+    size_t const alignBytesMask = alignBytes - 1;
+    size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask;
+    assert((alignBytes & alignBytesMask) == 0);
+    assert(bytes != ZSTD_CWKSP_ALIGNMENT_BYTES);
+    return bytes;
+}
+
+/**
+ * Internal function. Do not use directly.
+ * Reserves the given number of bytes within the aligned/buffer segment of the wksp, which
+ * counts from the end of the wksp (as opposed to the object/table segment).
+ *
+ * Returns a pointer to the beginning of that space.
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_internal_buffer_space(ZSTD_cwksp* ws, size_t const bytes) {
+    void* const alloc = (BYTE*)ws->allocStart - bytes;
+    void* const bottom = ws->tableEnd;
+    DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining",
+        alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
+    ZSTD_cwksp_assert_internal_consistency(ws);
+    assert(alloc >= bottom);
+    if (alloc < bottom) {
+        DEBUGLOG(4, "cwksp: alloc failed!");
+        ws->allocFailed = 1;
+        return NULL;
+    }
+    if (alloc < ws->tableValidEnd) {
+        ws->tableValidEnd = alloc;
+    }
+    ws->allocStart = alloc;
+    return alloc;
+}
+
+/**
+ * Moves the cwksp to the next phase, and does any necessary allocations.
+ * Returns 0 on success, or a zstd error code.
+ */
+MEM_STATIC size_t ZSTD_cwksp_internal_advance_phase(
         ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) {
     assert(phase >= ws->phase);
     if (phase > ws->phase) {
+        /* Going from allocating objects to allocating buffers */
         if (ws->phase < ZSTD_cwksp_alloc_buffers &&
                 phase >= ZSTD_cwksp_alloc_buffers) {
             ws->tableValidEnd = ws->objectEnd;
         }
+
+        /* Going from allocating buffers to allocating aligneds/tables */
         if (ws->phase < ZSTD_cwksp_alloc_aligned &&
                 phase >= ZSTD_cwksp_alloc_aligned) {
-            /* If unaligned allocations down from a too-large top have left us
-             * unaligned, we need to realign our alloc ptr. Technically, this
-             * can consume space that is unaccounted for in the neededSpace
-             * calculation. However, I believe this can only happen when the
-             * workspace is too large, and specifically when it is too large
-             * by a larger margin than the space that will be consumed. */
-            /* TODO: cleaner, compiler warning friendly way to do this??? */
-            ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1));
-            if (ws->allocStart < ws->tableValidEnd) {
-                ws->tableValidEnd = ws->allocStart;
+            {   /* Align the start of the "aligned" to 64 bytes. Use [1, 64] bytes. */
+                size_t const bytesToAlign =
+                    ZSTD_CWKSP_ALIGNMENT_BYTES - ZSTD_cwksp_bytes_to_align_ptr(ws->allocStart, ZSTD_CWKSP_ALIGNMENT_BYTES);
+                DEBUGLOG(5, "reserving aligned alignment addtl space: %zu", bytesToAlign);
+                ZSTD_STATIC_ASSERT((ZSTD_CWKSP_ALIGNMENT_BYTES & (ZSTD_CWKSP_ALIGNMENT_BYTES - 1)) == 0); /* power of 2 */
+                RETURN_ERROR_IF(!ZSTD_cwksp_reserve_internal_buffer_space(ws, bytesToAlign),
+                                memory_allocation, "aligned phase - alignment initial allocation failed!");
+            }
+            {   /* Align the start of the tables to 64 bytes. Use [0, 63] bytes */
+                void* const alloc = ws->objectEnd;
+                size_t const bytesToAlign = ZSTD_cwksp_bytes_to_align_ptr(alloc, ZSTD_CWKSP_ALIGNMENT_BYTES);
+                void* const end = (BYTE*)alloc + bytesToAlign;
+                DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign);
+                RETURN_ERROR_IF(end > ws->workspaceEnd, memory_allocation,
+                                "table phase - alignment initial allocation failed!");
+                ws->objectEnd = end;
+                ws->tableEnd = end;
+                ws->tableValidEnd = end;
             }
         }
         ws->phase = phase;
+        ZSTD_cwksp_assert_internal_consistency(ws);
     }
+    return 0;
 }
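
The two alignment steps above both reduce to one computation: how many bytes to advance a pointer to the next 64-byte boundary. A standalone version with a quick check (uintptr_t is used here in place of the library's size_t cast):

    #include <stdint.h>
    #include <stddef.h>
    #include <stdio.h>

    /* Bytes needed to round ptr up to an alignBytes boundary; 0 if already
     * aligned. alignBytes must be a power of two. */
    size_t bytesToAlignPtr(const void* ptr, size_t alignBytes)
    {
        size_t const mask = alignBytes - 1;
        return (alignBytes - ((uintptr_t)ptr & mask)) & mask;
    }

    int main(void)
    {
        _Alignas(64) unsigned char buf[128];   /* C11 alignment */
        printf("%zu %zu\n", bytesToAlignPtr(buf, 64),
                            bytesToAlignPtr(buf + 5, 64));   /* 0 59 */
        return 0;
    }
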
 
 /**
@@ -224,34 +323,26 @@ MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) {
 MEM_STATIC void* ZSTD_cwksp_reserve_internal(
         ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) {
     void* alloc;
-    void* bottom = ws->tableEnd;
-    ZSTD_cwksp_internal_advance_phase(ws, phase);
-    alloc = (BYTE *)ws->allocStart - bytes;
+    if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase)) || bytes == 0) {
+        return NULL;
+    }
 
-#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
     /* over-reserve space */
-    alloc = (BYTE *)alloc - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
+    bytes += 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
 #endif
 
-    DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining",
-        alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
-    ZSTD_cwksp_assert_internal_consistency(ws);
-    assert(alloc >= bottom);
-    if (alloc < bottom) {
-        DEBUGLOG(4, "cwksp: alloc failed!");
-        ws->allocFailed = 1;
-        return NULL;
-    }
-    if (alloc < ws->tableValidEnd) {
-        ws->tableValidEnd = alloc;
-    }
-    ws->allocStart = alloc;
+    alloc = ZSTD_cwksp_reserve_internal_buffer_space(ws, bytes);
 
-#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
     /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
      * either side. */
-    alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
-    __asan_unpoison_memory_region(alloc, bytes);
+    if (alloc) {
+        alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
+        if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
+            __asan_unpoison_memory_region(alloc, bytes);
+        }
+    }
 #endif
 
     return alloc;
@@ -265,28 +356,36 @@ MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) {
 }
 
 /**
- * Reserves and returns memory sized on and aligned on sizeof(unsigned).
+ * Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
  */
 MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) {
-    assert((bytes & (sizeof(U32)-1)) == 0);
-    return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned);
+    void* ptr = ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES),
+                                            ZSTD_cwksp_alloc_aligned);
+    assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0);
+    return ptr;
 }
 
 /**
- * Aligned on sizeof(unsigned). These buffers have the special property that
+ * Aligned on 64 bytes. These buffers have the special property that
  * their values remain constrained, allowing us to re-use them without
  * memset()-ing them.
  */
 MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
     const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned;
-    void* alloc = ws->tableEnd;
-    void* end = (BYTE *)alloc + bytes;
-    void* top = ws->allocStart;
+    void* alloc;
+    void* end;
+    void* top;
+
+    if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) {
+        return NULL;
+    }
+    alloc = ws->tableEnd;
+    end = (BYTE *)alloc + bytes;
+    top = ws->allocStart;
 
     DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining",
         alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
     assert((bytes & (sizeof(U32)-1)) == 0);
-    ZSTD_cwksp_internal_advance_phase(ws, phase);
     ZSTD_cwksp_assert_internal_consistency(ws);
     assert(end <= top);
     if (end > top) {
@@ -296,10 +395,14 @@ MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
     }
     ws->tableEnd = end;
 
-#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
-    __asan_unpoison_memory_region(alloc, bytes);
+#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+    if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
+        __asan_unpoison_memory_region(alloc, bytes);
+    }
 #endif
 
+    assert((bytes & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0);
+    assert(((size_t)alloc & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0);
     return alloc;
 }
 
@@ -311,7 +414,7 @@ MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
     void* alloc = ws->objectEnd;
     void* end = (BYTE*)alloc + roundedBytes;
 
-#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
     /* over-reserve space */
     end = (BYTE *)end + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
 #endif
@@ -332,11 +435,13 @@ MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
     ws->tableEnd = end;
     ws->tableValidEnd = end;
 
-#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
     /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
      * either side. */
     alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
-    __asan_unpoison_memory_region(alloc, bytes);
+    if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
+        __asan_unpoison_memory_region(alloc, bytes);
+    }
 #endif
 
     return alloc;
@@ -345,7 +450,7 @@ MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
 MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) {
     DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty");
 
-#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
+#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
     /* To validate that the table re-use logic is sound, and that we don't
      * access table space that we haven't cleaned, we re-"poison" the table
      * space every time we mark it dirty. */
@@ -380,7 +485,7 @@ MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) {
     assert(ws->tableValidEnd >= ws->objectEnd);
     assert(ws->tableValidEnd <= ws->allocStart);
     if (ws->tableValidEnd < ws->tableEnd) {
-        memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd);
+        ZSTD_memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd);
     }
     ZSTD_cwksp_mark_tables_clean(ws);
 }
@@ -392,8 +497,12 @@ MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) {
 MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) {
     DEBUGLOG(4, "cwksp: clearing tables!");
 
-#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
-    {
+#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+    /* We don't do this when the workspace is statically allocated, because
+     * when that is the case, we have no capability to hook into the end of the
+     * workspace's lifecycle to unpoison the memory.
+     */
+    if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
         size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
         __asan_poison_memory_region(ws->objectEnd, size);
     }
@@ -410,7 +519,7 @@ MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) {
 MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
     DEBUGLOG(4, "cwksp: clearing!");
 
-#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
+#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
     /* To validate that the context re-use logic is sound, and that we don't
      * access stuff that this compression hasn't initialized, we re-"poison"
      * the workspace (or at least the non-static, non-table parts of it)
@@ -421,8 +530,12 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
     }
 #endif
 
-#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
-    {
+#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+    /* We don't do this when the workspace is statically allocated, because
+     * when that is the case, we have no capability to hook into the end of the
+     * workspace's lifecycle to unpoison the memory.
+     */
+    if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
         size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->objectEnd;
         __asan_poison_memory_region(ws->objectEnd, size);
     }
@@ -442,7 +555,7 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
  * Any existing values in the workspace are ignored (the previously managed
  * buffer, if present, must be separately freed).
  */
-MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size) {
+MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size, ZSTD_cwksp_static_alloc_e isStatic) {
     DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size);
     assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */
     ws->workspace = start;
@@ -450,39 +563,45 @@ MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size) {
     ws->objectEnd = ws->workspace;
     ws->tableValidEnd = ws->objectEnd;
     ws->phase = ZSTD_cwksp_alloc_objects;
+    ws->isStatic = isStatic;
     ZSTD_cwksp_clear(ws);
     ws->workspaceOversizedDuration = 0;
     ZSTD_cwksp_assert_internal_consistency(ws);
 }
 
 MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) {
-    void* workspace = ZSTD_malloc(size, customMem);
+    void* workspace = ZSTD_customMalloc(size, customMem);
     DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size);
     RETURN_ERROR_IF(workspace == NULL, memory_allocation, "NULL pointer!");
-    ZSTD_cwksp_init(ws, workspace, size);
+    ZSTD_cwksp_init(ws, workspace, size, ZSTD_cwksp_dynamic_alloc);
     return 0;
 }
 
 MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) {
     void *ptr = ws->workspace;
     DEBUGLOG(4, "cwksp: freeing workspace");
-    memset(ws, 0, sizeof(ZSTD_cwksp));
-    ZSTD_free(ptr, customMem);
+    ZSTD_memset(ws, 0, sizeof(ZSTD_cwksp));
+    ZSTD_customFree(ptr, customMem);
 }
 
 /**
  * Moves the management of a workspace from one cwksp to another. The src cwksp
- * is left in an invalid state (src must be re-init()'ed before its used again).
+ * is left in an invalid state (src must be re-init()'ed before it's used again).
  */
 MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) {
     *dst = *src;
-    memset(src, 0, sizeof(ZSTD_cwksp));
+    ZSTD_memset(src, 0, sizeof(ZSTD_cwksp));
 }
 
 MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
     return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace);
 }
 
+MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) {
+    return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace)
+         + (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart);
+}
+
 MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
     return ws->allocFailed;
 }
@@ -491,6 +610,24 @@ MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
 *  Functions Checking Free Space
 ***************************************/
 
+/* ZSTD_cwksp_estimated_space_within_bounds() :
+ * Returns whether the estimated space needed for a wksp is within an acceptable limit of the
+ * actual amount of space used.
+ */
+MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp* const ws,
+                                                        size_t const estimatedSpace, int resizedWorkspace) {
+    if (resizedWorkspace) {
+        /* Resized/newly allocated wksp should have exact bounds */
+        return ZSTD_cwksp_used(ws) == estimatedSpace;
+    } else {
+        /* Due to alignment, when reusing a workspace, we can actually consume up to 63 bytes
+         * fewer or more than estimatedSpace. See the comments in zstd_cwksp.h for details.
+         */
+        return (ZSTD_cwksp_used(ws) >= estimatedSpace - 63) && (ZSTD_cwksp_used(ws) <= estimatedSpace + 63);
+    }
+}
+
+
 MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) {
     return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd);
 }
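
As a worked example of the bounds check added above (numbers invented for illustration): with estimatedSpace == 1000 and a reused, non-resized workspace, any ZSTD_cwksp_used() value in [937, 1063] passes, since alignment can shift actual consumption by up to 63 bytes either way; a resized or freshly allocated workspace must match exactly.

    #include <stdio.h>

    int main(void)
    {
        unsigned const estimated = 1000;  /* illustrative only */
        unsigned const used = 937;        /* lower edge of the accepted range */
        int const ok = (used >= estimated - 63) && (used <= estimated + 63);
        printf("%d\n", ok);               /* prints 1 */
        return 0;
    }
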

+ 25 - 25
Utilities/cmzstd/lib/compress/zstd_double_fast.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -31,15 +31,15 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
      * is empty.
      */
     for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
-        U32 const current = (U32)(ip - base);
+        U32 const curr = (U32)(ip - base);
         U32 i;
         for (i = 0; i < fastHashFillStep; ++i) {
             size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
             size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
             if (i == 0)
-                hashSmall[smHash] = current + i;
+                hashSmall[smHash] = curr + i;
             if (i == 0 || hashLarge[lgHash] == 0)
-                hashLarge[lgHash] = current + i;
+                hashLarge[lgHash] = curr + i;
             /* Only load extra positions for ZSTD_dtlm_full */
             if (dtlm == ZSTD_dtlm_fast)
                 break;
@@ -108,9 +108,9 @@ size_t ZSTD_compressBlock_doubleFast_generic(
     /* init */
     ip += (dictAndPrefixLength == 0);
     if (dictMode == ZSTD_noDict) {
-        U32 const current = (U32)(ip - base);
-        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
-        U32 const maxRep = current - windowLow;
+        U32 const curr = (U32)(ip - base);
+        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
+        U32 const maxRep = curr - windowLow;
         if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
         if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
     }
@@ -129,17 +129,17 @@ size_t ZSTD_compressBlock_doubleFast_generic(
         size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
         size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8);
         size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls);
-        U32 const current = (U32)(ip-base);
+        U32 const curr = (U32)(ip-base);
         U32 const matchIndexL = hashLong[h2];
         U32 matchIndexS = hashSmall[h];
         const BYTE* matchLong = base + matchIndexL;
         const BYTE* match = base + matchIndexS;
-        const U32 repIndex = current + 1 - offset_1;
+        const U32 repIndex = curr + 1 - offset_1;
         const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
                             && repIndex < prefixLowestIndex) ?
                                dictBase + (repIndex - dictIndexDelta) :
                                base + repIndex;
-        hashLong[h2] = hashSmall[h] = current;   /* update hash tables */
+        hashLong[h2] = hashSmall[h] = curr;   /* update hash tables */
 
         /* check dictMatchState repcode */
         if (dictMode == ZSTD_dictMatchState
@@ -177,7 +177,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
 
             if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
                 mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
-                offset = (U32)(current - dictMatchIndexL - dictIndexDelta);
+                offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
                 while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
                 goto _match_found;
         }   }
@@ -209,7 +209,7 @@ _search_next_long:
             size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
             U32 const matchIndexL3 = hashLong[hl3];
             const BYTE* matchL3 = base + matchIndexL3;
-            hashLong[hl3] = current + 1;
+            hashLong[hl3] = curr + 1;
 
             /* check prefix long +1 match */
             if (matchIndexL3 > prefixLowestIndex) {
@@ -228,7 +228,7 @@ _search_next_long:
                 if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
                     mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
                     ip++;
-                    offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta);
+                    offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
                     while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
                     goto _match_found;
         }   }   }
@@ -236,7 +236,7 @@ _search_next_long:
         /* if no long +1 match, explore the short match we found */
         if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
             mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
-            offset = (U32)(current - matchIndexS);
+            offset = (U32)(curr - matchIndexS);
             while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
         } else {
             mLength = ZSTD_count(ip+4, match+4, iend) + 4;
@@ -260,7 +260,7 @@ _match_stored:
         if (ip <= ilimit) {
             /* Complementary insertion */
             /* done after iLimit test, as candidates could be > iend-8 */
-            {   U32 const indexToInsert = current+2;
+            {   U32 const indexToInsert = curr+2;
                 hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                 hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                 hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
@@ -401,15 +401,15 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
         const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
         const BYTE* matchLong = matchLongBase + matchLongIndex;
 
-        const U32 current = (U32)(ip-base);
-        const U32 repIndex = current + 1 - offset_1;   /* offset_1 expected <= current +1 */
+        const U32 curr = (U32)(ip-base);
+        const U32 repIndex = curr + 1 - offset_1;   /* offset_1 expected <= curr +1 */
         const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
         const BYTE* const repMatch = repBase + repIndex;
         size_t mLength;
-        hashSmall[hSmall] = hashLong[hLong] = current;   /* update hash table */
+        hashSmall[hSmall] = hashLong[hLong] = curr;   /* update hash table */
 
         if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
-            & (repIndex > dictStartIndex))
+            & (offset_1 < curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
           && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
             const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
@@ -421,7 +421,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                 const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
                 U32 offset;
                 mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
-                offset = current - matchLongIndex;
+                offset = curr - matchLongIndex;
                 while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
                 offset_2 = offset_1;
                 offset_1 = offset;
@@ -433,19 +433,19 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                 const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
                 const BYTE* match3 = match3Base + matchIndex3;
                 U32 offset;
-                hashLong[h3] = current + 1;
+                hashLong[h3] = curr + 1;
                 if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
                     const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
                     const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
                     mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
                     ip++;
-                    offset = current+1 - matchIndex3;
+                    offset = curr+1 - matchIndex3;
                     while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
                 } else {
                     const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
                     const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
                     mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
-                    offset = current - matchIndex;
+                    offset = curr - matchIndex;
                     while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
                 }
                 offset_2 = offset_1;
@@ -464,7 +464,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
         if (ip <= ilimit) {
             /* Complementary insertion */
             /* done after iLimit test, as candidates could be > iend-8 */
-            {   U32 const indexToInsert = current+2;
+            {   U32 const indexToInsert = curr+2;
                 hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                 hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                 hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
@@ -477,7 +477,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                 U32 const repIndex2 = current2 - offset_2;
                 const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
                 if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3)   /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
-                    & (repIndex2 > dictStartIndex))
+                    & (offset_2 < current2 - dictStartIndex))
                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                     const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                     size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
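
The repcode guard rewritten in this file (and again twice in zstd_fast.c below) replaces `repIndex > dictStartIndex` with a bound on the offset itself. The two forms agree whenever the unsigned subtraction `curr + 1 - offset_1` does not wrap, because `offset_1 < curr + 1 - dictStartIndex` is algebraically the same as `repIndex > dictStartIndex`; the new form also stays correct when offset_1 exceeds curr + 1 and repIndex underflows. A minimal demonstration with invented index values:

    #include <stdint.h>
    #include <stdio.h>
    typedef uint32_t U32;

    int main(void)
    {
        U32 const curr = 5, dictStartIndex = 2;
        U32 const offset_1 = 9;                    /* invalid: larger than curr + 1 */
        U32 const repIndex = curr + 1 - offset_1;  /* wraps to 0xFFFFFFFD */
        printf("old check: %d\n", repIndex > dictStartIndex);             /* 1: spurious pass */
        printf("new check: %d\n", offset_1 < curr + 1 - dictStartIndex);  /* 0: rejected */
        return 0;
    }

The second occurrence in each file, which guards repIndex2 via offset_2, follows the same pattern.
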

+ 1 - 1
Utilities/cmzstd/lib/compress/zstd_double_fast.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the

+ 23 - 23
Utilities/cmzstd/lib/compress/zstd_fast.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -29,16 +29,16 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
      * Insert the other positions if their hash entry is empty.
      */
     for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
-        U32 const current = (U32)(ip - base);
+        U32 const curr = (U32)(ip - base);
         size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls);
-        hashTable[hash0] = current;
+        hashTable[hash0] = curr;
         if (dtlm == ZSTD_dtlm_fast) continue;
         /* Only load extra positions for ZSTD_dtlm_full */
         {   U32 p;
             for (p = 1; p < fastHashFillStep; ++p) {
                 size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls);
                 if (hashTable[hash] == 0) {  /* not yet filled */
-                    hashTable[hash] = current + p;
+                    hashTable[hash] = curr + p;
     }   }   }   }
 }
 
@@ -72,9 +72,9 @@ ZSTD_compressBlock_fast_generic(
     DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
     ip0 += (ip0 == prefixStart);
     ip1 = ip0 + 1;
-    {   U32 const current = (U32)(ip0 - base);
-        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
-        U32 const maxRep = current - windowLow;
+    {   U32 const curr = (U32)(ip0 - base);
+        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
+        U32 const maxRep = curr - windowLow;
         if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
         if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
     }
@@ -242,7 +242,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
     assert(endIndex - prefixStartIndex <= maxDistance);
     (void)maxDistance; (void)endIndex;   /* these variables are not used when assert() is disabled */
 
-    /* ensure there will be no no underflow
+    /* ensure there will be no underflow
      * when translating a dict index into a local index */
     assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
 
@@ -258,14 +258,14 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
     while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
         size_t mLength;
         size_t const h = ZSTD_hashPtr(ip, hlog, mls);
-        U32 const current = (U32)(ip-base);
+        U32 const curr = (U32)(ip-base);
         U32 const matchIndex = hashTable[h];
         const BYTE* match = base + matchIndex;
-        const U32 repIndex = current + 1 - offset_1;
+        const U32 repIndex = curr + 1 - offset_1;
         const BYTE* repMatch = (repIndex < prefixStartIndex) ?
                                dictBase + (repIndex - dictIndexDelta) :
                                base + repIndex;
-        hashTable[h] = current;   /* update hash table */
+        hashTable[h] = curr;   /* update hash table */
 
         if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
           && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
@@ -284,7 +284,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
                 continue;
             } else {
                 /* found a dict match */
-                U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta);
+                U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta);
                 mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
                 while (((ip>anchor) & (dictMatch>dictStart))
                      && (ip[-1] == dictMatch[-1])) {
@@ -316,8 +316,8 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
 
         if (ip <= ilimit) {
             /* Fill Table */
-            assert(base+current+2 > istart);  /* check base overflow */
-            hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;  /* here because current+2 could be > iend-8 */
+            assert(base+curr+2 > istart);  /* check base overflow */
+            hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;  /* here because curr+2 could be > iend-8 */
             hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
 
             /* check immediate repcode */
@@ -410,15 +410,15 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
         const U32    matchIndex = hashTable[h];
         const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
         const BYTE*  match = matchBase + matchIndex;
-        const U32    current = (U32)(ip-base);
-        const U32    repIndex = current + 1 - offset_1;
+        const U32    curr = (U32)(ip-base);
+        const U32    repIndex = curr + 1 - offset_1;
         const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
         const BYTE* const repMatch = repBase + repIndex;
-        hashTable[h] = current;   /* update hash table */
-        DEBUGLOG(7, "offset_1 = %u , current = %u", offset_1, current);
-        assert(offset_1 <= current +1);   /* check repIndex */
+        hashTable[h] = curr;   /* update hash table */
+        DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);
 
-        if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
+        if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */
+             & (offset_1 < curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */
            && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
             const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
             size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
@@ -435,7 +435,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
             }
             {   const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
                 const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
-                U32 const offset = current - matchIndex;
+                U32 const offset = curr - matchIndex;
                 size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
                 while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
                 offset_2 = offset_1; offset_1 = offset;  /* update offset history */
@@ -446,14 +446,14 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
 
         if (ip <= ilimit) {
             /* Fill Table */
-            hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;
+            hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;
             hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
             /* check immediate repcode */
             while (ip <= ilimit) {
                 U32 const current2 = (U32)(ip-base);
                 U32 const repIndex2 = current2 - offset_2;
                 const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
-                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex))  /* intentional overflow */
+                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 < curr - dictStartIndex))  /* intentional overflow */
                    && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                     const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                     size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;

+ 1 - 1
Utilities/cmzstd/lib/compress/zstd_fast.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the

File changes will not be shown because they are too large
+ 938 - 45
Utilities/cmzstd/lib/compress/zstd_lazy.c


+ 59 - 1
Utilities/cmzstd/lib/compress/zstd_lazy.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -17,7 +17,18 @@ extern "C" {
 
 #include "zstd_compress_internal.h"
 
+/**
+ * Dedicated Dictionary Search Structure bucket log. In the
+ * ZSTD_dedicatedDictSearch mode, the hashTable has
+ * 2 ** ZSTD_LAZY_DDSS_BUCKET_LOG entries in each bucket, rather than just
+ * one.
+ */
+#define ZSTD_LAZY_DDSS_BUCKET_LOG 2
+
 U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
+void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip);
+
+void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip);
 
 void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue);  /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */
 
@@ -33,6 +44,15 @@ size_t ZSTD_compressBlock_lazy(
 size_t ZSTD_compressBlock_greedy(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
 
 size_t ZSTD_compressBlock_btlazy2_dictMatchState(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -46,6 +66,34 @@ size_t ZSTD_compressBlock_lazy_dictMatchState(
 size_t ZSTD_compressBlock_greedy_dictMatchState(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_dictMatchState_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_dictMatchState_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
 
 size_t ZSTD_compressBlock_greedy_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -56,9 +104,19 @@ size_t ZSTD_compressBlock_lazy_extDict(
 size_t ZSTD_compressBlock_lazy2_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_extDict_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_extDict_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_extDict_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
 size_t ZSTD_compressBlock_btlazy2_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
 
 #if defined (__cplusplus)
 }
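
For scale (the hashLog value below is chosen only for illustration): with ZSTD_LAZY_DDSS_BUCKET_LOG at 2, each bucket holds 1 << 2 = 4 entries, so a table of 2^hashLog slots is addressed as 2^(hashLog - 2) buckets:

    #include <stdio.h>
    #define ZSTD_LAZY_DDSS_BUCKET_LOG 2

    int main(void)
    {
        unsigned const hashLog = 20;   /* illustrative value, not a library default */
        unsigned const entriesPerBucket = 1u << ZSTD_LAZY_DDSS_BUCKET_LOG;        /* 4 */
        unsigned const numBuckets = 1u << (hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG);  /* 2^18 */
        printf("%u buckets x %u entries = %u slots\n",
               numBuckets, entriesPerBucket, numBuckets * entriesPerBucket);
        return 0;
    }
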

+ 314 - 211
Utilities/cmzstd/lib/compress/zstd_ldm.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -11,13 +11,126 @@
 #include "zstd_ldm.h"
 
 #include "../common/debug.h"
+#include "../common/xxhash.h"
 #include "zstd_fast.h"          /* ZSTD_fillHashTable() */
 #include "zstd_double_fast.h"   /* ZSTD_fillDoubleHashTable() */
+#include "zstd_ldm_geartab.h"
 
 #define LDM_BUCKET_SIZE_LOG 3
 #define LDM_MIN_MATCH_LENGTH 64
 #define LDM_HASH_RLOG 7
-#define LDM_HASH_CHAR_OFFSET 10
+
+typedef struct {
+    U64 rolling;
+    U64 stopMask;
+} ldmRollingHashState_t;
+
+/** ZSTD_ldm_gear_init():
+ *
+ * Initializes the rolling hash state such that it will honor the
+ * settings in params. */
+static void ZSTD_ldm_gear_init(ldmRollingHashState_t* state, ldmParams_t const* params)
+{
+    unsigned maxBitsInMask = MIN(params->minMatchLength, 64);
+    unsigned hashRateLog = params->hashRateLog;
+
+    state->rolling = ~(U32)0;
+
+    /* The choice of the splitting criterion is subject to two conditions:
+     *   1. it has to trigger on average every 2^(hashRateLog) bytes;
+     *   2. ideally, it has to depend on a window of minMatchLength bytes.
+     *
+     * In the gear hash algorithm, bit n depends on the last n bytes;
+     * so in order to obtain a good quality splitting criterion it is
+     * preferable to use bits with high weight.
+     *
+     * To match condition 1 we use a mask with hashRateLog bits set
+     * and, because of the previous remark, we make sure these bits
+     * have the highest possible weight while still respecting
+     * condition 2.
+     */
+    if (hashRateLog > 0 && hashRateLog <= maxBitsInMask) {
+        state->stopMask = (((U64)1 << hashRateLog) - 1) << (maxBitsInMask - hashRateLog);
+    } else {
+        /* In this degenerate case we simply honor the hash rate. */
+        state->stopMask = ((U64)1 << hashRateLog) - 1;
+    }
+}
+
+/** ZSTD_ldm_gear_reset()
+ * Feeds [data, data + minMatchLength) into the hash without registering any
+ * splits. This effectively resets the hash state. This is used when skipping
+ * over data, either at the beginning of a block, or skipping sections.
+ */
+static void ZSTD_ldm_gear_reset(ldmRollingHashState_t* state,
+                                BYTE const* data, size_t minMatchLength)
+{
+    U64 hash = state->rolling;
+    size_t n = 0;
+
+#define GEAR_ITER_ONCE() do {                                  \
+        hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \
+        n += 1;                                                \
+    } while (0)
+    while (n + 3 < minMatchLength) {
+        GEAR_ITER_ONCE();
+        GEAR_ITER_ONCE();
+        GEAR_ITER_ONCE();
+        GEAR_ITER_ONCE();
+    }
+    while (n < minMatchLength) {
+        GEAR_ITER_ONCE();
+    }
+#undef GEAR_ITER_ONCE
+}
+
+/** ZSTD_ldm_gear_feed():
+ *
+ * Registers in the splits array all the split points found in the first
+ * size bytes following the data pointer. This function terminates when
+ * either all the data has been processed or LDM_BATCH_SIZE splits are
+ * present in the splits array.
+ *
+ * Precondition: The splits array must not be full.
+ * Returns: The number of bytes processed. */
+static size_t ZSTD_ldm_gear_feed(ldmRollingHashState_t* state,
+                                 BYTE const* data, size_t size,
+                                 size_t* splits, unsigned* numSplits)
+{
+    size_t n;
+    U64 hash, mask;
+
+    hash = state->rolling;
+    mask = state->stopMask;
+    n = 0;
+
+#define GEAR_ITER_ONCE() do { \
+        hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \
+        n += 1; \
+        if (UNLIKELY((hash & mask) == 0)) { \
+            splits[*numSplits] = n; \
+            *numSplits += 1; \
+            if (*numSplits == LDM_BATCH_SIZE) \
+                goto done; \
+        } \
+    } while (0)
+
+    while (n + 3 < size) {
+        GEAR_ITER_ONCE();
+        GEAR_ITER_ONCE();
+        GEAR_ITER_ONCE();
+        GEAR_ITER_ONCE();
+    }
+    while (n < size) {
+        GEAR_ITER_ONCE();
+    }
+
+#undef GEAR_ITER_ONCE
+
+done:
+    state->rolling = hash;
+    return n;
+}
 
 void ZSTD_ldm_adjustParameters(ldmParams_t* params,
                                ZSTD_compressionParameters const* cParams)
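
A worked instance of the stopMask construction above, for the common case where minMatchLength >= 64 (so maxBitsInMask is 64); the hashRateLog value is picked only for illustration:

    #include <stdint.h>
    #include <stdio.h>
    typedef uint64_t U64;

    int main(void)
    {
        unsigned const maxBitsInMask = 64;
        unsigned const hashRateLog = 7;    /* target: one split per 2^7 = 128 bytes */
        U64 const stopMask = (((U64)1 << hashRateLog) - 1) << (maxBitsInMask - hashRateLog);
        /* Prints 0xfe00000000000000: the 7 highest-weight bits of the hash.
         * A split triggers when (hash & stopMask) == 0, i.e. with probability 2^-7. */
        printf("stopMask = 0x%016llx\n", (unsigned long long)stopMask);
        return 0;
    }
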
@@ -27,13 +140,6 @@ void ZSTD_ldm_adjustParameters(ldmParams_t* params,
     DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
     if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
     if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
-    if (cParams->strategy >= ZSTD_btopt) {
-      /* Get out of the way of the optimal parser */
-      U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength);
-      assert(minMatch >= ZSTD_LDM_MINMATCH_MIN);
-      assert(minMatch <= ZSTD_LDM_MINMATCH_MAX);
-      params->minMatchLength = minMatch;
-    }
     if (params->hashLog == 0) {
         params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
         assert(params->hashLog <= ZSTD_HASHLOG_MAX);
@@ -61,41 +167,6 @@ size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
     return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
 }
 
-/** ZSTD_ldm_getSmallHash() :
- *  numBits should be <= 32
- *  If numBits==0, returns 0.
- *  @return : the most significant numBits of value. */
-static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits)
-{
-    assert(numBits <= 32);
-    return numBits == 0 ? 0 : (U32)(value >> (64 - numBits));
-}
-
-/** ZSTD_ldm_getChecksum() :
- *  numBitsToDiscard should be <= 32
- *  @return : the next most significant 32 bits after numBitsToDiscard */
-static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard)
-{
-    assert(numBitsToDiscard <= 32);
-    return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF;
-}
-
-/** ZSTD_ldm_getTag() ;
- *  Given the hash, returns the most significant numTagBits bits
- *  after (32 + hbits) bits.
- *
- *  If there are not enough bits remaining, return the last
- *  numTagBits bits. */
-static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits)
-{
-    assert(numTagBits < 32 && hbits <= 32);
-    if (32 - hbits < numTagBits) {
-        return hash & (((U32)1 << numTagBits) - 1);
-    } else {
-        return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1);
-    }
-}
-
 /** ZSTD_ldm_getBucket() :
  *  Returns a pointer to the start of the bucket associated with hash. */
 static ldmEntry_t* ZSTD_ldm_getBucket(
@@ -110,38 +181,12 @@ static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
                                  size_t const hash, const ldmEntry_t entry,
                                  ldmParams_t const ldmParams)
 {
-    BYTE* const bucketOffsets = ldmState->bucketOffsets;
-    *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry;
-    bucketOffsets[hash]++;
-    bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1;
-}
+    BYTE* const pOffset = ldmState->bucketOffsets + hash;
+    unsigned const offset = *pOffset;
+
+    *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + offset) = entry;
+    *pOffset = (BYTE)((offset + 1) & ((1u << ldmParams.bucketSizeLog) - 1));
 
-/** ZSTD_ldm_makeEntryAndInsertByTag() :
- *
- *  Gets the small hash, checksum, and tag from the rollingHash.
- *
- *  If the tag matches (1 << ldmParams.hashRateLog)-1, then
- *  creates an ldmEntry from the offset, and inserts it into the hash table.
- *
- *  hBits is the length of the small hash, which is the most significant hBits
- *  of rollingHash. The checksum is the next 32 most significant bits, followed
- *  by ldmParams.hashRateLog bits that make up the tag. */
-static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
-                                             U64 const rollingHash,
-                                             U32 const hBits,
-                                             U32 const offset,
-                                             ldmParams_t const ldmParams)
-{
-    U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashRateLog);
-    U32 const tagMask = ((U32)1 << ldmParams.hashRateLog) - 1;
-    if (tag == tagMask) {
-        U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
-        U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
-        ldmEntry_t entry;
-        entry.offset = offset;
-        entry.checksum = checksum;
-        ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams);
-    }
 }
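
The rewritten ZSTD_ldm_insertEntry above keeps one rotating write cursor per bucket: each insert advances the cursor and masks it back into range, so the oldest entry is overwritten once the bucket is full. With bucketSizeLog == 3 (the LDM_BUCKET_SIZE_LOG default) the cursor cycles 0 through 7:

    #include <stdio.h>

    int main(void)
    {
        unsigned const bucketSizeLog = 3;  /* LDM_BUCKET_SIZE_LOG */
        unsigned offset = 0, i;
        for (i = 0; i < 10; i++) {
            printf("%u ", offset);         /* 0 1 2 3 4 5 6 7 0 1 */
            offset = (offset + 1) & ((1u << bucketSizeLog) - 1);
        }
        printf("\n");
        return 0;
    }
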
 
 /** ZSTD_ldm_countBackwardsMatch() :
@@ -150,10 +195,10 @@ static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
  *  We count only bytes where pMatch >= pBase and pIn >= pAnchor. */
 static size_t ZSTD_ldm_countBackwardsMatch(
             const BYTE* pIn, const BYTE* pAnchor,
-            const BYTE* pMatch, const BYTE* pBase)
+            const BYTE* pMatch, const BYTE* pMatchBase)
 {
     size_t matchLength = 0;
-    while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) {
+    while (pIn > pAnchor && pMatch > pMatchBase && pIn[-1] == pMatch[-1]) {
         pIn--;
         pMatch--;
         matchLength++;
@@ -161,6 +206,27 @@ static size_t ZSTD_ldm_countBackwardsMatch(
     return matchLength;
 }
 
+/** ZSTD_ldm_countBackwardsMatch_2segments() :
+ *  Returns the number of bytes that match backwards from pMatch,
+ *  even with the backwards match spanning 2 different segments.
+ *
+ *  On reaching `pMatchBase`, continue counting backwards from `pExtDictEnd` */
+static size_t ZSTD_ldm_countBackwardsMatch_2segments(
+                    const BYTE* pIn, const BYTE* pAnchor,
+                    const BYTE* pMatch, const BYTE* pMatchBase,
+                    const BYTE* pExtDictStart, const BYTE* pExtDictEnd)
+{
+    size_t matchLength = ZSTD_ldm_countBackwardsMatch(pIn, pAnchor, pMatch, pMatchBase);
+    if (pMatch - matchLength != pMatchBase || pMatchBase == pExtDictStart) {
+        /* If backwards match is entirely in the extDict or prefix, immediately return */
+        return matchLength;
+    }
+    DEBUGLOG(7, "ZSTD_ldm_countBackwardsMatch_2segments: found 2-parts backwards match (length in prefix==%zu)", matchLength);
+    matchLength += ZSTD_ldm_countBackwardsMatch(pIn - matchLength, pAnchor, pExtDictEnd, pExtDictStart);
+    DEBUGLOG(7, "final backwards match length = %zu", matchLength);
+    return matchLength;
+}
+
 /** ZSTD_ldm_fillFastTables() :
  *
  *  Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies.
@@ -198,43 +264,42 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
     return 0;
 }
 
-/** ZSTD_ldm_fillLdmHashTable() :
- *
- *  Fills hashTable from (lastHashed + 1) to iend (non-inclusive).
- *  lastHash is the rolling hash that corresponds to lastHashed.
- *
- *  Returns the rolling hash corresponding to position iend-1. */
-static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
-                                     U64 lastHash, const BYTE* lastHashed,
-                                     const BYTE* iend, const BYTE* base,
-                                     U32 hBits, ldmParams_t const ldmParams)
-{
-    U64 rollingHash = lastHash;
-    const BYTE* cur = lastHashed + 1;
-
-    while (cur < iend) {
-        rollingHash = ZSTD_rollingHash_rotate(rollingHash, cur[-1],
-                                              cur[ldmParams.minMatchLength-1],
-                                              state->hashPower);
-        ZSTD_ldm_makeEntryAndInsertByTag(state,
-                                         rollingHash, hBits,
-                                         (U32)(cur - base), ldmParams);
-        ++cur;
-    }
-    return rollingHash;
-}
-
 void ZSTD_ldm_fillHashTable(
-            ldmState_t* state, const BYTE* ip,
+            ldmState_t* ldmState, const BYTE* ip,
             const BYTE* iend, ldmParams_t const* params)
 {
+    U32 const minMatchLength = params->minMatchLength;
+    U32 const hBits = params->hashLog - params->bucketSizeLog;
+    BYTE const* const base = ldmState->window.base;
+    BYTE const* const istart = ip;
+    ldmRollingHashState_t hashState;
+    size_t* const splits = ldmState->splitIndices;
+    unsigned numSplits;
+
     DEBUGLOG(5, "ZSTD_ldm_fillHashTable");
-    if ((size_t)(iend - ip) >= params->minMatchLength) {
-        U64 startingHash = ZSTD_rollingHash_compute(ip, params->minMatchLength);
-        ZSTD_ldm_fillLdmHashTable(
-            state, startingHash, ip, iend - params->minMatchLength, state->window.base,
-            params->hashLog - params->bucketSizeLog,
-            *params);
+
+    ZSTD_ldm_gear_init(&hashState, params);
+    while (ip < iend) {
+        size_t hashed;
+        unsigned n;
+
+        numSplits = 0;
+        hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits);
+
+        for (n = 0; n < numSplits; n++) {
+            if (ip + splits[n] >= istart + minMatchLength) {
+                BYTE const* const split = ip + splits[n] - minMatchLength;
+                U64 const xxhash = XXH64(split, minMatchLength, 0);
+                U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1));
+                ldmEntry_t entry;
+
+                entry.offset = (U32)(split - base);
+                entry.checksum = (U32)(xxhash >> 32);
+                ZSTD_ldm_insertEntry(ldmState, hash, entry, *params);
+            }
+        }
+
+        ip += hashed;
     }
 }
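
The new fill path derives both table fields from a single XXH64 call: the low hBits bits select the bucket and the high 32 bits become the stored checksum. A compact restatement of that split, with a stand-in hash value and an hBits chosen only for illustration:

    #include <stdint.h>
    #include <stdio.h>
    typedef uint32_t U32;
    typedef uint64_t U64;

    int main(void)
    {
        U64 const xxhash = 0x0123456789abcdefULL;  /* stand-in for XXH64(split, minMatchLength, 0) */
        unsigned const hBits = 17;                 /* hashLog - bucketSizeLog, illustrative */
        U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1));
        U32 const checksum = (U32)(xxhash >> 32);
        printf("bucket hash = 0x%x, checksum = 0x%x\n", hash, checksum);
        return 0;
    }
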
 
@@ -246,10 +311,10 @@ void ZSTD_ldm_fillHashTable(
  *  (after a long match, only update tables a limited amount). */
 static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
 {
-    U32 const current = (U32)(anchor - ms->window.base);
-    if (current > ms->nextToUpdate + 1024) {
+    U32 const curr = (U32)(anchor - ms->window.base);
+    if (curr > ms->nextToUpdate + 1024) {
         ms->nextToUpdate =
-            current - MIN(512, current - ms->nextToUpdate - 1024);
+            curr - MIN(512, curr - ms->nextToUpdate - 1024);
     }
 }
 
@@ -260,11 +325,8 @@ static size_t ZSTD_ldm_generateSequences_internal(
     /* LDM parameters */
     int const extDict = ZSTD_window_hasExtDict(ldmState->window);
     U32 const minMatchLength = params->minMatchLength;
-    U64 const hashPower = ldmState->hashPower;
+    U32 const entsPerBucket = 1U << params->bucketSizeLog;
     U32 const hBits = params->hashLog - params->bucketSizeLog;
-    U32 const ldmBucketSize = 1U << params->bucketSizeLog;
-    U32 const hashRateLog = params->hashRateLog;
-    U32 const ldmTagMask = (1U << params->hashRateLog) - 1;
     /* Prefix and extDict parameters */
     U32 const dictLimit = ldmState->window.dictLimit;
     U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
@@ -276,45 +338,69 @@ static size_t ZSTD_ldm_generateSequences_internal(
     /* Input bounds */
     BYTE const* const istart = (BYTE const*)src;
     BYTE const* const iend = istart + srcSize;
-    BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE);
+    BYTE const* const ilimit = iend - HASH_READ_SIZE;
     /* Input positions */
     BYTE const* anchor = istart;
     BYTE const* ip = istart;
-    /* Rolling hash */
-    BYTE const* lastHashed = NULL;
-    U64 rollingHash = 0;
-
-    while (ip <= ilimit) {
-        size_t mLength;
-        U32 const current = (U32)(ip - base);
-        size_t forwardMatchLength = 0, backwardMatchLength = 0;
-        ldmEntry_t* bestEntry = NULL;
-        if (ip != istart) {
-            rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0],
-                                                  lastHashed[minMatchLength],
-                                                  hashPower);
-        } else {
-            rollingHash = ZSTD_rollingHash_compute(ip, minMatchLength);
+    /* Rolling hash state */
+    ldmRollingHashState_t hashState;
+    /* Arrays for staged-processing */
+    size_t* const splits = ldmState->splitIndices;
+    ldmMatchCandidate_t* const candidates = ldmState->matchCandidates;
+    unsigned numSplits;
+
+    if (srcSize < minMatchLength)
+        return iend - anchor;
+
+    /* Initialize the rolling hash state with the first minMatchLength bytes */
+    ZSTD_ldm_gear_init(&hashState, params);
+    ZSTD_ldm_gear_reset(&hashState, ip, minMatchLength);
+    ip += minMatchLength;
+
+    while (ip < ilimit) {
+        size_t hashed;
+        unsigned n;
+
+        numSplits = 0;
+        hashed = ZSTD_ldm_gear_feed(&hashState, ip, ilimit - ip,
+                                    splits, &numSplits);
+
+        for (n = 0; n < numSplits; n++) {
+            BYTE const* const split = ip + splits[n] - minMatchLength;
+            U64 const xxhash = XXH64(split, minMatchLength, 0);
+            U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1));
+
+            candidates[n].split = split;
+            candidates[n].hash = hash;
+            candidates[n].checksum = (U32)(xxhash >> 32);
+            candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, *params);
+            PREFETCH_L1(candidates[n].bucket);
         }
-        lastHashed = ip;
 
-        /* Do not insert and do not look for a match */
-        if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) {
-           ip++;
-           continue;
-        }
+        for (n = 0; n < numSplits; n++) {
+            size_t forwardMatchLength = 0, backwardMatchLength = 0,
+                   bestMatchLength = 0, mLength;
+            U32 offset;
+            BYTE const* const split = candidates[n].split;
+            U32 const checksum = candidates[n].checksum;
+            U32 const hash = candidates[n].hash;
+            ldmEntry_t* const bucket = candidates[n].bucket;
+            ldmEntry_t const* cur;
+            ldmEntry_t const* bestEntry = NULL;
+            ldmEntry_t newEntry;
+
+            newEntry.offset = (U32)(split - base);
+            newEntry.checksum = checksum;
+
+            /* If a split point would generate a sequence overlapping with
+             * the previous one, we merely register it in the hash table and
+             * move on */
+            if (split < anchor) {
+                ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
+                continue;
+            }
 
-        /* Get the best entry and compute the match lengths */
-        {
-            ldmEntry_t* const bucket =
-                ZSTD_ldm_getBucket(ldmState,
-                                   ZSTD_ldm_getSmallHash(rollingHash, hBits),
-                                   *params);
-            ldmEntry_t* cur;
-            size_t bestMatchLength = 0;
-            U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
-
-            for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
+            for (cur = bucket; cur < bucket + entsPerBucket; cur++) {
                 size_t curForwardMatchLength, curBackwardMatchLength,
                        curTotalMatchLength;
                 if (cur->checksum != checksum || cur->offset <= lowestIndex) {
@@ -328,30 +414,23 @@ static size_t ZSTD_ldm_generateSequences_internal(
                         cur->offset < dictLimit ? dictEnd : iend;
                     BYTE const* const lowMatchPtr =
                         cur->offset < dictLimit ? dictStart : lowPrefixPtr;
-
-                    curForwardMatchLength = ZSTD_count_2segments(
-                                                ip, pMatch, iend,
-                                                matchEnd, lowPrefixPtr);
+                    curForwardMatchLength =
+                        ZSTD_count_2segments(split, pMatch, iend, matchEnd, lowPrefixPtr);
                     if (curForwardMatchLength < minMatchLength) {
                         continue;
                     }
-                    curBackwardMatchLength =
-                        ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
-                                                     lowMatchPtr);
-                    curTotalMatchLength = curForwardMatchLength +
-                                          curBackwardMatchLength;
+                    curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch_2segments(
+                            split, anchor, pMatch, lowMatchPtr, dictStart, dictEnd);
                 } else { /* !extDict */
                     BYTE const* const pMatch = base + cur->offset;
-                    curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
+                    curForwardMatchLength = ZSTD_count(split, pMatch, iend);
                     if (curForwardMatchLength < minMatchLength) {
                         continue;
                     }
                     curBackwardMatchLength =
-                        ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
-                                                     lowPrefixPtr);
-                    curTotalMatchLength = curForwardMatchLength +
-                                          curBackwardMatchLength;
+                        ZSTD_ldm_countBackwardsMatch(split, anchor, pMatch, lowPrefixPtr);
                 }
+                curTotalMatchLength = curForwardMatchLength + curBackwardMatchLength;
 
                 if (curTotalMatchLength > bestMatchLength) {
                     bestMatchLength = curTotalMatchLength;
@@ -360,57 +439,54 @@ static size_t ZSTD_ldm_generateSequences_internal(
                     bestEntry = cur;
                 }
             }
-        }
-
-        /* No match found -- continue searching */
-        if (bestEntry == NULL) {
-            ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
-                                             hBits, current,
-                                             *params);
-            ip++;
-            continue;
-        }
 
-        /* Match found */
-        mLength = forwardMatchLength + backwardMatchLength;
-        ip -= backwardMatchLength;
+            /* No match found -- insert an entry into the hash table
+             * and process the next candidate match */
+            if (bestEntry == NULL) {
+                ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
+                continue;
+            }
 
-        {
-            /* Store the sequence:
-             * ip = current - backwardMatchLength
-             * The match is at (bestEntry->offset - backwardMatchLength)
-             */
-            U32 const matchIndex = bestEntry->offset;
-            U32 const offset = current - matchIndex;
-            rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
-
-            /* Out of sequence storage */
-            if (rawSeqStore->size == rawSeqStore->capacity)
-                return ERROR(dstSize_tooSmall);
-            seq->litLength = (U32)(ip - anchor);
-            seq->matchLength = (U32)mLength;
-            seq->offset = offset;
-            rawSeqStore->size++;
-        }
+            /* Match found */
+            offset = (U32)(split - base) - bestEntry->offset;
+            mLength = forwardMatchLength + backwardMatchLength;
+            {
+                rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
+
+                /* Out of sequence storage */
+                if (rawSeqStore->size == rawSeqStore->capacity)
+                    return ERROR(dstSize_tooSmall);
+                seq->litLength = (U32)(split - backwardMatchLength - anchor);
+                seq->matchLength = (U32)mLength;
+                seq->offset = offset;
+                rawSeqStore->size++;
+            }
 
-        /* Insert the current entry into the hash table */
-        ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
-                                         (U32)(lastHashed - base),
-                                         *params);
+            /* Insert the current entry into the hash table --- it must be
+             * done after the previous block to avoid clobbering bestEntry */
+            ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
 
-        assert(ip + backwardMatchLength == lastHashed);
+            anchor = split + forwardMatchLength;
 
-        /* Fill the hash table from lastHashed+1 to ip+mLength*/
-        /* Heuristic: don't need to fill the entire table at end of block */
-        if (ip + mLength <= ilimit) {
-            rollingHash = ZSTD_ldm_fillLdmHashTable(
-                              ldmState, rollingHash, lastHashed,
-                              ip + mLength, base, hBits, *params);
-            lastHashed = ip + mLength - 1;
+            /* If we find a match that ends after the data that we've hashed
+             * then we have a repeating, overlapping pattern. E.g. all zeros.
+             * If one repetition of the pattern matches our `stopMask` then all
+             * repetitions will. We don't need to insert them all into our table,
+             * only the first one. So skip over overlapping matches.
+             * This is a major speed boost (20x) for compressing a single byte
+             * repeated, when that byte ends up in the table.
+             */
+            if (anchor > ip + hashed) {
+                ZSTD_ldm_gear_reset(&hashState, anchor - minMatchLength, minMatchLength);
+                /* Continue the outer loop at anchor (ip + hashed == anchor). */
+                ip = anchor - hashed;
+                break;
+            }
         }
-        ip += mLength;
-        anchor = ip;
+
+        ip += hashed;
     }
+
     return iend - anchor;
 }
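
A worked example of the split/anchor bookkeeping above, with illustrative numbers: if anchor sits at block offset 0 and the gear hash flags a split at offset 100, where ZSTD_count() finds forwardMatchLength = 70 and ZSTD_ldm_countBackwardsMatch() extends the match back by backwardMatchLength = 5, then the stored sequence gets litLength = split - backwardMatchLength - anchor = 95 and matchLength = 70 + 5 = 75, and anchor advances to split + forwardMatchLength = 170.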
 
@@ -459,7 +535,7 @@ size_t ZSTD_ldm_generateSequences(
 
         assert(chunkStart < iend);
         /* 1. Perform overflow correction if necessary. */
-        if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
+        if (ZSTD_window_needOverflowCorrection(ldmState->window, 0, maxDist, ldmState->loadedDictEnd, chunkStart, chunkEnd)) {
             U32 const ldmHSize = 1U << params->hashLog;
             U32 const correction = ZSTD_window_correctOverflow(
                 &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
@@ -562,14 +638,32 @@ static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
     return sequence;
 }
 
+void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
+    U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
+    while (currPos && rawSeqStore->pos < rawSeqStore->size) {
+        rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
+        if (currPos >= currSeq.litLength + currSeq.matchLength) {
+            currPos -= currSeq.litLength + currSeq.matchLength;
+            rawSeqStore->pos++;
+        } else {
+            rawSeqStore->posInSequence = currPos;
+            break;
+        }
+    }
+    if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
+        rawSeqStore->posInSequence = 0;
+    }
+}
+
 size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
     ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+    ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
     void const* src, size_t srcSize)
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
     unsigned const minMatch = cParams->minMatch;
     ZSTD_blockCompressor const blockCompressor =
-        ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
+        ZSTD_selectBlockCompressor(cParams->strategy, useRowMatchFinder, ZSTD_matchState_dictMode(ms));
     /* Input bounds */
     BYTE const* const istart = (BYTE const*)src;
     BYTE const* const iend = istart + srcSize;
@@ -577,9 +671,18 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
     BYTE const* ip = istart;
 
     DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
+    /* If using opt parser, use LDMs only as candidates rather than always accepting them */
+    if (cParams->strategy >= ZSTD_btopt) {
+        size_t lastLLSize;
+        ms->ldmSeqStore = rawSeqStore;
+        lastLLSize = blockCompressor(ms, seqStore, rep, src, srcSize);
+        ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore, srcSize);
+        return lastLLSize;
+    }
+
     assert(rawSeqStore->pos <= rawSeqStore->size);
     assert(rawSeqStore->size <= rawSeqStore->capacity);
-    /* Loop through each sequence and apply the block compressor to the lits */
+    /* Loop through each sequence and apply the block compressor to the literals */
     while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
         /* maybeSplitSequence updates rawSeqStore->pos */
         rawSeq const sequence = maybeSplitSequence(rawSeqStore,

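A quick check of ZSTD_ldm_skipRawSeqStoreBytes() above, with illustrative numbers: given a store holding {litLength 5, matchLength 10} followed by {litLength 3, matchLength 7}, with pos = 0 and posInSequence = 0, skipping nbBytes = 20 consumes the 15-byte first sequence (pos becomes 1) and leaves posInSequence = 5 inside the second; skipping 25 instead exhausts both sequences, so pos reaches size and posInSequence is reset to 0.
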
+ 9 - 2
Utilities/cmzstd/lib/compress/zstd_ldm.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -66,6 +66,7 @@ size_t ZSTD_ldm_generateSequences(
  */
 size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
             ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+            ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
             void const* src, size_t srcSize);
 
 /**
@@ -73,11 +74,17 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
  *
  * Skip past `srcSize` bytes worth of sequences in `rawSeqStore`.
  * Avoids emitting matches less than `minMatch` bytes.
- * Must be called for data with is not passed to ZSTD_ldm_blockCompress().
+ * Must be called for data that is not passed to ZSTD_ldm_blockCompress().
  */
 void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize,
     U32 const minMatch);
 
+/* ZSTD_ldm_skipRawSeqStoreBytes():
+ * Moves forward in rawSeqStore by nbBytes, updating fields 'pos' and 'posInSequence'.
+ * Not to be used in conjunction with ZSTD_ldm_skipSequences().
+ * Must be called for data that is not passed to ZSTD_ldm_blockCompress().
+ */
+void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes);
 
 /** ZSTD_ldm_getTableSize() :
  *  Estimate the space needed for long distance matching tables or 0 if LDM is
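
To illustrate the contract documented above: bytes that bypass ZSTD_ldm_blockCompress() must still be skipped in the store, or later sequence offsets would drift out of alignment. A minimal caller-side sketch (the dispatch helper and its flag are hypothetical; only the two ZSTD_ldm_* calls and the types come from this codebase):

    /* Hypothetical helper, for illustration only: route a block either
     * through the LDM block compressor or past it, keeping the raw
     * sequence store in sync in both cases. */
    static size_t ldm_consumeBlock(rawSeqStore_t* seqStore, ZSTD_matchState_t* ms,
                                   seqStore_t* out, U32 rep[ZSTD_REP_NUM],
                                   ZSTD_useRowMatchFinderMode_e rowMode,
                                   const void* src, size_t srcSize,
                                   U32 minMatch, int blockUsesLdm)
    {
        if (blockUsesLdm) {
            /* Normal path: the block compressor consumes the store itself. */
            return ZSTD_ldm_blockCompress(seqStore, ms, out, rep, rowMode, src, srcSize);
        }
        /* Block bypasses LDM (e.g. emitted as a raw block): skip its bytes. */
        ZSTD_ldm_skipSequences(seqStore, srcSize, minMatch);
        return srcSize;
    }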

+ 103 - 0
Utilities/cmzstd/lib/compress/zstd_ldm_geartab.h

@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_LDM_GEARTAB_H
+#define ZSTD_LDM_GEARTAB_H
+
+static U64 ZSTD_ldm_gearTab[256] = {
+    0xf5b8f72c5f77775c, 0x84935f266b7ac412, 0xb647ada9ca730ccc,
+    0xb065bb4b114fb1de, 0x34584e7e8c3a9fd0, 0x4e97e17c6ae26b05,
+    0x3a03d743bc99a604, 0xcecd042422c4044f, 0x76de76c58524259e,
+    0x9c8528f65badeaca, 0x86563706e2097529, 0x2902475fa375d889,
+    0xafb32a9739a5ebe6, 0xce2714da3883e639, 0x21eaf821722e69e,
+    0x37b628620b628,    0x49a8d455d88caf5,  0x8556d711e6958140,
+    0x4f7ae74fc605c1f,  0x829f0c3468bd3a20, 0x4ffdc885c625179e,
+    0x8473de048a3daf1b, 0x51008822b05646b2, 0x69d75d12b2d1cc5f,
+    0x8c9d4a19159154bc, 0xc3cc10f4abbd4003, 0xd06ddc1cecb97391,
+    0xbe48e6e7ed80302e, 0x3481db31cee03547, 0xacc3f67cdaa1d210,
+    0x65cb771d8c7f96cc, 0x8eb27177055723dd, 0xc789950d44cd94be,
+    0x934feadc3700b12b, 0x5e485f11edbdf182, 0x1e2e2a46fd64767a,
+    0x2969ca71d82efa7c, 0x9d46e9935ebbba2e, 0xe056b67e05e6822b,
+    0x94d73f55739d03a0, 0xcd7010bdb69b5a03, 0x455ef9fcd79b82f4,
+    0x869cb54a8749c161, 0x38d1a4fa6185d225, 0xb475166f94bbe9bb,
+    0xa4143548720959f1, 0x7aed4780ba6b26ba, 0xd0ce264439e02312,
+    0x84366d746078d508, 0xa8ce973c72ed17be, 0x21c323a29a430b01,
+    0x9962d617e3af80ee, 0xab0ce91d9c8cf75b, 0x530e8ee6d19a4dbc,
+    0x2ef68c0cf53f5d72, 0xc03a681640a85506, 0x496e4e9f9c310967,
+    0x78580472b59b14a0, 0x273824c23b388577, 0x66bf923ad45cb553,
+    0x47ae1a5a2492ba86, 0x35e304569e229659, 0x4765182a46870b6f,
+    0x6cbab625e9099412, 0xddac9a2e598522c1, 0x7172086e666624f2,
+    0xdf5003ca503b7837, 0x88c0c1db78563d09, 0x58d51865acfc289d,
+    0x177671aec65224f1, 0xfb79d8a241e967d7, 0x2be1e101cad9a49a,
+    0x6625682f6e29186b, 0x399553457ac06e50, 0x35dffb4c23abb74,
+    0x429db2591f54aade, 0xc52802a8037d1009, 0x6acb27381f0b25f3,
+    0xf45e2551ee4f823b, 0x8b0ea2d99580c2f7, 0x3bed519cbcb4e1e1,
+    0xff452823dbb010a,  0x9d42ed614f3dd267, 0x5b9313c06257c57b,
+    0xa114b8008b5e1442, 0xc1fe311c11c13d4b, 0x66e8763ea34c5568,
+    0x8b982af1c262f05d, 0xee8876faaa75fbb7, 0x8a62a4d0d172bb2a,
+    0xc13d94a3b7449a97, 0x6dbbba9dc15d037c, 0xc786101f1d92e0f1,
+    0xd78681a907a0b79b, 0xf61aaf2962c9abb9, 0x2cfd16fcd3cb7ad9,
+    0x868c5b6744624d21, 0x25e650899c74ddd7, 0xba042af4a7c37463,
+    0x4eb1a539465a3eca, 0xbe09dbf03b05d5ca, 0x774e5a362b5472ba,
+    0x47a1221229d183cd, 0x504b0ca18ef5a2df, 0xdffbdfbde2456eb9,
+    0x46cd2b2fbee34634, 0xf2aef8fe819d98c3, 0x357f5276d4599d61,
+    0x24a5483879c453e3, 0x88026889192b4b9,  0x28da96671782dbec,
+    0x4ef37c40588e9aaa, 0x8837b90651bc9fb3, 0xc164f741d3f0e5d6,
+    0xbc135a0a704b70ba, 0x69cd868f7622ada,  0xbc37ba89e0b9c0ab,
+    0x47c14a01323552f6, 0x4f00794bacee98bb, 0x7107de7d637a69d5,
+    0x88af793bb6f2255e, 0xf3c6466b8799b598, 0xc288c616aa7f3b59,
+    0x81ca63cf42fca3fd, 0x88d85ace36a2674b, 0xd056bd3792389e7,
+    0xe55c396c4e9dd32d, 0xbefb504571e6c0a6, 0x96ab32115e91e8cc,
+    0xbf8acb18de8f38d1, 0x66dae58801672606, 0x833b6017872317fb,
+    0xb87c16f2d1c92864, 0xdb766a74e58b669c, 0x89659f85c61417be,
+    0xc8daad856011ea0c, 0x76a4b565b6fe7eae, 0xa469d085f6237312,
+    0xaaf0365683a3e96c, 0x4dbb746f8424f7b8, 0x638755af4e4acc1,
+    0x3d7807f5bde64486, 0x17be6d8f5bbb7639, 0x903f0cd44dc35dc,
+    0x67b672eafdf1196c, 0xa676ff93ed4c82f1, 0x521d1004c5053d9d,
+    0x37ba9ad09ccc9202, 0x84e54d297aacfb51, 0xa0b4b776a143445,
+    0x820d471e20b348e,  0x1874383cb83d46dc, 0x97edeec7a1efe11c,
+    0xb330e50b1bdc42aa, 0x1dd91955ce70e032, 0xa514cdb88f2939d5,
+    0x2791233fd90db9d3, 0x7b670a4cc50f7a9b, 0x77c07d2a05c6dfa5,
+    0xe3778b6646d0a6fa, 0xb39c8eda47b56749, 0x933ed448addbef28,
+    0xaf846af6ab7d0bf4, 0xe5af208eb666e49,  0x5e6622f73534cd6a,
+    0x297daeca42ef5b6e, 0x862daef3d35539a6, 0xe68722498f8e1ea9,
+    0x981c53093dc0d572, 0xfa09b0bfbf86fbf5, 0x30b1e96166219f15,
+    0x70e7d466bdc4fb83, 0x5a66736e35f2a8e9, 0xcddb59d2b7c1baef,
+    0xd6c7d247d26d8996, 0xea4e39eac8de1ba3, 0x539c8bb19fa3aff2,
+    0x9f90e4c5fd508d8,  0xa34e5956fbaf3385, 0x2e2f8e151d3ef375,
+    0x173691e9b83faec1, 0xb85a8d56bf016379, 0x8382381267408ae3,
+    0xb90f901bbdc0096d, 0x7c6ad32933bcec65, 0x76bb5e2f2c8ad595,
+    0x390f851a6cf46d28, 0xc3e6064da1c2da72, 0xc52a0c101cfa5389,
+    0xd78eaf84a3fbc530, 0x3781b9e2288b997e, 0x73c2f6dea83d05c4,
+    0x4228e364c5b5ed7,  0x9d7a3edf0da43911, 0x8edcfeda24686756,
+    0x5e7667a7b7a9b3a1, 0x4c4f389fa143791d, 0xb08bc1023da7cddc,
+    0x7ab4be3ae529b1cc, 0x754e6132dbe74ff9, 0x71635442a839df45,
+    0x2f6fb1643fbe52de, 0x961e0a42cf7a8177, 0xf3b45d83d89ef2ea,
+    0xee3de4cf4a6e3e9b, 0xcd6848542c3295e7, 0xe4cee1664c78662f,
+    0x9947548b474c68c4, 0x25d73777a5ed8b0b, 0xc915b1d636b7fc,
+    0x21c2ba75d9b0d2da, 0x5f6b5dcf608a64a1, 0xdcf333255ff9570c,
+    0x633b922418ced4ee, 0xc136dde0b004b34a, 0x58cc83b05d4b2f5a,
+    0x5eb424dda28e42d2, 0x62df47369739cd98, 0xb4e0b42485e4ce17,
+    0x16e1f0c1f9a8d1e7, 0x8ec3916707560ebf, 0x62ba6e2df2cc9db3,
+    0xcbf9f4ff77d83a16, 0x78d9d7d07d2bbcc4, 0xef554ce1e02c41f4,
+    0x8d7581127eccf94d, 0xa9b53336cb3c8a05, 0x38c42c0bf45c4f91,
+    0x640893cdf4488863, 0x80ec34bc575ea568, 0x39f324f5b48eaa40,
+    0xe9d9ed1f8eff527f, 0x9224fc058cc5a214, 0xbaba00b04cfe7741,
+    0x309a9f120fcf52af, 0xa558f3ec65626212, 0x424bec8b7adabe2f,
+    0x41622513a6aea433, 0xb88da2d5324ca798, 0xd287733b245528a4,
+    0x9a44697e6d68aec3, 0x7b1093be2f49bb28, 0x50bbec632e3d8aad,
+    0x6cd90723e1ea8283, 0x897b9e7431b02bf3, 0x219efdcb338a7047,
+    0x3b0311f0a27c0656, 0xdb17bf91c0db96e7, 0x8cd4fd6b4e85a5b2,
+    0xfab071054ba6409d, 0x40d6fe831fa9dfd9, 0xaf358debad7d791e,
+    0xeb8d0e25a65e3e58, 0xbbcbd3df14e08580, 0xcf751f27ecdab2b,
+    0x2b4da14f2613d8f4
+};
+
+#endif /* ZSTD_LDM_GEARTAB_H */
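
For context, a table of 256 random 64-bit constants like this one drives the "gear" rolling hash that zstd_ldm.c uses to pick candidate split points: each input byte shifts the state left by one bit and adds its table entry, so the state is a fingerprint of roughly the last 64 bytes seen. A minimal sketch of the update rule (the real feed loop and stop condition in zstd_ldm.c differ in detail):

    /* Gear-hash sketch: the left shift ages each byte out of the state
     * after 64 steps, giving a rolling fingerprint of the recent input. */
    static U64 gear_feed_sketch(U64 state, const BYTE* data, size_t n)
    {
        size_t i;
        for (i = 0; i < n; i++)
            state = (state << 1) + ZSTD_ldm_gearTab[data[i] & 0xff];
        return state;
    }
    /* A split point then fires roughly every 2^k bytes by testing k bits of
     * the state, e.g. if ((state & stopMask) == stopMask) { record split; } */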

+ 191 - 46
Utilities/cmzstd/lib/compress/zstd_opt.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+ * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -386,32 +386,32 @@ static U32 ZSTD_insertBt1(
     const BYTE* const dictEnd = dictBase + dictLimit;
     const BYTE* const prefixStart = base + dictLimit;
     const BYTE* match;
-    const U32 current = (U32)(ip-base);
-    const U32 btLow = btMask >= current ? 0 : current - btMask;
-    U32* smallerPtr = bt + 2*(current&btMask);
+    const U32 curr = (U32)(ip-base);
+    const U32 btLow = btMask >= curr ? 0 : curr - btMask;
+    U32* smallerPtr = bt + 2*(curr&btMask);
     U32* largerPtr  = smallerPtr + 1;
     U32 dummy32;   /* to be nullified at the end */
     U32 const windowLow = ms->window.lowLimit;
-    U32 matchEndIdx = current+8+1;
+    U32 matchEndIdx = curr+8+1;
     size_t bestLength = 8;
     U32 nbCompares = 1U << cParams->searchLog;
 #ifdef ZSTD_C_PREDICT
-    U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
-    U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
+    U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
+    U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
     predictedSmall += (predictedSmall>0);
     predictedLarge += (predictedLarge>0);
 #endif /* ZSTD_C_PREDICT */
 
-    DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current);
+    DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
 
     assert(ip <= iend-8);   /* required for h calculation */
-    hashTable[h] = current;   /* Update Hash Table */
+    hashTable[h] = curr;   /* Update Hash Table */
 
     assert(windowLow > 0);
     while (nbCompares-- && (matchIndex >= windowLow)) {
         U32* const nextPtr = bt + 2*(matchIndex & btMask);
         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
-        assert(matchIndex < current);
+        assert(matchIndex < curr);
 
 #ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog small <= 11 */
         const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
@@ -474,8 +474,8 @@ static U32 ZSTD_insertBt1(
     *smallerPtr = *largerPtr = 0;
     {   U32 positions = 0;
         if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384));   /* speed optimization */
-        assert(matchEndIdx > current + 8);
-        return MAX(positions, matchEndIdx - (current + 8));
+        assert(matchEndIdx > curr + 8);
+        return MAX(positions, matchEndIdx - (curr + 8));
     }
 }
 
@@ -519,7 +519,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
     U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
     const BYTE* const base = ms->window.base;
-    U32 const current = (U32)(ip-base);
+    U32 const curr = (U32)(ip-base);
     U32 const hashLog = cParams->hashLog;
     U32 const minMatch = (mls==3) ? 3 : 4;
     U32* const hashTable = ms->hashTable;
@@ -533,12 +533,12 @@ U32 ZSTD_insertBtAndGetAllMatches (
     U32 const dictLimit = ms->window.dictLimit;
     const BYTE* const dictEnd = dictBase + dictLimit;
     const BYTE* const prefixStart = base + dictLimit;
-    U32 const btLow = (btMask >= current) ? 0 : current - btMask;
-    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
+    U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
+    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
     U32 const matchLow = windowLow ? windowLow : 1;
-    U32* smallerPtr = bt + 2*(current&btMask);
-    U32* largerPtr  = bt + 2*(current&btMask) + 1;
-    U32 matchEndIdx = current+8+1;   /* farthest referenced position of any match => detects repetitive patterns */
+    U32* smallerPtr = bt + 2*(curr&btMask);
+    U32* largerPtr  = bt + 2*(curr&btMask) + 1;
+    U32 matchEndIdx = curr+8+1;   /* farthest referenced position of any match => detects repetitive patterns */
     U32 dummy32;   /* to be nullified at the end */
     U32 mnum = 0;
     U32 nbCompares = 1U << cParams->searchLog;
@@ -557,7 +557,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
     U32         const dmsBtLow      = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
 
     size_t bestLength = lengthToBeat-1;
-    DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current);
+    DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);
 
     /* check repCode */
     assert(ll0 <= 1);   /* necessarily 1 or 0 */
@@ -565,29 +565,29 @@ U32 ZSTD_insertBtAndGetAllMatches (
         U32 repCode;
         for (repCode = ll0; repCode < lastR; repCode++) {
             U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
-            U32 const repIndex = current - repOffset;
+            U32 const repIndex = curr - repOffset;
             U32 repLen = 0;
-            assert(current >= dictLimit);
-            if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) {  /* equivalent to `current > repIndex >= dictLimit` */
+            assert(curr >= dictLimit);
+            if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) {  /* equivalent to `curr > repIndex >= dictLimit` */
                 /* We must validate the repcode offset because when we're using a dictionary the
                  * valid offset range shrinks when the dictionary goes out of bounds.
                  */
                 if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
                     repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
                 }
-            } else {  /* repIndex < dictLimit || repIndex >= current */
+            } else {  /* repIndex < dictLimit || repIndex >= curr */
                 const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
                                              dmsBase + repIndex - dmsIndexDelta :
                                              dictBase + repIndex;
-                assert(current >= windowLow);
+                assert(curr >= windowLow);
                 if ( dictMode == ZSTD_extDict
-                  && ( ((repOffset-1) /*intentional overflow*/ < current - windowLow)  /* equivalent to `current > repIndex >= windowLow` */
+                  && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow)  /* equivalent to `curr > repIndex >= windowLow` */
                      & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
                   && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
                     repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
                 }
                 if (dictMode == ZSTD_dictMatchState
-                  && ( ((repOffset-1) /*intentional overflow*/ < current - (dmsLowLimit + dmsIndexDelta))  /* equivalent to `current > repIndex >= dmsLowLimit` */
+                  && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta))  /* equivalent to `curr > repIndex >= dmsLowLimit` */
                      & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
                   && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
                     repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
@@ -609,7 +609,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
     if ((mls == 3) /*static*/ && (bestLength < mls)) {
         U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
         if ((matchIndex3 >= matchLow)
-          & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
+          & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
             size_t mlen;
             if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
                 const BYTE* const match = base + matchIndex3;
@@ -624,26 +624,26 @@ U32 ZSTD_insertBtAndGetAllMatches (
                 DEBUGLOG(8, "found small match with hlog3, of length %u",
                             (U32)mlen);
                 bestLength = mlen;
-                assert(current > matchIndex3);
+                assert(curr > matchIndex3);
                 assert(mnum==0);  /* no prior solution */
-                matches[0].off = (current - matchIndex3) + ZSTD_REP_MOVE;
+                matches[0].off = (curr - matchIndex3) + ZSTD_REP_MOVE;
                 matches[0].len = (U32)mlen;
                 mnum = 1;
                 if ( (mlen > sufficient_len) |
                      (ip+mlen == iLimit) ) {  /* best possible length */
-                    ms->nextToUpdate = current+1;  /* skip insertion */
+                    ms->nextToUpdate = curr+1;  /* skip insertion */
                     return 1;
         }   }   }
         /* no dictMatchState lookup: dicts don't have a populated HC3 table */
     }
 
-    hashTable[h] = current;   /* Update Hash Table */
+    hashTable[h] = curr;   /* Update Hash Table */
 
     while (nbCompares-- && (matchIndex >= matchLow)) {
         U32* const nextPtr = bt + 2*(matchIndex & btMask);
         const BYTE* match;
         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
-        assert(current > matchIndex);
+        assert(curr > matchIndex);
 
         if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
             assert(matchIndex+matchLength >= dictLimit);  /* ensure the condition is correct when !extDict */
@@ -660,12 +660,12 @@ U32 ZSTD_insertBtAndGetAllMatches (
 
         if (matchLength > bestLength) {
             DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
-                    (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE);
+                    (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
             assert(matchEndIdx > matchIndex);
             if (matchLength > matchEndIdx - matchIndex)
                 matchEndIdx = matchIndex + (U32)matchLength;
             bestLength = matchLength;
-            matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
+            matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
             matches[mnum].len = (U32)matchLength;
             mnum++;
             if ( (matchLength > ZSTD_OPT_NUM)
@@ -708,11 +708,11 @@ U32 ZSTD_insertBtAndGetAllMatches (
             if (matchLength > bestLength) {
                 matchIndex = dictMatchIndex + dmsIndexDelta;
                 DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
-                        (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE);
+                        (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
                 if (matchLength > matchEndIdx - matchIndex)
                     matchEndIdx = matchIndex + (U32)matchLength;
                 bestLength = matchLength;
-                matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
+                matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
                 matches[mnum].len = (U32)matchLength;
                 mnum++;
                 if ( (matchLength > ZSTD_OPT_NUM)
@@ -733,7 +733,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
         }
     }
 
-    assert(matchEndIdx > current+8);
+    assert(matchEndIdx > curr+8);
     ms->nextToUpdate = matchEndIdx - 8;  /* skip repetitive patterns */
     return mnum;
 }
@@ -764,6 +764,140 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
     }
 }
 
+/*************************
+*  LDM helper functions  *
+*************************/
+
+/* Struct containing info needed to make a decision about ldm inclusion */
+typedef struct {
+    rawSeqStore_t seqStore;         /* External match candidates store for this block */
+    U32 startPosInBlock;            /* Start position of the current match candidate */
+    U32 endPosInBlock;              /* End position of the current match candidate */
+    U32 offset;                     /* Offset of the match candidate */
+} ZSTD_optLdm_t;
+
+/* ZSTD_optLdm_skipRawSeqStoreBytes():
+ * Moves forward in rawSeqStore by nbBytes, which will update the fields 'pos' and 'posInSequence'.
+ */
+static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
+    U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
+    while (currPos && rawSeqStore->pos < rawSeqStore->size) {
+        rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
+        if (currPos >= currSeq.litLength + currSeq.matchLength) {
+            currPos -= currSeq.litLength + currSeq.matchLength;
+            rawSeqStore->pos++;
+        } else {
+            rawSeqStore->posInSequence = currPos;
+            break;
+        }
+    }
+    if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
+        rawSeqStore->posInSequence = 0;
+    }
+}
+
+/* ZSTD_opt_getNextMatchAndUpdateSeqStore():
+ * Calculates the beginning and end of the next match in the current block.
+ * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
+ */
+static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
+                                                   U32 blockBytesRemaining) {
+    rawSeq currSeq;
+    U32 currBlockEndPos;
+    U32 literalsBytesRemaining;
+    U32 matchBytesRemaining;
+
+    /* Setting match end position to MAX to ensure we never use an LDM during this block */
+    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
+        optLdm->startPosInBlock = UINT_MAX;
+        optLdm->endPosInBlock = UINT_MAX;
+        return;
+    }
+    /* Calculate appropriate bytes left in matchLength and litLength after adjusting
+       based on ldmSeqStore->posInSequence */
+    currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
+    assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
+    currBlockEndPos = currPosInBlock + blockBytesRemaining;
+    literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
+            currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
+            0;
+    matchBytesRemaining = (literalsBytesRemaining == 0) ?
+            currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
+            currSeq.matchLength;
+
+    /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
+    if (literalsBytesRemaining >= blockBytesRemaining) {
+        optLdm->startPosInBlock = UINT_MAX;
+        optLdm->endPosInBlock = UINT_MAX;
+        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
+        return;
+    }
+
+    /* Matches may be < MINMATCH by this process. In that case, we will reject them
+       when we are deciding whether or not to add the ldm */
+    optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
+    optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
+    optLdm->offset = currSeq.offset;
+
+    if (optLdm->endPosInBlock > currBlockEndPos) {
+        /* Match ends after the block ends, we can't use the whole match */
+        optLdm->endPosInBlock = currBlockEndPos;
+        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
+    } else {
+        /* Consume a number of bytes equal to the remaining size of the sequence */
+        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
+    }
+}
+
+/* ZSTD_optLdm_maybeAddMatch():
+ * Adds a match if it's long enough, based on its 'matchStartPosInBlock'
+ * and 'matchEndPosInBlock', into 'matches'. Maintains the correct ordering of 'matches'.
+ */
+static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
+                                      ZSTD_optLdm_t* optLdm, U32 currPosInBlock) {
+    U32 posDiff = currPosInBlock - optLdm->startPosInBlock;
+    /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
+    U32 candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
+    U32 candidateOffCode = optLdm->offset + ZSTD_REP_MOVE;
+
+    /* Ensure that current block position is not outside of the match */
+    if (currPosInBlock < optLdm->startPosInBlock
+      || currPosInBlock >= optLdm->endPosInBlock
+      || candidateMatchLength < MINMATCH) {
+        return;
+    }
+
+    if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
+        DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u",
+                 candidateOffCode, candidateMatchLength, currPosInBlock);
+        matches[*nbMatches].len = candidateMatchLength;
+        matches[*nbMatches].off = candidateOffCode;
+        (*nbMatches)++;
+    }
+}
+
+/* ZSTD_optLdm_processMatchCandidate():
+ * Wrapper function to update ldm seq store and call ldm functions as necessary.
+ */
+static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_t* matches, U32* nbMatches,
+                                              U32 currPosInBlock, U32 remainingBytes) {
+    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
+        return;
+    }
+
+    if (currPosInBlock >= optLdm->endPosInBlock) {
+        if (currPosInBlock > optLdm->endPosInBlock) {
+            /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
+             * at the end of a match from the ldm seq store, and will often be some bytes
+             * beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
+             */
+            U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock;
+            ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
+        }
+        ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
+    }
+    ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
+}
 
 /*-*******************************
 *  Optimal parser
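
A worked example of the block-window mapping above, with illustrative numbers: given currPosInBlock = 40, blockBytesRemaining = 200, posInSequence = 0 and a head sequence {litLength 16, matchLength 64, offset 9000}, ZSTD_opt_getNextMatchAndUpdateSeqStore() yields startPosInBlock = 56, endPosInBlock = 120 and offset = 9000, then advances the store by those 80 bytes; ZSTD_optLdm_maybeAddMatch() accepts the candidate only while currPosInBlock lies in [56, 120) and the remaining length endPosInBlock - currPosInBlock is at least MINMATCH.
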
@@ -817,6 +951,11 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
     ZSTD_optimal_t* const opt = optStatePtr->priceTable;
     ZSTD_match_t* const matches = optStatePtr->matchTable;
     ZSTD_optimal_t lastSequence;
+    ZSTD_optLdm_t optLdm;
+
+    optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
+    optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
+    ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
 
     /* init */
     DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
@@ -832,7 +971,9 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
         /* find first match */
         {   U32 const litlen = (U32)(ip - anchor);
             U32 const ll0 = !litlen;
-            U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
+            U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
+            ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
+                                              (U32)(ip-istart), (U32)(iend - ip));
             if (!nbMatches) { ip++; continue; }
 
             /* initialize opt[0] */
@@ -925,9 +1066,9 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
             if (opt[cur].mlen != 0) {
                 U32 const prev = cur - opt[cur].mlen;
                 repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
-                memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
+                ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
             } else {
-                memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
+                ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
             }
 
             /* last match must start at a minimum distance of 8 from oend */
@@ -945,8 +1086,12 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                 U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
                 U32 const previousPrice = opt[cur].price;
                 U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
-                U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
+                U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
                 U32 matchNb;
+
+                ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
+                                                  (U32)(inr-istart), (U32)(iend-inr));
+
                 if (!nbMatches) {
                     DEBUGLOG(7, "rPos:%u : no match found", cur);
                     continue;
@@ -1010,9 +1155,9 @@ _shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
          */
         if (lastSequence.mlen != 0) {
             repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
-            memcpy(rep, &reps, sizeof(reps));
+            ZSTD_memcpy(rep, &reps, sizeof(reps));
         } else {
-            memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
+            ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
         }
 
         {   U32 const storeEnd = cur + 1;
@@ -1110,7 +1255,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
                const void* src, size_t srcSize)
 {
     U32 tmpRep[ZSTD_REP_NUM];  /* updated rep codes will sink here */
-    memcpy(tmpRep, rep, sizeof(tmpRep));
+    ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
 
     DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
     assert(ms->opt.litLengthSum == 0);    /* first block */
@@ -1143,7 +1288,7 @@ size_t ZSTD_compressBlock_btultra2(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         const void* src, size_t srcSize)
 {
-    U32 const current = (U32)((const BYTE*)src - ms->window.base);
+    U32 const curr = (U32)((const BYTE*)src - ms->window.base);
     DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
 
     /* 2-pass strategy:
@@ -1158,7 +1303,7 @@ size_t ZSTD_compressBlock_btultra2(
     if ( (ms->opt.litLengthSum==0)   /* first block */
       && (seqStore->sequences == seqStore->sequencesStart)  /* no ldm */
       && (ms->window.dictLimit == ms->window.lowLimit)   /* no dictionary */
-      && (current == ms->window.dictLimit)   /* start of frame, nothing already loaded nor skipped */
+      && (curr == ms->window.dictLimit)   /* start of frame, nothing already loaded nor skipped */
       && (srcSize > ZSTD_PREDEF_THRESHOLD)
       ) {
         ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);

+ 1 - 1
Utilities/cmzstd/lib/compress/zstd_opt.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the

+ 93 - 415
Utilities/cmzstd/lib/compress/zstdmt_compress.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -20,8 +20,7 @@
 
 
 /* ======   Dependencies   ====== */
-#include <string.h>      /* memcpy, memset */
-#include <limits.h>      /* INT_MAX, UINT_MAX */
+#include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memset, INT_MAX, UINT_MAX */
 #include "../common/mem.h"         /* MEM_STATIC */
 #include "../common/pool.h"        /* threadpool */
 #include "../common/threading.h"   /* mutex */
@@ -106,11 +105,11 @@ typedef struct ZSTDMT_bufferPool_s {
 static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbWorkers, ZSTD_customMem cMem)
 {
     unsigned const maxNbBuffers = 2*nbWorkers + 3;
-    ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_calloc(
+    ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_customCalloc(
         sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem);
     if (bufPool==NULL) return NULL;
     if (ZSTD_pthread_mutex_init(&bufPool->poolMutex, NULL)) {
-        ZSTD_free(bufPool, cMem);
+        ZSTD_customFree(bufPool, cMem);
         return NULL;
     }
     bufPool->bufferSize = 64 KB;
@@ -127,10 +126,10 @@ static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
     if (!bufPool) return;   /* compatibility with free on NULL */
     for (u=0; u<bufPool->totalBuffers; u++) {
         DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->bTable[u].start);
-        ZSTD_free(bufPool->bTable[u].start, bufPool->cMem);
+        ZSTD_customFree(bufPool->bTable[u].start, bufPool->cMem);
     }
     ZSTD_pthread_mutex_destroy(&bufPool->poolMutex);
-    ZSTD_free(bufPool, bufPool->cMem);
+    ZSTD_customFree(bufPool, bufPool->cMem);
 }
 
 /* only works at initialization, not during compression */
@@ -201,13 +200,13 @@ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
         }
         /* size conditions not respected : scratch this buffer, create new one */
         DEBUGLOG(5, "ZSTDMT_getBuffer: existing buffer does not meet size conditions => freeing");
-        ZSTD_free(buf.start, bufPool->cMem);
+        ZSTD_customFree(buf.start, bufPool->cMem);
     }
     ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
     /* create new buffer */
     DEBUGLOG(5, "ZSTDMT_getBuffer: create a new buffer");
     {   buffer_t buffer;
-        void* const start = ZSTD_malloc(bSize, bufPool->cMem);
+        void* const start = ZSTD_customMalloc(bSize, bufPool->cMem);
         buffer.start = start;   /* note : start can be NULL if malloc fails ! */
         buffer.capacity = (start==NULL) ? 0 : bSize;
         if (start==NULL) {
@@ -229,13 +228,13 @@ static buffer_t ZSTDMT_resizeBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buffer)
 {
     size_t const bSize = bufPool->bufferSize;
     if (buffer.capacity < bSize) {
-        void* const start = ZSTD_malloc(bSize, bufPool->cMem);
+        void* const start = ZSTD_customMalloc(bSize, bufPool->cMem);
         buffer_t newBuffer;
         newBuffer.start = start;
         newBuffer.capacity = start == NULL ? 0 : bSize;
         if (start != NULL) {
             assert(newBuffer.capacity >= buffer.capacity);
-            memcpy(newBuffer.start, buffer.start, buffer.capacity);
+            ZSTD_memcpy(newBuffer.start, buffer.start, buffer.capacity);
             DEBUGLOG(5, "ZSTDMT_resizeBuffer: created buffer of size %u", (U32)bSize);
             return newBuffer;
         }
@@ -261,14 +260,12 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
     ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
     /* Reached bufferPool capacity (should not happen) */
     DEBUGLOG(5, "ZSTDMT_releaseBuffer: pool capacity reached => freeing ");
-    ZSTD_free(buf.start, bufPool->cMem);
+    ZSTD_customFree(buf.start, bufPool->cMem);
 }
 
 
 /* =====   Seq Pool Wrapper   ====== */
 
-static rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0};
-
 typedef ZSTDMT_bufferPool ZSTDMT_seqPool;
 
 static size_t ZSTDMT_sizeof_seqPool(ZSTDMT_seqPool* seqPool)
@@ -278,7 +275,7 @@ static size_t ZSTDMT_sizeof_seqPool(ZSTDMT_seqPool* seqPool)
 
 static rawSeqStore_t bufferToSeq(buffer_t buffer)
 {
-    rawSeqStore_t seq = {NULL, 0, 0, 0};
+    rawSeqStore_t seq = kNullRawSeqStore;
     seq.seq = (rawSeq*)buffer.start;
     seq.capacity = buffer.capacity / sizeof(rawSeq);
     return seq;
@@ -354,7 +351,7 @@ static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
     for (cid=0; cid<pool->totalCCtx; cid++)
         ZSTD_freeCCtx(pool->cctx[cid]);  /* note : compatible with free on NULL */
     ZSTD_pthread_mutex_destroy(&pool->poolMutex);
-    ZSTD_free(pool, pool->cMem);
+    ZSTD_customFree(pool, pool->cMem);
 }
 
 /* ZSTDMT_createCCtxPool() :
@@ -362,12 +359,12 @@ static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
 static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(int nbWorkers,
                                               ZSTD_customMem cMem)
 {
-    ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_calloc(
+    ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_customCalloc(
         sizeof(ZSTDMT_CCtxPool) + (nbWorkers-1)*sizeof(ZSTD_CCtx*), cMem);
     assert(nbWorkers > 0);
     if (!cctxPool) return NULL;
     if (ZSTD_pthread_mutex_init(&cctxPool->poolMutex, NULL)) {
-        ZSTD_free(cctxPool, cMem);
+        ZSTD_customFree(cctxPool, cMem);
         return NULL;
     }
     cctxPool->cMem = cMem;
@@ -475,10 +472,8 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
         ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
         assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
         assert(params.ldmParams.hashRateLog < 32);
-        serialState->ldmState.hashPower =
-                ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength);
     } else {
-        memset(&params.ldmParams, 0, sizeof(params.ldmParams));
+        ZSTD_memset(&params.ldmParams, 0, sizeof(params.ldmParams));
     }
     serialState->nextJobID = 0;
     if (params.fParams.checksumFlag)
@@ -489,35 +484,35 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
         size_t const hashSize = ((size_t)1 << hashLog) * sizeof(ldmEntry_t);
         unsigned const bucketLog =
             params.ldmParams.hashLog - params.ldmParams.bucketSizeLog;
-        size_t const bucketSize = (size_t)1 << bucketLog;
         unsigned const prevBucketLog =
             serialState->params.ldmParams.hashLog -
             serialState->params.ldmParams.bucketSizeLog;
+        size_t const numBuckets = (size_t)1 << bucketLog;
         /* Size the seq pool tables */
         ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));
         /* Reset the window */
         ZSTD_window_init(&serialState->ldmState.window);
         /* Resize tables and output space if necessary. */
         if (serialState->ldmState.hashTable == NULL || serialState->params.ldmParams.hashLog < hashLog) {
-            ZSTD_free(serialState->ldmState.hashTable, cMem);
-            serialState->ldmState.hashTable = (ldmEntry_t*)ZSTD_malloc(hashSize, cMem);
+            ZSTD_customFree(serialState->ldmState.hashTable, cMem);
+            serialState->ldmState.hashTable = (ldmEntry_t*)ZSTD_customMalloc(hashSize, cMem);
         }
         if (serialState->ldmState.bucketOffsets == NULL || prevBucketLog < bucketLog) {
-            ZSTD_free(serialState->ldmState.bucketOffsets, cMem);
-            serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_malloc(bucketSize, cMem);
+            ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem);
+            serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(numBuckets, cMem);
         }
         if (!serialState->ldmState.hashTable || !serialState->ldmState.bucketOffsets)
             return 1;
         /* Zero the tables */
-        memset(serialState->ldmState.hashTable, 0, hashSize);
-        memset(serialState->ldmState.bucketOffsets, 0, bucketSize);
+        ZSTD_memset(serialState->ldmState.hashTable, 0, hashSize);
+        ZSTD_memset(serialState->ldmState.bucketOffsets, 0, numBuckets);
 
         /* Update window state and fill hash table with dict */
         serialState->ldmState.loadedDictEnd = 0;
         if (dictSize > 0) {
             if (dictContentType == ZSTD_dct_rawContent) {
                 BYTE const* const dictEnd = (const BYTE*)dict + dictSize;
-                ZSTD_window_update(&serialState->ldmState.window, dict, dictSize);
+                ZSTD_window_update(&serialState->ldmState.window, dict, dictSize, /* forceNonContiguous */ 0);
                 ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, dictEnd, &params.ldmParams);
                 serialState->ldmState.loadedDictEnd = params.forceWindow ? 0 : (U32)(dictEnd - serialState->ldmState.window.base);
             } else {
@@ -537,7 +532,7 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
 static int ZSTDMT_serialState_init(serialState_t* serialState)
 {
     int initError = 0;
-    memset(serialState, 0, sizeof(*serialState));
+    ZSTD_memset(serialState, 0, sizeof(*serialState));
     initError |= ZSTD_pthread_mutex_init(&serialState->mutex, NULL);
     initError |= ZSTD_pthread_cond_init(&serialState->cond, NULL);
     initError |= ZSTD_pthread_mutex_init(&serialState->ldmWindowMutex, NULL);
@@ -552,8 +547,8 @@ static void ZSTDMT_serialState_free(serialState_t* serialState)
     ZSTD_pthread_cond_destroy(&serialState->cond);
     ZSTD_pthread_mutex_destroy(&serialState->ldmWindowMutex);
     ZSTD_pthread_cond_destroy(&serialState->ldmWindowCond);
-    ZSTD_free(serialState->ldmState.hashTable, cMem);
-    ZSTD_free(serialState->ldmState.bucketOffsets, cMem);
+    ZSTD_customFree(serialState->ldmState.hashTable, cMem);
+    ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem);
 }
 
 static void ZSTDMT_serialState_update(serialState_t* serialState,
@@ -574,7 +569,7 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
             assert(seqStore.seq != NULL && seqStore.pos == 0 &&
                    seqStore.size == 0 && seqStore.capacity > 0);
             assert(src.size <= serialState->params.jobSize);
-            ZSTD_window_update(&serialState->ldmState.window, src.start, src.size);
+            ZSTD_window_update(&serialState->ldmState.window, src.start, src.size, /* forceNonContiguous */ 0);
             error = ZSTD_ldm_generateSequences(
                 &serialState->ldmState, &seqStore,
                 &serialState->params.ldmParams, src.start, src.size);
@@ -686,6 +681,8 @@ static void ZSTDMT_compressionJob(void* jobDescription)
     if (job->jobID != 0) jobParams.fParams.checksumFlag = 0;
     /* Don't run LDM for the chunks, since we handle it externally */
     jobParams.ldmParams.enableLdm = 0;
+    /* Correct nbWorkers to 0. */
+    jobParams.nbWorkers = 0;
 
 
     /* init */
@@ -698,6 +695,10 @@ static void ZSTDMT_compressionJob(void* jobDescription)
         {   size_t const forceWindowError = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob);
             if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError);
         }
+        if (!job->firstJob) {
+            size_t const err = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_deterministicRefPrefix, 0);
+            if (ZSTD_isError(err)) JOB_ERROR(err);
+        }
         {   size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
                                         job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
                                         ZSTD_dtlm_fast,
@@ -753,6 +754,13 @@ static void ZSTDMT_compressionJob(void* jobDescription)
             if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
             lastCBlockSize = cSize;
     }   }
+    if (!job->firstJob) {
+        /* Double check that we don't have an ext-dict, because then our
+         * repcode invalidation doesn't work.
+         */
+        assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
+    }
+    ZSTD_CCtx_trace(cctx, 0);
 
 _endJob:
     ZSTDMT_serialState_ensureFinished(job->serial, job->jobID, job->cSize);
@@ -820,7 +828,6 @@ struct ZSTDMT_CCtx_s {
     roundBuff_t roundBuff;
     serialState_t serial;
     rsyncState_t rsync;
-    unsigned singleBlockingThread;
     unsigned jobIDMask;
     unsigned doneJobID;
     unsigned nextJobID;
@@ -832,6 +839,7 @@ struct ZSTDMT_CCtx_s {
     ZSTD_customMem cMem;
     ZSTD_CDict* cdictLocal;
     const ZSTD_CDict* cdict;
+    unsigned providedFactory: 1;
 };
 
 static void ZSTDMT_freeJobsTable(ZSTDMT_jobDescription* jobTable, U32 nbJobs, ZSTD_customMem cMem)
@@ -842,7 +850,7 @@ static void ZSTDMT_freeJobsTable(ZSTDMT_jobDescription* jobTable, U32 nbJobs, ZS
         ZSTD_pthread_mutex_destroy(&jobTable[jobNb].job_mutex);
         ZSTD_pthread_cond_destroy(&jobTable[jobNb].job_cond);
     }
-    ZSTD_free(jobTable, cMem);
+    ZSTD_customFree(jobTable, cMem);
 }
 
 /* ZSTDMT_allocJobsTable()
@@ -854,7 +862,7 @@ static ZSTDMT_jobDescription* ZSTDMT_createJobsTable(U32* nbJobsPtr, ZSTD_custom
     U32 const nbJobs = 1 << nbJobsLog2;
     U32 jobNb;
     ZSTDMT_jobDescription* const jobTable = (ZSTDMT_jobDescription*)
-                ZSTD_calloc(nbJobs * sizeof(ZSTDMT_jobDescription), cMem);
+                ZSTD_customCalloc(nbJobs * sizeof(ZSTDMT_jobDescription), cMem);
     int initError = 0;
     if (jobTable==NULL) return NULL;
     *nbJobsPtr = nbJobs;
@@ -885,12 +893,12 @@ static size_t ZSTDMT_expandJobsTable (ZSTDMT_CCtx* mtctx, U32 nbWorkers) {
 
 /* ZSTDMT_CCtxParam_setNbWorkers():
  * Internal use only */
-size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers)
+static size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers)
 {
     return ZSTD_CCtxParams_setParameter(params, ZSTD_c_nbWorkers, (int)nbWorkers);
 }
 
-MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers, ZSTD_customMem cMem)
+MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers, ZSTD_customMem cMem, ZSTD_threadPool* pool)
 {
     ZSTDMT_CCtx* mtctx;
     U32 nbJobs = nbWorkers + 2;
@@ -903,12 +911,19 @@ MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers,
         /* invalid custom allocator */
         return NULL;
 
-    mtctx = (ZSTDMT_CCtx*) ZSTD_calloc(sizeof(ZSTDMT_CCtx), cMem);
+    mtctx = (ZSTDMT_CCtx*) ZSTD_customCalloc(sizeof(ZSTDMT_CCtx), cMem);
     if (!mtctx) return NULL;
     ZSTDMT_CCtxParam_setNbWorkers(&mtctx->params, nbWorkers);
     mtctx->cMem = cMem;
     mtctx->allJobsCompleted = 1;
-    mtctx->factory = POOL_create_advanced(nbWorkers, 0, cMem);
+    if (pool != NULL) {
+      mtctx->factory = pool;
+      mtctx->providedFactory = 1;
+    }
+    else {
+      mtctx->factory = POOL_create_advanced(nbWorkers, 0, cMem);
+      mtctx->providedFactory = 0;
+    }
     mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, cMem);
     assert(nbJobs > 0); assert((nbJobs & (nbJobs - 1)) == 0);  /* ensure nbJobs is a power of 2 */
     mtctx->jobIDMask = nbJobs - 1;
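
The providedFactory flag recorded here pairs with ZSTDMT_freeCCtx() below: when the caller supplies a ZSTD_threadPool, the pool's lifetime stays with the caller, so POOL_free() is only invoked on pools this context created itself.
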
@@ -925,22 +940,18 @@ MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers,
     return mtctx;
 }
 
-ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem)
+ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem, ZSTD_threadPool* pool)
 {
 #ifdef ZSTD_MULTITHREAD
-    return ZSTDMT_createCCtx_advanced_internal(nbWorkers, cMem);
+    return ZSTDMT_createCCtx_advanced_internal(nbWorkers, cMem, pool);
 #else
     (void)nbWorkers;
     (void)cMem;
+    (void)pool;
     return NULL;
 #endif
 }
 
-ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers)
-{
-    return ZSTDMT_createCCtx_advanced(nbWorkers, ZSTD_defaultCMem);
-}
-
 
 /* ZSTDMT_releaseAllJobResources() :
  * note : ensure all workers are killed first ! */
@@ -957,7 +968,7 @@ static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
         ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
 
         /* Clear the job description, but keep the mutex/cond */
-        memset(&mtctx->jobs[jobID], 0, sizeof(mtctx->jobs[jobID]));
+        ZSTD_memset(&mtctx->jobs[jobID], 0, sizeof(mtctx->jobs[jobID]));
         mtctx->jobs[jobID].job_mutex = mutex;
         mtctx->jobs[jobID].job_cond = cond;
     }
@@ -984,7 +995,8 @@ static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* mtctx)
 size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx)
 {
     if (mtctx==NULL) return 0;   /* compatible with free on NULL */
-    POOL_free(mtctx->factory);   /* stop and free worker threads */
+    if (!mtctx->providedFactory)
+        POOL_free(mtctx->factory);   /* stop and free worker threads */
     ZSTDMT_releaseAllJobResources(mtctx);  /* release job resources into pools first */
     ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem);
     ZSTDMT_freeBufferPool(mtctx->bufPool);
@@ -993,8 +1005,8 @@ size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx)
     ZSTDMT_serialState_free(&mtctx->serial);
     ZSTD_freeCDict(mtctx->cdictLocal);
     if (mtctx->roundBuff.buffer)
-        ZSTD_free(mtctx->roundBuff.buffer, mtctx->cMem);
-    ZSTD_free(mtctx, mtctx->cMem);
+        ZSTD_customFree(mtctx->roundBuff.buffer, mtctx->cMem);
+    ZSTD_customFree(mtctx, mtctx->cMem);
     return 0;
 }
 
@@ -1011,65 +1023,6 @@ size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx)
             + mtctx->roundBuff.capacity;
 }
 
-/* Internal only */
-size_t
-ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params,
-                                   ZSTDMT_parameter parameter,
-                                   int value)
-{
-    DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter");
-    switch(parameter)
-    {
-    case ZSTDMT_p_jobSize :
-        DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter : set jobSize to %i", value);
-        return ZSTD_CCtxParams_setParameter(params, ZSTD_c_jobSize, value);
-    case ZSTDMT_p_overlapLog :
-        DEBUGLOG(4, "ZSTDMT_p_overlapLog : %i", value);
-        return ZSTD_CCtxParams_setParameter(params, ZSTD_c_overlapLog, value);
-    case ZSTDMT_p_rsyncable :
-        DEBUGLOG(4, "ZSTD_p_rsyncable : %i", value);
-        return ZSTD_CCtxParams_setParameter(params, ZSTD_c_rsyncable, value);
-    default :
-        return ERROR(parameter_unsupported);
-    }
-}
-
-size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value)
-{
-    DEBUGLOG(4, "ZSTDMT_setMTCtxParameter");
-    return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value);
-}
-
-size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value)
-{
-    switch (parameter) {
-    case ZSTDMT_p_jobSize:
-        return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_jobSize, value);
-    case ZSTDMT_p_overlapLog:
-        return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_overlapLog, value);
-    case ZSTDMT_p_rsyncable:
-        return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_rsyncable, value);
-    default:
-        return ERROR(parameter_unsupported);
-    }
-}
-
-/* Sets parameters relevant to the compression job,
- * initializing others to default values. */
-static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(const ZSTD_CCtx_params* params)
-{
-    ZSTD_CCtx_params jobParams = *params;
-    /* Clear parameters related to multithreading */
-    jobParams.forceWindow = 0;
-    jobParams.nbWorkers = 0;
-    jobParams.jobSize = 0;
-    jobParams.overlapLog = 0;
-    jobParams.rsyncable = 0;
-    memset(&jobParams.ldmParams, 0, sizeof(ldmParams_t));
-    memset(&jobParams.customMem, 0, sizeof(ZSTD_customMem));
-    return jobParams;
-}
-
 
 /* ZSTDMT_resize() :
  * @return : error code if fails, 0 on success */
@@ -1098,7 +1051,7 @@ void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_p
     DEBUGLOG(5, "ZSTDMT_updateCParams_whileCompressing (level:%i)",
                 compressionLevel);
     mtctx->params.compressionLevel = compressionLevel;
-    {   ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, 0);
+    {   ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
         cParams.windowLog = saved_wlog;
         mtctx->params.cParams = cParams;
     }
@@ -1185,8 +1138,8 @@ static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params)
     if (params->ldmParams.enableLdm) {
         /* In Long Range Mode, the windowLog is typically oversized.
          * In which case, it's preferable to determine the jobSize
-         * based on chainLog instead. */
-        jobLog = MAX(21, params->cParams.chainLog + 4);
+         * based on cycleLog instead. */
+        jobLog = MAX(21, ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy) + 3);
     } else {
         jobLog = MAX(20, params->cParams.windowLog + 2);
     }
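
In Long Range Mode the window can be far larger than the match finder's working set, so the job size now follows the cycle length of the hash/chain tables rather than the raw chainLog. A worked check, assuming ZSTD_cycleLog behaves as defined in zstd_compress.c (it subtracts 1 for the binary-tree strategies):

    /* chainLog = 24, strategy = btultra (a bt* strategy, so btScale = 1) */
    #define MAX(a,b) ((a)>(b)?(a):(b))
    unsigned const chainLog  = 24;
    unsigned const cycleLog  = chainLog - 1;           /* ZSTD_cycleLog(24, btultra) */
    unsigned const jobLogOld = MAX(21, chainLog + 4);  /* 28 : 256 MiB jobs */
    unsigned const jobLogNew = MAX(21, cycleLog + 3);  /* 26 :  64 MiB jobs */

Smaller jobs mean more parallelism for LDM workloads while still covering several table cycles per job.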
@@ -1240,174 +1193,6 @@ static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params)
     return (ovLog==0) ? 0 : (size_t)1 << ovLog;
 }
 
-static unsigned
-ZSTDMT_computeNbJobs(const ZSTD_CCtx_params* params, size_t srcSize, unsigned nbWorkers)
-{
-    assert(nbWorkers>0);
-    {   size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params);
-        size_t const jobMaxSize = jobSizeTarget << 2;
-        size_t const passSizeMax = jobMaxSize * nbWorkers;
-        unsigned const multiplier = (unsigned)(srcSize / passSizeMax) + 1;
-        unsigned const nbJobsLarge = multiplier * nbWorkers;
-        unsigned const nbJobsMax = (unsigned)(srcSize / jobSizeTarget) + 1;
-        unsigned const nbJobsSmall = MIN(nbJobsMax, nbWorkers);
-        return (multiplier>1) ? nbJobsLarge : nbJobsSmall;
-}   }
-
-/* ZSTDMT_compress_advanced_internal() :
- * This is a blocking function : it will only give back control to caller after finishing its compression job.
- */
-static size_t
-ZSTDMT_compress_advanced_internal(
-                ZSTDMT_CCtx* mtctx,
-                void* dst, size_t dstCapacity,
-          const void* src, size_t srcSize,
-          const ZSTD_CDict* cdict,
-                ZSTD_CCtx_params params)
-{
-    ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(&params);
-    size_t const overlapSize = ZSTDMT_computeOverlapSize(&params);
-    unsigned const nbJobs = ZSTDMT_computeNbJobs(&params, srcSize, params.nbWorkers);
-    size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs;
-    size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize;   /* avoid too small last block */
-    const char* const srcStart = (const char*)src;
-    size_t remainingSrcSize = srcSize;
-    unsigned const compressWithinDst = (dstCapacity >= ZSTD_compressBound(srcSize)) ? nbJobs : (unsigned)(dstCapacity / ZSTD_compressBound(avgJobSize));  /* presumes avgJobSize >= 256 KB, which should be the case */
-    size_t frameStartPos = 0, dstBufferPos = 0;
-    assert(jobParams.nbWorkers == 0);
-    assert(mtctx->cctxPool->totalCCtx == params.nbWorkers);
-
-    params.jobSize = (U32)avgJobSize;
-    DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: nbJobs=%2u (rawSize=%u bytes; fixedSize=%u) ",
-                nbJobs, (U32)proposedJobSize, (U32)avgJobSize);
-
-    if ((nbJobs==1) | (params.nbWorkers<=1)) {   /* fallback to single-thread mode : this is a blocking invocation anyway */
-        ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
-        DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: fallback to single-thread mode");
-        if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams);
-        return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, &jobParams);
-    }
-
-    assert(avgJobSize >= 256 KB);  /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
-    ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgJobSize) );
-    /* LDM doesn't even try to load the dictionary in single-ingestion mode */
-    if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize, NULL, 0, ZSTD_dct_auto))
-        return ERROR(memory_allocation);
-
-    FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbJobs) , "");  /* only expands if necessary */
-
-    {   unsigned u;
-        for (u=0; u<nbJobs; u++) {
-            size_t const jobSize = MIN(remainingSrcSize, avgJobSize);
-            size_t const dstBufferCapacity = ZSTD_compressBound(jobSize);
-            buffer_t const dstAsBuffer = { (char*)dst + dstBufferPos, dstBufferCapacity };
-            buffer_t const dstBuffer = u < compressWithinDst ? dstAsBuffer : g_nullBuffer;
-            size_t dictSize = u ? overlapSize : 0;
-
-            mtctx->jobs[u].prefix.start = srcStart + frameStartPos - dictSize;
-            mtctx->jobs[u].prefix.size = dictSize;
-            mtctx->jobs[u].src.start = srcStart + frameStartPos;
-            mtctx->jobs[u].src.size = jobSize; assert(jobSize > 0);  /* avoid job.src.size == 0 */
-            mtctx->jobs[u].consumed = 0;
-            mtctx->jobs[u].cSize = 0;
-            mtctx->jobs[u].cdict = (u==0) ? cdict : NULL;
-            mtctx->jobs[u].fullFrameSize = srcSize;
-            mtctx->jobs[u].params = jobParams;
-            /* do not calculate checksum within sections, but write it in header for first section */
-            mtctx->jobs[u].dstBuff = dstBuffer;
-            mtctx->jobs[u].cctxPool = mtctx->cctxPool;
-            mtctx->jobs[u].bufPool = mtctx->bufPool;
-            mtctx->jobs[u].seqPool = mtctx->seqPool;
-            mtctx->jobs[u].serial = &mtctx->serial;
-            mtctx->jobs[u].jobID = u;
-            mtctx->jobs[u].firstJob = (u==0);
-            mtctx->jobs[u].lastJob = (u==nbJobs-1);
-
-            DEBUGLOG(5, "ZSTDMT_compress_advanced_internal: posting job %u  (%u bytes)", u, (U32)jobSize);
-            DEBUG_PRINTHEX(6, mtctx->jobs[u].prefix.start, 12);
-            POOL_add(mtctx->factory, ZSTDMT_compressionJob, &mtctx->jobs[u]);
-
-            frameStartPos += jobSize;
-            dstBufferPos += dstBufferCapacity;
-            remainingSrcSize -= jobSize;
-    }   }
-
-    /* collect result */
-    {   size_t error = 0, dstPos = 0;
-        unsigned jobID;
-        for (jobID=0; jobID<nbJobs; jobID++) {
-            DEBUGLOG(5, "waiting for job %u ", jobID);
-            ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[jobID].job_mutex);
-            while (mtctx->jobs[jobID].consumed < mtctx->jobs[jobID].src.size) {
-                DEBUGLOG(5, "waiting for jobCompleted signal from job %u", jobID);
-                ZSTD_pthread_cond_wait(&mtctx->jobs[jobID].job_cond, &mtctx->jobs[jobID].job_mutex);
-            }
-            ZSTD_pthread_mutex_unlock(&mtctx->jobs[jobID].job_mutex);
-            DEBUGLOG(5, "ready to write job %u ", jobID);
-
-            {   size_t const cSize = mtctx->jobs[jobID].cSize;
-                if (ZSTD_isError(cSize)) error = cSize;
-                if ((!error) && (dstPos + cSize > dstCapacity)) error = ERROR(dstSize_tooSmall);
-                if (jobID) {   /* note : job 0 is written directly at dst, which is correct position */
-                    if (!error)
-                        memmove((char*)dst + dstPos, mtctx->jobs[jobID].dstBuff.start, cSize);  /* may overlap when job compressed within dst */
-                    if (jobID >= compressWithinDst) {  /* job compressed into its own buffer, which must be released */
-                        DEBUGLOG(5, "releasing buffer %u>=%u", jobID, compressWithinDst);
-                        ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
-                }   }
-                mtctx->jobs[jobID].dstBuff = g_nullBuffer;
-                mtctx->jobs[jobID].cSize = 0;
-                dstPos += cSize ;
-            }
-        }  /* for (jobID=0; jobID<nbJobs; jobID++) */
-
-        DEBUGLOG(4, "checksumFlag : %u ", params.fParams.checksumFlag);
-        if (params.fParams.checksumFlag) {
-            U32 const checksum = (U32)XXH64_digest(&mtctx->serial.xxhState);
-            if (dstPos + 4 > dstCapacity) {
-                error = ERROR(dstSize_tooSmall);
-            } else {
-                DEBUGLOG(4, "writing checksum : %08X \n", checksum);
-                MEM_writeLE32((char*)dst + dstPos, checksum);
-                dstPos += 4;
-        }   }
-
-        if (!error) DEBUGLOG(4, "compressed size : %u  ", (U32)dstPos);
-        return error ? error : dstPos;
-    }
-}
-
-size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
-                                void* dst, size_t dstCapacity,
-                          const void* src, size_t srcSize,
-                          const ZSTD_CDict* cdict,
-                                ZSTD_parameters params,
-                                int overlapLog)
-{
-    ZSTD_CCtx_params cctxParams = mtctx->params;
-    cctxParams.cParams = params.cParams;
-    cctxParams.fParams = params.fParams;
-    assert(ZSTD_OVERLAPLOG_MIN <= overlapLog && overlapLog <= ZSTD_OVERLAPLOG_MAX);
-    cctxParams.overlapLog = overlapLog;
-    return ZSTDMT_compress_advanced_internal(mtctx,
-                                             dst, dstCapacity,
-                                             src, srcSize,
-                                             cdict, cctxParams);
-}
-
-
-size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
-                           void* dst, size_t dstCapacity,
-                     const void* src, size_t srcSize,
-                           int compressionLevel)
-{
-    ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0);
-    int const overlapLog = ZSTDMT_overlapLog_default(params.cParams.strategy);
-    params.fParams.contentSizeFlag = 1;
-    return ZSTDMT_compress_advanced(mtctx, dst, dstCapacity, src, srcSize, NULL, params, overlapLog);
-}
-
-
 /* ====================================== */
 /* =======      Streaming API     ======= */
 /* ====================================== */
@@ -1432,16 +1217,6 @@ size_t ZSTDMT_initCStream_internal(
     if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
     if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = (size_t)ZSTDMT_JOBSIZE_MAX;
 
-    mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN);  /* do not trigger multi-threading when srcSize is too small */
-    if (mtctx->singleBlockingThread) {
-        ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(&params);
-        DEBUGLOG(5, "ZSTDMT_initCStream_internal: switch to single blocking thread mode");
-        assert(singleThreadParams.nbWorkers == 0);
-        return ZSTD_initCStream_internal(mtctx->cctxPool->cctx[0],
-                                         dict, dictSize, cdict,
-                                         &singleThreadParams, pledgedSrcSize);
-    }
-
     DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers);
 
     if (mtctx->allJobsCompleted == 0) {   /* previous compression not correctly finished */
@@ -1475,9 +1250,8 @@ size_t ZSTDMT_initCStream_internal(
 
     if (params.rsyncable) {
        /* Aim for the targetSectionSize as the average job size. */
-        U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);
-        U32 const rsyncBits = ZSTD_highbit32(jobSizeMB) + 20;
-        assert(jobSizeMB >= 1);
+        U32 const jobSizeKB = (U32)(mtctx->targetSectionSize >> 10);
+        U32 const rsyncBits = (assert(jobSizeKB >= 1), ZSTD_highbit32(jobSizeKB) + 10);
         DEBUGLOG(4, "rsyncLog = %u", rsyncBits);
         mtctx->rsync.hash = 0;
         mtctx->rsync.hitMask = (1ULL << rsyncBits) - 1;
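
Switching the computation from whole MiB to KiB keeps the assert valid now that ZSTDMT_JOBSIZE_MIN drops to 512 KB (see the header diff below): a 512 KB section gives jobSizeKB = 512, where the old jobSizeMB would have been 0. The arithmetic, worked for a 2 MiB target section:

    /* targetSectionSize = 2 MiB                                    */
    /* jobSizeKB  = (2 << 20) >> 10           = 2048                */
    /* rsyncBits  = ZSTD_highbit32(2048) + 10 = 11 + 10 = 21        */
    /* hitMask    = (1ULL << 21) - 1                                */
    /* A random hash satisfies (hash & hitMask) == hitMask once per */
    /* 2^21 bytes on average, so sync points land roughly every     */
    /* 2 MiB, matching the average job size as the comment says.    */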
@@ -1504,8 +1278,8 @@ size_t ZSTDMT_initCStream_internal(
         size_t const capacity = MAX(windowSize, sectionsSize) + slackSize;
         if (mtctx->roundBuff.capacity < capacity) {
             if (mtctx->roundBuff.buffer)
-                ZSTD_free(mtctx->roundBuff.buffer, mtctx->cMem);
-            mtctx->roundBuff.buffer = (BYTE*)ZSTD_malloc(capacity, mtctx->cMem);
+                ZSTD_customFree(mtctx->roundBuff.buffer, mtctx->cMem);
+            mtctx->roundBuff.buffer = (BYTE*)ZSTD_customMalloc(capacity, mtctx->cMem);
             if (mtctx->roundBuff.buffer == NULL) {
                 mtctx->roundBuff.capacity = 0;
                 return ERROR(memory_allocation);
@@ -1530,53 +1304,6 @@ size_t ZSTDMT_initCStream_internal(
     return 0;
 }
 
-size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
-                             const void* dict, size_t dictSize,
-                                   ZSTD_parameters params,
-                                   unsigned long long pledgedSrcSize)
-{
-    ZSTD_CCtx_params cctxParams = mtctx->params;  /* retrieve sticky params */
-    DEBUGLOG(4, "ZSTDMT_initCStream_advanced (pledgedSrcSize=%u)", (U32)pledgedSrcSize);
-    cctxParams.cParams = params.cParams;
-    cctxParams.fParams = params.fParams;
-    return ZSTDMT_initCStream_internal(mtctx, dict, dictSize, ZSTD_dct_auto, NULL,
-                                       cctxParams, pledgedSrcSize);
-}
-
-size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
-                               const ZSTD_CDict* cdict,
-                                     ZSTD_frameParameters fParams,
-                                     unsigned long long pledgedSrcSize)
-{
-    ZSTD_CCtx_params cctxParams = mtctx->params;
-    if (cdict==NULL) return ERROR(dictionary_wrong);   /* method incompatible with NULL cdict */
-    cctxParams.cParams = ZSTD_getCParamsFromCDict(cdict);
-    cctxParams.fParams = fParams;
-    return ZSTDMT_initCStream_internal(mtctx, NULL, 0 /*dictSize*/, ZSTD_dct_auto, cdict,
-                                       cctxParams, pledgedSrcSize);
-}
-
-
-/* ZSTDMT_resetCStream() :
- * pledgedSrcSize can be zero == unknown (for the time being)
- * prefer using ZSTD_CONTENTSIZE_UNKNOWN,
- * as `0` might mean "empty" in the future */
-size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize)
-{
-    if (!pledgedSrcSize) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
-    return ZSTDMT_initCStream_internal(mtctx, NULL, 0, ZSTD_dct_auto, 0, mtctx->params,
-                                       pledgedSrcSize);
-}
-
-size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel) {
-    ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0);
-    ZSTD_CCtx_params cctxParams = mtctx->params;   /* retrieve sticky params */
-    DEBUGLOG(4, "ZSTDMT_initCStream (cLevel=%i)", compressionLevel);
-    cctxParams.cParams = params.cParams;
-    cctxParams.fParams = params.fParams;
-    return ZSTDMT_initCStream_internal(mtctx, NULL, 0, ZSTD_dct_auto, NULL, cctxParams, ZSTD_CONTENTSIZE_UNKNOWN);
-}
-
 
 /* ZSTDMT_writeLastEmptyBlock()
  * Write a single empty block with an end-of-frame to finish a frame.
@@ -1740,7 +1467,7 @@ static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, u
             assert(cSize >= mtctx->jobs[wJobID].dstFlushed);
             assert(mtctx->jobs[wJobID].dstBuff.start != NULL);
             if (toFlush > 0) {
-                memcpy((char*)output->dst + output->pos,
+                ZSTD_memcpy((char*)output->dst + output->pos,
                     (const char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].dstFlushed,
                     toFlush);
             }
@@ -1894,7 +1621,7 @@ static int ZSTDMT_tryGetInputRange(ZSTDMT_CCtx* mtctx)
             return 0;
         }
         ZSTDMT_waitForLdmComplete(mtctx, buffer);
-        memmove(start, mtctx->inBuff.prefix.start, prefixSize);
+        ZSTD_memmove(start, mtctx->inBuff.prefix.start, prefixSize);
         mtctx->inBuff.prefix.start = start;
         mtctx->roundBuff.pos = prefixSize;
     }
@@ -1968,6 +1695,16 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
         pos = 0;
         prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
         hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
+        if ((hash & hitMask) == hitMask) {
+            /* We're already at a sync point so don't load any more until
+             * we're able to flush this sync point.
+             * This likely happened because the job table was full so we
+             * couldn't add our job.
+             */
+            syncPoint.toLoad = 0;
+            syncPoint.flush = 1;
+            return syncPoint;
+        }
     } else {
         /* We don't have enough bytes buffered to initialize the hash, but
          * we know we have at least RSYNC_LENGTH bytes total.
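
The early return above fires when the buffered tail is already a synchronization point. For readers new to rsyncable mode, a self-contained sketch of the content-defined cut it implements (a generic polynomial rolling hash stands in for zstd's actual one):

    #include <stddef.h>

    #define RSYNC_LENGTH 32   /* window of the rolling hash, as in zstdmt */

    /* Returns the position just after the first sync point, or `size`. */
    static size_t findSyncPoint(const unsigned char* buf, size_t size,
                                unsigned long long hitMask)
    {
        unsigned long long const prime = 0x100000001B3ULL;  /* any odd constant */
        unsigned long long primePower = 1, hash = 0;
        size_t i;
        for (i = 0; i < RSYNC_LENGTH - 1; ++i) primePower *= prime;
        for (i = 0; i < size; ++i) {
            if (i >= RSYNC_LENGTH)
                hash -= buf[i - RSYNC_LENGTH] * primePower;  /* drop oldest byte */
            hash = hash * prime + buf[i];                    /* shift in newest  */
            if (i + 1 >= RSYNC_LENGTH && (hash & hitMask) == hitMask)
                return i + 1;   /* cut here: boundary depends only on content */
        }
        return size;
    }

Because the boundary depends only on the last RSYNC_LENGTH bytes, an insertion early in the input shifts job boundaries in absolute position but not relative to the content, which is what keeps the compressed output rsync-friendly.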
@@ -2022,34 +1759,11 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
     assert(output->pos <= output->size);
     assert(input->pos  <= input->size);
 
-    if (mtctx->singleBlockingThread) {  /* delegate to single-thread (synchronous) */
-        return ZSTD_compressStream2(mtctx->cctxPool->cctx[0], output, input, endOp);
-    }
-
     if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) {
         /* current frame being ended. Only flush/end are allowed */
         return ERROR(stage_wrong);
     }
 
-    /* single-pass shortcut (note : synchronous-mode) */
-    if ( (!mtctx->params.rsyncable)   /* rsyncable mode is disabled */
-      && (mtctx->nextJobID == 0)      /* just started */
-      && (mtctx->inBuff.filled == 0)  /* nothing buffered */
-      && (!mtctx->jobReady)           /* no job already created */
-      && (endOp == ZSTD_e_end)        /* end order */
-      && (output->size - output->pos >= ZSTD_compressBound(input->size - input->pos)) ) { /* enough space in dst */
-        size_t const cSize = ZSTDMT_compress_advanced_internal(mtctx,
-                (char*)output->dst + output->pos, output->size - output->pos,
-                (const char*)input->src + input->pos, input->size - input->pos,
-                mtctx->cdict, mtctx->params);
-        if (ZSTD_isError(cSize)) return cSize;
-        input->pos = input->size;
-        output->pos += cSize;
-        mtctx->allJobsCompleted = 1;
-        mtctx->frameEnded = 1;
-        return 0;
-    }
-
     /* fill input buffer */
     if ( (!mtctx->jobReady)
       && (input->size > input->pos) ) {   /* support NULL input */
@@ -2072,13 +1786,21 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
             assert(mtctx->inBuff.buffer.capacity >= mtctx->targetSectionSize);
             DEBUGLOG(5, "ZSTDMT_compressStream_generic: adding %u bytes on top of %u to buffer of size %u",
                         (U32)syncPoint.toLoad, (U32)mtctx->inBuff.filled, (U32)mtctx->targetSectionSize);
-            memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, syncPoint.toLoad);
+            ZSTD_memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, syncPoint.toLoad);
             input->pos += syncPoint.toLoad;
             mtctx->inBuff.filled += syncPoint.toLoad;
             forwardInputProgress = syncPoint.toLoad>0;
         }
-        if ((input->pos < input->size) && (endOp == ZSTD_e_end))
-            endOp = ZSTD_e_flush;   /* can't end now : not all input consumed */
+    }
+    if ((input->pos < input->size) && (endOp == ZSTD_e_end)) {
+        /* Can't end yet because the input is not fully consumed.
+         * We are in one of these cases:
+         * - mtctx->inBuff is NULL & empty: we couldn't get an input buffer so don't create a new job.
+         * - We filled the input buffer: flush this job but don't end the frame.
+         * - We hit a synchronization point: flush this job but don't end the frame.
+         */
+        assert(mtctx->inBuff.filled == 0 || mtctx->inBuff.filled == mtctx->targetSectionSize || mtctx->params.rsyncable);
+        endOp = ZSTD_e_flush;
     }
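
Hoisting the end-of-input check out of the buffer-filling branch means the downgrade to ZSTD_e_flush now also covers the case where no input buffer could be obtained. From the caller's side this surfaces as ZSTD_compressStream2() returning non-zero on ZSTD_e_end, and a standard drain loop handles it (write_all is a hypothetical sink):

    #include <zstd.h>

    static size_t endFrame(ZSTD_CCtx* cctx, ZSTD_inBuffer* in,
                           void* buf, size_t bufSize,
                           int (*write_all)(const void*, size_t))
    {
        size_t remaining;
        do {
            ZSTD_outBuffer out = { buf, bufSize, 0 };
            remaining = ZSTD_compressStream2(cctx, &out, in, ZSTD_e_end);
            if (ZSTD_isError(remaining)) return remaining;
            if (write_all(out.dst, out.pos)) return (size_t)-1;  /* I/O error */
        } while (remaining != 0 || in->pos < in->size);  /* keep flushing */
        return 0;
    }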
 
     if ( (mtctx->jobReady)
@@ -2097,47 +1819,3 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
         return remainingToFlush;
     }
 }
-
-
-size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
-{
-    FORWARD_IF_ERROR( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) , "");
-
-    /* recommended next input size : fill current input buffer */
-    return mtctx->targetSectionSize - mtctx->inBuff.filled;   /* note : could be zero when input buffer is fully filled and no more availability to create new job */
-}
-
-
-static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_EndDirective endFrame)
-{
-    size_t const srcSize = mtctx->inBuff.filled;
-    DEBUGLOG(5, "ZSTDMT_flushStream_internal");
-
-    if ( mtctx->jobReady     /* one job ready for a worker to pick up */
-      || (srcSize > 0)       /* still some data within input buffer */
-      || ((endFrame==ZSTD_e_end) && !mtctx->frameEnded)) {  /* need a last 0-size block to end frame */
-           DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job (%u bytes, end:%u)",
-                        (U32)srcSize, (U32)endFrame);
-        FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) , "");
-    }
-
-    /* check if there is any data available to flush */
-    return ZSTDMT_flushProduced(mtctx, output, 1 /* blockToFlush */, endFrame);
-}
-
-
-size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output)
-{
-    DEBUGLOG(5, "ZSTDMT_flushStream");
-    if (mtctx->singleBlockingThread)
-        return ZSTD_flushStream(mtctx->cctxPool->cctx[0], output);
-    return ZSTDMT_flushStream_internal(mtctx, output, ZSTD_e_flush);
-}
-
-size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output)
-{
-    DEBUGLOG(4, "ZSTDMT_endStream");
-    if (mtctx->singleBlockingThread)
-        return ZSTD_endStream(mtctx->cctxPool->cctx[0], output);
-    return ZSTDMT_flushStream_internal(mtctx, output, ZSTD_e_end);
-}

+ 31 - 113
Utilities/cmzstd/lib/compress/zstdmt_compress.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -19,113 +19,57 @@
 /* Note : This is an internal API.
  *        These APIs used to be exposed with ZSTDLIB_API,
  *        because it used to be the only way to invoke MT compression.
- *        Now, it's recommended to use ZSTD_compress2 and ZSTD_compressStream2()
- *        instead.
- *
- *        If you depend on these APIs and can't switch, then define
- *        ZSTD_LEGACY_MULTITHREADED_API when making the dynamic library.
- *        However, we may completely remove these functions in a future
- *        release, so please switch soon.
+ *        Now, you must use ZSTD_compress2 and ZSTD_compressStream2() instead.
  *
  *        This API requires ZSTD_MULTITHREAD to be defined during compilation,
  *        otherwise ZSTDMT_createCCtx*() will fail.
  */
 
-#ifdef ZSTD_LEGACY_MULTITHREADED_API
-#  define ZSTDMT_API ZSTDLIB_API
-#else
-#  define ZSTDMT_API
-#endif
-
 /* ===   Dependencies   === */
-#include <stddef.h>                /* size_t */
+#include "../common/zstd_deps.h"   /* size_t */
 #define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_parameters */
 #include "../zstd.h"            /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
 
 
 /* ===   Constants   === */
-#ifndef ZSTDMT_NBWORKERS_MAX
-#  define ZSTDMT_NBWORKERS_MAX 200
+#ifndef ZSTDMT_NBWORKERS_MAX /* a different value can be selected at compile time */
+#  define ZSTDMT_NBWORKERS_MAX ((sizeof(void*)==4) /*32-bit*/ ? 64 : 256)
 #endif
-#ifndef ZSTDMT_JOBSIZE_MIN
-#  define ZSTDMT_JOBSIZE_MIN (1 MB)
+#ifndef ZSTDMT_JOBSIZE_MIN   /* a different value can be selected at compile time */
+#  define ZSTDMT_JOBSIZE_MIN (512 KB)
 #endif
 #define ZSTDMT_JOBLOG_MAX   (MEM_32bits() ? 29 : 30)
 #define ZSTDMT_JOBSIZE_MAX  (MEM_32bits() ? (512 MB) : (1024 MB))
 
 
+/* ========================================================
+ * ===  Private interface, for use by ZSTD_compress.c   ===
+ * ===  Not exposed in libzstd. Never invoke directly   ===
+ * ======================================================== */
+
 /* ===   Memory management   === */
 typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
 /* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */
-ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers);
-/* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */
-ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers,
-                                                    ZSTD_customMem cMem);
-ZSTDMT_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);
-
-ZSTDMT_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);
-
-
-/* ===   Simple one-pass compression function   === */
-
-ZSTDMT_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
-                                       void* dst, size_t dstCapacity,
-                                 const void* src, size_t srcSize,
-                                       int compressionLevel);
-
+ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers,
+                                        ZSTD_customMem cMem,
+                                        ZSTD_threadPool *pool);
+size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);
 
+size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);
 
 /* ===   Streaming functions   === */
 
-ZSTDMT_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
-ZSTDMT_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize);  /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it will change in the future to mean "empty" */
-
-ZSTDMT_API size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx);
-ZSTDMT_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
-
-ZSTDMT_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output);   /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
-ZSTDMT_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output);     /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
-
-
-/* ===   Advanced functions and parameters  === */
-
-ZSTDMT_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
-                                          void* dst, size_t dstCapacity,
-                                    const void* src, size_t srcSize,
-                                    const ZSTD_CDict* cdict,
-                                          ZSTD_parameters params,
-                                          int overlapLog);
-
-ZSTDMT_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
-                                        const void* dict, size_t dictSize,   /* dict can be released after init, a local copy is preserved within zcs */
-                                        ZSTD_parameters params,
-                                        unsigned long long pledgedSrcSize);  /* pledgedSrcSize is optional and can be zero == unknown */
-
-ZSTDMT_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
-                                        const ZSTD_CDict* cdict,
-                                        ZSTD_frameParameters fparams,
-                                        unsigned long long pledgedSrcSize);  /* note : zero means empty */
-
-/* ZSTDMT_parameter :
- * List of parameters that can be set using ZSTDMT_setMTCtxParameter() */
-typedef enum {
-    ZSTDMT_p_jobSize,     /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */
-    ZSTDMT_p_overlapLog,  /* Each job may reload a part of previous job to enhance compression ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */
-    ZSTDMT_p_rsyncable    /* Enables rsyncable mode. */
-} ZSTDMT_parameter;
-
-/* ZSTDMT_setMTCtxParameter() :
- * allow setting individual parameters, one at a time, among a list of enums defined in ZSTDMT_parameter.
- * The function must be called typically after ZSTD_createCCtx() but __before ZSTDMT_init*() !__
- * Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions.
- * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
-ZSTDMT_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value);
-
-/* ZSTDMT_getMTCtxParameter() :
- * Query the ZSTDMT_CCtx for a parameter value.
- * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
-ZSTDMT_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value);
+size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx);
 
+/*! ZSTDMT_initCStream_internal() :
+ *  Private use only. Init streaming operation.
+ *  expects params to be valid.
+ *  must receive dict, or cdict, or none, but not both.
+ *  @return : 0, or an error code */
+size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
+                    const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
+                    const ZSTD_CDict* cdict,
+                    ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
 
 /*! ZSTDMT_compressStream_generic() :
  *  Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream()
@@ -134,16 +78,10 @@ ZSTDMT_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter
  *           0 if fully flushed
  *           or an error code
  *  note : needs to be init using any ZSTD_initCStream*() variant */
-ZSTDMT_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
-                                                ZSTD_outBuffer* output,
-                                                ZSTD_inBuffer* input,
-                                                ZSTD_EndDirective endOp);
-
-
-/* ========================================================
- * ===  Private interface, for use by ZSTD_compress.c   ===
- * ===  Not exposed in libzstd. Never invoke directly   ===
- * ======================================================== */
+size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
+                                     ZSTD_outBuffer* output,
+                                     ZSTD_inBuffer* input,
+                                     ZSTD_EndDirective endOp);
 
  /*! ZSTDMT_toFlushNow()
   *  Tell how many bytes are ready to be flushed immediately.
@@ -153,15 +91,6 @@ ZSTDMT_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
   *  therefore flushing is limited by speed of oldest job. */
 size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx);
 
-/*! ZSTDMT_CCtxParam_setMTCtxParameter()
- *  like ZSTDMT_setMTCtxParameter(), but into a ZSTD_CCtx_Params */
-size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, int value);
-
-/*! ZSTDMT_CCtxParam_setNbWorkers()
- *  Set nbWorkers, and clamp it.
- *  Also reset jobSize and overlapLog */
-size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers);
-
 /*! ZSTDMT_updateCParams_whileCompressing() :
  *  Updates only a selected set of compression parameters, to remain compatible with current frame.
  *  New parameters will be applied to next compression job. */
@@ -174,17 +103,6 @@ void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_p
 ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx);
 
 
-/*! ZSTDMT_initCStream_internal() :
- *  Private use only. Init streaming operation.
- *  expects params to be valid.
- *  must receive dict, or cdict, or none, but not both.
- *  @return : 0, or an error code */
-size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
-                    const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
-                    const ZSTD_CDict* cdict,
-                    ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
-
-
 #if defined (__cplusplus)
 }
 #endif
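
With the legacy ZSTDMT entry points gone, every knob they exposed maps onto a public cctx parameter, exactly as the deleted ZSTDMT_CCtxParam_setMTCtxParameter dispatch showed. A sketch of the replacement calls (ZSTD_c_rsyncable is still experimental, hence ZSTD_STATIC_LINKING_ONLY):

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>

    static void configureMT(ZSTD_CCtx* cctx)
    {
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 4);      /* was ZSTDMT_createCCtx(4) */
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_jobSize, 1 << 24);  /* was ZSTDMT_p_jobSize     */
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_overlapLog, 6);     /* was ZSTDMT_p_overlapLog  */
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_rsyncable, 1);      /* was ZSTDMT_p_rsyncable   */
    }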

+ 342 - 239
Utilities/cmzstd/lib/decompress/huf_decompress.c

@@ -1,7 +1,7 @@
 /* ******************************************************************
  * huff0 huffman decoder,
  * part of Finite State Entropy library
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  *  You can contact the author at :
  *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -15,7 +15,7 @@
 /* **************************************************************
 *  Dependencies
 ****************************************************************/
-#include <string.h>     /* memcpy, memset */
+#include "../common/zstd_deps.h"  /* ZSTD_memcpy, ZSTD_memset */
 #include "../common/compiler.h"
 #include "../common/bitstream.h"  /* BIT_* */
 #include "../common/fse.h"        /* to compress headers */
@@ -103,7 +103,7 @@ typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved;
 static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
 {
     DTableDesc dtd;
-    memcpy(&dtd, table, sizeof(dtd));
+    ZSTD_memcpy(&dtd, table, sizeof(dtd));
     return dtd;
 }
 
@@ -115,29 +115,51 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
 /*-***************************/
 typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1;   /* single-symbol decoding */
 
+/**
+ * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at
+ * a time.
+ */
+static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
+    U64 D4;
+    if (MEM_isLittleEndian()) {
+        D4 = symbol + (nbBits << 8);
+    } else {
+        D4 = (symbol << 8) + nbBits;
+    }
+    D4 *= 0x0001000100010001ULL;
+    return D4;
+}
+
+typedef struct {
+        U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
+        U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1];
+        U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
+        BYTE symbols[HUF_SYMBOLVALUE_MAX + 1];
+        BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
+} HUF_ReadDTableX1_Workspace;
+
+
 size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
+{
+    return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
+}
+
+size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2)
 {
     U32 tableLog = 0;
     U32 nbSymbols = 0;
     size_t iSize;
     void* const dtPtr = DTable + 1;
     HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
+    HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace;
 
-    U32* rankVal;
-    BYTE* huffWeight;
-    size_t spaceUsed32 = 0;
-
-    rankVal = (U32 *)workSpace + spaceUsed32;
-    spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
-    huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
-    spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
-
-    if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
+    DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp));
+    if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge);
 
     DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
-    /* memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzer complain ... */
+    /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzers complain ... */
 
-    iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
+    iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2);
     if (HUF_isError(iSize)) return iSize;
 
     /* Table header */
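
A worked check of HUF_DEltX1_set4 from the hunk above, on a little-endian host: symbol 0x41 with nbBits 5 packs to the 16-bit lane value 0x0541, and multiplying by 0x0001000100010001 replicates it four times, so one 64-bit store fills four DTable entries:

    #include <assert.h>
    #include <string.h>

    typedef struct { unsigned char byte; unsigned char nbBits; } DEltX1;

    int main(void)
    {
        /* little-endian branch: D4 = symbol + (nbBits << 8), then replicate */
        unsigned long long const D4 =
            (0x41u + (5u << 8)) * 0x0001000100010001ULL;  /* 0x0541054105410541 */
        DEltX1 dt[4];
        memcpy(dt, &D4, sizeof(dt));   /* what MEM_write64(dt, D4) lays down */
        assert(dt[0].byte == 0x41 && dt[0].nbBits == 5);
        assert(dt[3].byte == 0x41 && dt[3].nbBits == 5);
        return 0;
    }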
@@ -145,52 +167,117 @@ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
         if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge);   /* DTable too small, Huffman tree cannot fit in */
         dtd.tableType = 0;
         dtd.tableLog = (BYTE)tableLog;
-        memcpy(DTable, &dtd, sizeof(dtd));
+        ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
     }
 
-    /* Calculate starting value for each rank */
-    {   U32 n, nextRankStart = 0;
-        for (n=1; n<tableLog+1; n++) {
-            U32 const current = nextRankStart;
-            nextRankStart += (rankVal[n] << (n-1));
-            rankVal[n] = current;
-    }   }
+    /* Compute symbols and rankStart given rankVal:
+     *
+     * rankVal already contains the number of values of each weight.
+     *
+     * symbols contains the symbols ordered by weight. First are the rankVal[0]
+     * weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on.
+     * symbols[0] is filled (but unused) to avoid a branch.
+     *
+     * rankStart contains the offset where each rank belongs in the DTable.
+     * rankStart[0] is not filled because there are no entries in the table for
+     * weight 0.
+     */
+    {
+        int n;
+        int nextRankStart = 0;
+        int const unroll = 4;
+        int const nLimit = (int)nbSymbols - unroll + 1;
+        for (n=0; n<(int)tableLog+1; n++) {
+            U32 const curr = nextRankStart;
+            nextRankStart += wksp->rankVal[n];
+            wksp->rankStart[n] = curr;
+        }
+        for (n=0; n < nLimit; n += unroll) {
+            int u;
+            for (u=0; u < unroll; ++u) {
+                size_t const w = wksp->huffWeight[n+u];
+                wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u);
+            }
+        }
+        for (; n < (int)nbSymbols; ++n) {
+            size_t const w = wksp->huffWeight[n];
+            wksp->symbols[wksp->rankStart[w]++] = (BYTE)n;
+        }
+    }
 
-    /* fill DTable */
-    {   U32 n;
-        size_t const nEnd = nbSymbols;
-        for (n=0; n<nEnd; n++) {
-            size_t const w = huffWeight[n];
-            size_t const length = (1 << w) >> 1;
-            size_t const uStart = rankVal[w];
-            size_t const uEnd = uStart + length;
-            size_t u;
-            HUF_DEltX1 D;
-            D.byte = (BYTE)n;
-            D.nbBits = (BYTE)(tableLog + 1 - w);
-            rankVal[w] = (U32)uEnd;
-            if (length < 4) {
-                /* Use length in the loop bound so the compiler knows it is short. */
-                for (u = 0; u < length; ++u)
-                    dt[uStart + u] = D;
-            } else {
-                /* Unroll the loop 4 times, we know it is a power of 2. */
-                for (u = uStart; u < uEnd; u += 4) {
-                    dt[u + 0] = D;
-                    dt[u + 1] = D;
-                    dt[u + 2] = D;
-                    dt[u + 3] = D;
-    }   }   }   }
+    /* fill DTable
+     * We fill all entries of each weight in order.
+     * That way length is a constant for each iteration of the outer loop.
+     * We can switch based on the length to a different inner loop which is
+     * optimized for that particular case.
+     */
+    {
+        U32 w;
+        int symbol=wksp->rankVal[0];
+        int rankStart=0;
+        for (w=1; w<tableLog+1; ++w) {
+            int const symbolCount = wksp->rankVal[w];
+            int const length = (1 << w) >> 1;
+            int uStart = rankStart;
+            BYTE const nbBits = (BYTE)(tableLog + 1 - w);
+            int s;
+            int u;
+            switch (length) {
+            case 1:
+                for (s=0; s<symbolCount; ++s) {
+                    HUF_DEltX1 D;
+                    D.byte = wksp->symbols[symbol + s];
+                    D.nbBits = nbBits;
+                    dt[uStart] = D;
+                    uStart += 1;
+                }
+                break;
+            case 2:
+                for (s=0; s<symbolCount; ++s) {
+                    HUF_DEltX1 D;
+                    D.byte = wksp->symbols[symbol + s];
+                    D.nbBits = nbBits;
+                    dt[uStart+0] = D;
+                    dt[uStart+1] = D;
+                    uStart += 2;
+                }
+                break;
+            case 4:
+                for (s=0; s<symbolCount; ++s) {
+                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+                    MEM_write64(dt + uStart, D4);
+                    uStart += 4;
+                }
+                break;
+            case 8:
+                for (s=0; s<symbolCount; ++s) {
+                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+                    MEM_write64(dt + uStart, D4);
+                    MEM_write64(dt + uStart + 4, D4);
+                    uStart += 8;
+                }
+                break;
+            default:
+                for (s=0; s<symbolCount; ++s) {
+                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+                    for (u=0; u < length; u += 16) {
+                        MEM_write64(dt + uStart + u + 0, D4);
+                        MEM_write64(dt + uStart + u + 4, D4);
+                        MEM_write64(dt + uStart + u + 8, D4);
+                        MEM_write64(dt + uStart + u + 12, D4);
+                    }
+                    assert(u == length);
+                    uStart += length;
+                }
+                break;
+            }
+            symbol += symbolCount;
+            rankStart += symbolCount * length;
+        }
+    }
     return iSize;
 }
 
-size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
-{
-    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
-    return HUF_readDTableX1_wksp(DTable, src, srcSize,
-                                 workSpace, sizeof(workSpace));
-}
-
 FORCE_INLINE_TEMPLATE BYTE
 HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
 {
@@ -389,20 +476,6 @@ size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
 }
 
 
-size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
-                              const void* cSrc, size_t cSrcSize)
-{
-    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
-    return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
-                                       workSpace, sizeof(workSpace));
-}
-
-size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
-{
-    HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
-    return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
-}
-
 size_t HUF_decompress4X1_usingDTable(
           void* dst,  size_t dstSize,
     const void* cSrc, size_t cSrcSize,
@@ -419,8 +492,7 @@ static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size
 {
     const BYTE* ip = (const BYTE*) cSrc;
 
-    size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize,
-                                                workSpace, wkspSize);
+    size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
     if (HUF_isError(hSize)) return hSize;
     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
     ip += hSize; cSrcSize -= hSize;
@@ -436,18 +508,6 @@ size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
 }
 
 
-size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
-{
-    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
-    return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
-                                       workSpace, sizeof(workSpace));
-}
-size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
-{
-    HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
-    return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
-}
-
 #endif /* HUF_FORCE_DECOMPRESS_X2 */
 
 
@@ -468,13 +528,15 @@ typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
 static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
                            const U32* rankValOrigin, const int minWeight,
                            const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
-                           U32 nbBitsBaseline, U16 baseSeq)
+                           U32 nbBitsBaseline, U16 baseSeq, U32* wksp, size_t wkspSize)
 {
     HUF_DEltX2 DElt;
-    U32 rankVal[HUF_TABLELOG_MAX + 1];
+    U32* rankVal = wksp;
 
+    assert(wkspSize >= HUF_TABLELOG_MAX + 1);
+    (void)wkspSize;
     /* get pre-calculated rankVal */
-    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+    ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1));
 
     /* fill skipped values */
     if (minWeight>1) {
@@ -509,14 +571,18 @@ static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 co
 static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
                            const sortedSymbol_t* sortedList, const U32 sortedListSize,
                            const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
-                           const U32 nbBitsBaseline)
+                           const U32 nbBitsBaseline, U32* wksp, size_t wkspSize)
 {
-    U32 rankVal[HUF_TABLELOG_MAX + 1];
+    U32* rankVal = wksp;
     const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
     const U32 minBits  = nbBitsBaseline - maxWeight;
     U32 s;
 
-    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+    assert(wkspSize >= HUF_TABLELOG_MAX + 1);
+    wksp += HUF_TABLELOG_MAX + 1;
+    wkspSize -= HUF_TABLELOG_MAX + 1;
+
+    ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1));
 
     /* fill DTable */
     for (s=0; s<sortedListSize; s++) {
@@ -534,7 +600,7 @@ static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
             HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
                            rankValOrigin[nbBits], minWeight,
                            sortedList+sortedRank, sortedListSize-sortedRank,
-                           nbBitsBaseline, symbol);
+                           nbBitsBaseline, symbol, wksp, wkspSize);
         } else {
             HUF_DEltX2 DElt;
             MEM_writeLE16(&(DElt.sequence), symbol);
@@ -548,6 +614,15 @@ static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
     }
 }
 
+typedef struct {
+    rankValCol_t rankVal[HUF_TABLELOG_MAX];
+    U32 rankStats[HUF_TABLELOG_MAX + 1];
+    U32 rankStart0[HUF_TABLELOG_MAX + 2];
+    sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
+    BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
+    U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
+} HUF_ReadDTableX2_Workspace;
+
 size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
                        const void* src, size_t srcSize,
                              void* workSpace, size_t wkspSize)
@@ -560,48 +635,33 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
     HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
     U32 *rankStart;
 
-    rankValCol_t* rankVal;
-    U32* rankStats;
-    U32* rankStart0;
-    sortedSymbol_t* sortedSymbol;
-    BYTE* weightList;
-    size_t spaceUsed32 = 0;
-
-    rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);
-    spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
-    rankStats = (U32 *)workSpace + spaceUsed32;
-    spaceUsed32 += HUF_TABLELOG_MAX + 1;
-    rankStart0 = (U32 *)workSpace + spaceUsed32;
-    spaceUsed32 += HUF_TABLELOG_MAX + 2;
-    sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);
-    spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
-    weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
-    spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
-
-    if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
-
-    rankStart = rankStart0 + 1;
-    memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
+    HUF_ReadDTableX2_Workspace* const wksp = (HUF_ReadDTableX2_Workspace*)workSpace;
+
+    if (sizeof(*wksp) > wkspSize) return ERROR(GENERIC);
+
+    rankStart = wksp->rankStart0 + 1;
+    ZSTD_memset(wksp->rankStats, 0, sizeof(wksp->rankStats));
+    ZSTD_memset(wksp->rankStart0, 0, sizeof(wksp->rankStart0));
 
     DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
     if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
-    /* memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */
+    /* ZSTD_memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzers complain ... */
 
-    iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+    iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), /* bmi2 */ 0);
     if (HUF_isError(iSize)) return iSize;
 
     /* check result */
     if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
 
     /* find maxWeight */
-    for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
+    for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
 
     /* Get start index of each weight */
     {   U32 w, nextRankStart = 0;
         for (w=1; w<maxW+1; w++) {
-            U32 current = nextRankStart;
-            nextRankStart += rankStats[w];
-            rankStart[w] = current;
+            U32 curr = nextRankStart;
+            nextRankStart += wksp->rankStats[w];
+            rankStart[w] = curr;
         }
         rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
         sizeOfSort = nextRankStart;
@@ -610,57 +670,51 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
     /* sort symbols by weight */
     {   U32 s;
         for (s=0; s<nbSymbols; s++) {
-            U32 const w = weightList[s];
+            U32 const w = wksp->weightList[s];
             U32 const r = rankStart[w]++;
-            sortedSymbol[r].symbol = (BYTE)s;
-            sortedSymbol[r].weight = (BYTE)w;
+            wksp->sortedSymbol[r].symbol = (BYTE)s;
+            wksp->sortedSymbol[r].weight = (BYTE)w;
         }
         rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
     }
 
     /* Build rankVal */
-    {   U32* const rankVal0 = rankVal[0];
+    {   U32* const rankVal0 = wksp->rankVal[0];
         {   int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
             U32 nextRankVal = 0;
             U32 w;
             for (w=1; w<maxW+1; w++) {
-                U32 current = nextRankVal;
-                nextRankVal += rankStats[w] << (w+rescale);
-                rankVal0[w] = current;
+                U32 curr = nextRankVal;
+                nextRankVal += wksp->rankStats[w] << (w+rescale);
+                rankVal0[w] = curr;
         }   }
         {   U32 const minBits = tableLog+1 - maxW;
             U32 consumed;
             for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
-                U32* const rankValPtr = rankVal[consumed];
+                U32* const rankValPtr = wksp->rankVal[consumed];
                 U32 w;
                 for (w = 1; w < maxW+1; w++) {
                     rankValPtr[w] = rankVal0[w] >> consumed;
     }   }   }   }
 
     HUF_fillDTableX2(dt, maxTableLog,
-                   sortedSymbol, sizeOfSort,
-                   rankStart0, rankVal, maxW,
-                   tableLog+1);
+                   wksp->sortedSymbol, sizeOfSort,
+                   wksp->rankStart0, wksp->rankVal, maxW,
+                   tableLog+1,
+                   wksp->calleeWksp, sizeof(wksp->calleeWksp) / sizeof(U32));
 
     dtd.tableLog = (BYTE)maxTableLog;
     dtd.tableType = 1;
-    memcpy(DTable, &dtd, sizeof(dtd));
+    ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
     return iSize;
 }
 
-size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
-{
-  U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
-  return HUF_readDTableX2_wksp(DTable, src, srcSize,
-                               workSpace, sizeof(workSpace));
-}
-
 
 FORCE_INLINE_TEMPLATE U32
 HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
 {
     size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
-    memcpy(op, dt+val, 2);
+    ZSTD_memcpy(op, dt+val, 2);
     BIT_skipBits(DStream, dt[val].nbBits);
     return dt[val].length;
 }
@@ -669,7 +723,7 @@ FORCE_INLINE_TEMPLATE U32
 HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
 {
     size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
-    memcpy(op, dt+val, 1);
+    ZSTD_memcpy(op, dt+val, 1);
     if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
     else {
         if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
@@ -890,20 +944,6 @@ size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
 }
 
 
-size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
-                              const void* cSrc, size_t cSrcSize)
-{
-    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
-    return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
-                                       workSpace, sizeof(workSpace));
-}
-
-size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
-{
-    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
-    return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
-}
-
 size_t HUF_decompress4X2_usingDTable(
           void* dst,  size_t dstSize,
     const void* cSrc, size_t cSrcSize,
@@ -937,20 +977,6 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
 }
 
 
-size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
-                              const void* cSrc, size_t cSrcSize)
-{
-    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
-    return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
-                                       workSpace, sizeof(workSpace));
-}
-
-size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
-{
-    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
-    return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
-}
-
 #endif /* HUF_FORCE_DECOMPRESS_X1 */
 
 
@@ -1051,67 +1077,6 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
 }
 
 
-typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
-
-size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
-{
-#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
-    static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
-#endif
-
-    /* validation checks */
-    if (dstSize == 0) return ERROR(dstSize_tooSmall);
-    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
-    if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
-    if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
-
-    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
-#if defined(HUF_FORCE_DECOMPRESS_X1)
-        (void)algoNb;
-        assert(algoNb == 0);
-        return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
-#elif defined(HUF_FORCE_DECOMPRESS_X2)
-        (void)algoNb;
-        assert(algoNb == 1);
-        return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
-#else
-        return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
-#endif
-    }
-}
-
-size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
-{
-    /* validation checks */
-    if (dstSize == 0) return ERROR(dstSize_tooSmall);
-    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
-    if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
-    if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
-
-    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
-#if defined(HUF_FORCE_DECOMPRESS_X1)
-        (void)algoNb;
-        assert(algoNb == 0);
-        return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
-#elif defined(HUF_FORCE_DECOMPRESS_X2)
-        (void)algoNb;
-        assert(algoNb == 1);
-        return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
-#else
-        return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
-                        HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
-#endif
-    }
-}
-
-size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
-{
-    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
-    return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
-                                         workSpace, sizeof(workSpace));
-}
-
-
 size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
                                      size_t dstSize, const void* cSrc,
                                      size_t cSrcSize, void* workSpace,
@@ -1145,8 +1110,8 @@ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
     /* validation checks */
     if (dstSize == 0) return ERROR(dstSize_tooSmall);
     if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
-    if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
-    if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
+    if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
+    if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
 
     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
 #if defined(HUF_FORCE_DECOMPRESS_X1)
@@ -1168,14 +1133,6 @@ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
     }
 }
 
-size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
-                             const void* cSrc, size_t cSrcSize)
-{
-    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
-    return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
-                                      workSpace, sizeof(workSpace));
-}
-
 
 size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
 {
@@ -1199,7 +1156,7 @@ size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstS
 {
     const BYTE* ip = (const BYTE*) cSrc;
 
-    size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
+    size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
     if (HUF_isError(hSize)) return hSize;
     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
     ip += hSize; cSrcSize -= hSize;
@@ -1246,3 +1203,149 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds
 #endif
     }
 }
+
+#ifndef ZSTD_NO_UNUSED_FUNCTIONS
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
+{
+    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+    return HUF_readDTableX1_wksp(DTable, src, srcSize,
+                                 workSpace, sizeof(workSpace));
+}
+
+size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
+                              const void* cSrc, size_t cSrcSize)
+{
+    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+    return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
+                                       workSpace, sizeof(workSpace));
+}
+
+size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
+    return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
+}
+#endif
+
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
+{
+  U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+  return HUF_readDTableX2_wksp(DTable, src, srcSize,
+                               workSpace, sizeof(workSpace));
+}
+
+size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
+                              const void* cSrc, size_t cSrcSize)
+{
+    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+    return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
+                                       workSpace, sizeof(workSpace));
+}
+
+size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
+    return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
+}
+#endif
+
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+    return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
+                                       workSpace, sizeof(workSpace));
+}
+size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
+    return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
+}
+#endif
+
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
+                              const void* cSrc, size_t cSrcSize)
+{
+    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+    return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
+                                       workSpace, sizeof(workSpace));
+}
+
+size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
+    return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
+}
+#endif
+
+typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
+
+size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
+    static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
+#endif
+
+    /* validation checks */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
+    if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
+    if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
+
+    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+        (void)algoNb;
+        assert(algoNb == 0);
+        return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+        (void)algoNb;
+        assert(algoNb == 1);
+        return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
+#else
+        return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
+#endif
+    }
+}
+
+size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    /* validation checks */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
+    if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
+    if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
+
+    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+        (void)algoNb;
+        assert(algoNb == 0);
+        return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+        (void)algoNb;
+        assert(algoNb == 1);
+        return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
+#else
+        return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
+                        HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
+#endif
+    }
+}
+
+size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+    return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
+                                         workSpace, sizeof(workSpace));
+}
+
+size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
+                             const void* cSrc, size_t cSrcSize)
+{
+    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+    return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
+                                      workSpace, sizeof(workSpace));
+}
+#endif
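
The wrappers gathered above under ZSTD_NO_UNUSED_FUNCTIONS all share one shape: declare a worst-case workspace on the stack, then forward to the `_wksp` variant that does the real work. Callers who can afford the stack get a one-call API, while embedders can compile the convenience layer out entirely. A minimal sketch of that pattern, using hypothetical demo_* names rather than the HUF API:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical workspace-taking core: the caller owns the scratch memory. */
    static size_t demo_core_wksp(uint32_t* wksp, size_t wkspSizeU32)
    {
        size_t i;
        if (wkspSizeU32 < 4) return (size_t)-1;   /* "workspace too small" */
        for (i = 0; i < 4; i++) wksp[i] = (uint32_t)i;
        return wksp[3];
    }

    /* Convenience wrapper in the style of the functions above: it burns
     * stack for a worst-case workspace so callers need not manage one. */
    static size_t demo_core(void)
    {
        uint32_t workSpace[64];   /* stand-in for HUF_DECOMPRESS_WORKSPACE_SIZE_U32 */
        return demo_core_wksp(workSpace, sizeof(workSpace) / sizeof(uint32_t));
    }

    int main(void)
    {
        printf("%zu\n", demo_core());   /* prints 3 */
        return 0;
    }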

+ 9 - 9
Utilities/cmzstd/lib/decompress/zstd_ddict.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -14,7 +14,7 @@
 /*-*******************************************************
 *  Dependencies
 *********************************************************/
-#include <string.h>      /* memcpy, memmove, memset */
+#include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
 #include "../common/cpu.h"         /* bmi2 */
 #include "../common/mem.h"         /* low level memory routines */
 #define FSE_STATIC_LINKING_ONLY
@@ -127,11 +127,11 @@ static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
         ddict->dictContent = dict;
         if (!dict) dictSize = 0;
     } else {
-        void* const internalBuffer = ZSTD_malloc(dictSize, ddict->cMem);
+        void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem);
         ddict->dictBuffer = internalBuffer;
         ddict->dictContent = internalBuffer;
         if (!internalBuffer) return ERROR(memory_allocation);
-        memcpy(internalBuffer, dict, dictSize);
+        ZSTD_memcpy(internalBuffer, dict, dictSize);
     }
     ddict->dictSize = dictSize;
     ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001);  /* cover both little and big endian */
@@ -147,9 +147,9 @@ ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
                                       ZSTD_dictContentType_e dictContentType,
                                       ZSTD_customMem customMem)
 {
-    if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
+    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
 
-    {   ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem);
+    {   ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem);
         if (ddict == NULL) return NULL;
         ddict->cMem = customMem;
         {   size_t const initResult = ZSTD_initDDict_internal(ddict,
@@ -198,7 +198,7 @@ const ZSTD_DDict* ZSTD_initStaticDDict(
     if ((size_t)sBuffer & 7) return NULL;   /* 8-aligned */
     if (sBufferSize < neededSpace) return NULL;
     if (dictLoadMethod == ZSTD_dlm_byCopy) {
-        memcpy(ddict+1, dict, dictSize);  /* local copy */
+        ZSTD_memcpy(ddict+1, dict, dictSize);  /* local copy */
         dict = ddict+1;
     }
     if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
@@ -213,8 +213,8 @@ size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
 {
     if (ddict==NULL) return 0;   /* support free on NULL */
     {   ZSTD_customMem const cMem = ddict->cMem;
-        ZSTD_free(ddict->dictBuffer, cMem);
-        ZSTD_free(ddict, cMem);
+        ZSTD_customFree(ddict->dictBuffer, cMem);
+        ZSTD_customFree(ddict, cMem);
         return 0;
     }
 }
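
The dependency swap at the top of this file is mechanical: <string.h> calls are routed through the ZSTD_memcpy/ZSTD_memmove/ZSTD_memset macros from zstd_deps.h, so freestanding builds can substitute replacements in one place instead of patching every call site. A sketch of the indirection, assuming default definitions along these lines (the real zstd_deps.h also covers allocation and guards each group behind include-once macros):

    /* sketch of the zstd_deps.h pattern, not the verbatim header */
    #ifndef DEMO_ZSTD_DEPS_COMMON
    #define DEMO_ZSTD_DEPS_COMMON
    #include <string.h>   /* or a freestanding replacement */
    #define ZSTD_memcpy(d, s, n)  memcpy((d), (s), (n))
    #define ZSTD_memmove(d, s, n) memmove((d), (s), (n))
    #define ZSTD_memset(p, v, n)  memset((p), (v), (n))
    #endif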

+ 2 - 2
Utilities/cmzstd/lib/decompress/zstd_ddict.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,7 +15,7 @@
 /*-*******************************************************
  *  Dependencies
  *********************************************************/
-#include <stddef.h>   /* size_t */
+#include "../common/zstd_deps.h"   /* size_t */
 #include "../zstd.h"     /* ZSTD_DDict, and several public functions */
 
 

+ 369 - 87
Utilities/cmzstd/lib/decompress/zstd_decompress.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -55,13 +55,14 @@
 /*-*******************************************************
 *  Dependencies
 *********************************************************/
-#include <string.h>      /* memcpy, memmove, memset */
+#include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
 #include "../common/cpu.h"         /* bmi2 */
 #include "../common/mem.h"         /* low level memory routines */
 #define FSE_STATIC_LINKING_ONLY
 #include "../common/fse.h"
 #define HUF_STATIC_LINKING_ONLY
 #include "../common/huf.h"
+#include "../common/xxhash.h" /* XXH64_reset, XXH64_update, XXH64_digest, XXH64 */
 #include "../common/zstd_internal.h"  /* blockProperties_t */
 #include "zstd_decompress_internal.h"   /* ZSTD_DCtx */
 #include "zstd_ddict.h"  /* ZSTD_DDictDictContent */
@@ -72,6 +73,144 @@
 #endif
 
 
+
+/*************************************
+ * Multiple DDicts Hashset internals *
+ *************************************/
+
+#define DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT 4
+#define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3   /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float.
+                                                     * Currently, that means a 0.75 load factor.
+                                                     * So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded
+                                                     * the load factor of the ddict hash set.
+                                                     */
+
+#define DDICT_HASHSET_TABLE_BASE_SIZE 64
+#define DDICT_HASHSET_RESIZE_FACTOR 2
+
+/* Hash function to determine starting position of dict insertion within the table
+ * Returns an index within [0, hashSet->ddictPtrTableSize)
+ */
+static size_t ZSTD_DDictHashSet_getIndex(const ZSTD_DDictHashSet* hashSet, U32 dictID) {
+    const U64 hash = XXH64(&dictID, sizeof(U32), 0);
+    /* DDict ptr table size is a power of 2, use size - 1 as mask to get an index within [0, hashSet->ddictPtrTableSize) */
+    return hash & (hashSet->ddictPtrTableSize - 1);
+}
+
+/* Adds DDict to a hashset without resizing it.
+ * If inserting a DDict with a dictID that already exists in the set, replaces the one in the set.
+ * Returns 0 if successful, or a zstd error code if something went wrong.
+ */
+static size_t ZSTD_DDictHashSet_emplaceDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict) {
+    const U32 dictID = ZSTD_getDictID_fromDDict(ddict);
+    size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID);
+    const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1;
+    RETURN_ERROR_IF(hashSet->ddictPtrCount == hashSet->ddictPtrTableSize, GENERIC, "Hash set is full!");
+    DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx);
+    while (hashSet->ddictPtrTable[idx] != NULL) {
+        /* Replace existing ddict if inserting ddict with same dictID */
+        if (ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]) == dictID) {
+            DEBUGLOG(4, "DictID already exists, replacing rather than adding");
+            hashSet->ddictPtrTable[idx] = ddict;
+            return 0;
+        }
+        idx &= idxRangeMask;
+        idx++;
+    }
+    DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx);
+    hashSet->ddictPtrTable[idx] = ddict;
+    hashSet->ddictPtrCount++;
+    return 0;
+}
+
+/* Expands the hash table by a factor of DDICT_HASHSET_RESIZE_FACTOR:
+ * allocates a new table, rehashes all values into it, and frees the old table.
+ * Returns 0 on success, otherwise a zstd error code.
+ */
+static size_t ZSTD_DDictHashSet_expand(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) {
+    size_t newTableSize = hashSet->ddictPtrTableSize * DDICT_HASHSET_RESIZE_FACTOR;
+    const ZSTD_DDict** newTable = (const ZSTD_DDict**)ZSTD_customCalloc(sizeof(ZSTD_DDict*) * newTableSize, customMem);
+    const ZSTD_DDict** oldTable = hashSet->ddictPtrTable;
+    size_t oldTableSize = hashSet->ddictPtrTableSize;
+    size_t i;
+
+    DEBUGLOG(4, "Expanding DDict hash table! Old size: %zu new size: %zu", oldTableSize, newTableSize);
+    RETURN_ERROR_IF(!newTable, memory_allocation, "Expanded hashset allocation failed!");
+    hashSet->ddictPtrTable = newTable;
+    hashSet->ddictPtrTableSize = newTableSize;
+    hashSet->ddictPtrCount = 0;
+    for (i = 0; i < oldTableSize; ++i) {
+        if (oldTable[i] != NULL) {
+            FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, oldTable[i]), "");
+        }
+    }
+    ZSTD_customFree((void*)oldTable, customMem);
+    DEBUGLOG(4, "Finished re-hash");
+    return 0;
+}
+
+/* Fetches a DDict with the given dictID
+ * Returns the ZSTD_DDict* with the requested dictID. If it doesn't exist, then returns NULL.
+ */
+static const ZSTD_DDict* ZSTD_DDictHashSet_getDDict(ZSTD_DDictHashSet* hashSet, U32 dictID) {
+    size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID);
+    const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1;
+    DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx);
+    for (;;) {
+        size_t currDictID = ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]);
+        if (currDictID == dictID || currDictID == 0) {
+            /* currDictID == 0 implies a NULL ddict entry */
+            break;
+        } else {
+            idx &= idxRangeMask;    /* Goes to start of table when we reach the end */
+            idx++;
+        }
+    }
+    DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx);
+    return hashSet->ddictPtrTable[idx];
+}
+
+/* Allocates space for and returns a ddict hash set
+ * The hash set's ZSTD_DDict* table has all values automatically set to NULL to begin with.
+ * Returns NULL if allocation failed.
+ */
+static ZSTD_DDictHashSet* ZSTD_createDDictHashSet(ZSTD_customMem customMem) {
+    ZSTD_DDictHashSet* ret = (ZSTD_DDictHashSet*)ZSTD_customMalloc(sizeof(ZSTD_DDictHashSet), customMem);
+    DEBUGLOG(4, "Allocating new hash set");
+    ret->ddictPtrTable = (const ZSTD_DDict**)ZSTD_customCalloc(DDICT_HASHSET_TABLE_BASE_SIZE * sizeof(ZSTD_DDict*), customMem);
+    ret->ddictPtrTableSize = DDICT_HASHSET_TABLE_BASE_SIZE;
+    ret->ddictPtrCount = 0;
+    if (!ret || !ret->ddictPtrTable) {
+        return NULL;
+    }
+    return ret;
+}
+
+/* Frees the table of ZSTD_DDict* within a hashset, then frees the hashset itself.
+ * Note: The ZSTD_DDict* within the table are NOT freed.
+ */
+static void ZSTD_freeDDictHashSet(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) {
+    DEBUGLOG(4, "Freeing ddict hash set");
+    if (hashSet && hashSet->ddictPtrTable) {
+        ZSTD_customFree((void*)hashSet->ddictPtrTable, customMem);
+    }
+    if (hashSet) {
+        ZSTD_customFree(hashSet, customMem);
+    }
+}
+
+/* Public function: Adds a DDict into the ZSTD_DDictHashSet, possibly triggering a resize of the hash set.
+ * Returns 0 on success, or a ZSTD error.
+ */
+static size_t ZSTD_DDictHashSet_addDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict, ZSTD_customMem customMem) {
+    DEBUGLOG(4, "Adding dict ID: %u to hashset with - Count: %zu Tablesize: %zu", ZSTD_getDictID_fromDDict(ddict), hashSet->ddictPtrCount, hashSet->ddictPtrTableSize);
+    if (hashSet->ddictPtrCount * DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT / hashSet->ddictPtrTableSize * DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT != 0) {
+        FORWARD_IF_ERROR(ZSTD_DDictHashSet_expand(hashSet, customMem), "");
+    }
+    FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, ddict), "");
+    return 0;
+}
+
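
The hash set introduced above is plain open addressing: XXH64 of the dictID masked by (tableSize - 1) picks a start slot, linear probing resolves collisions, and an all-integer test stands in for a 0.75 load factor. (Read with C precedence, the expression in ZSTD_DDictHashSet_addDDict divides by the table size before multiplying by SIZE_MULT; the fully parenthesized form below is the one that matches the 0.75 description in the comment.) A self-contained toy version, with a trivial hash standing in for XXH64:

    #include <stddef.h>
    #include <stdio.h>

    #define COUNT_MULT 4
    #define SIZE_MULT  3           /* SIZE_MULT/COUNT_MULT = 0.75 */
    #define TABLE_SIZE 8           /* power of 2, so (size - 1) works as a mask */

    static unsigned table[TABLE_SIZE];   /* 0 means "empty slot" */
    static size_t   count;

    /* true once count/size >= 3/4 (all-integer, as the comment intends) */
    static int overLoadFactor(void)
    {
        return (count * COUNT_MULT) / (TABLE_SIZE * SIZE_MULT) != 0;
    }

    static void emplace(unsigned dictID)
    {
        size_t idx = dictID & (TABLE_SIZE - 1);    /* toy hash: identity + mask */
        while (table[idx] != 0) {
            if (table[idx] == dictID) { table[idx] = dictID; return; }  /* replace */
            idx = (idx + 1) & (TABLE_SIZE - 1);    /* linear probe with wrap */
        }
        table[idx] = dictID;
        count++;
    }

    int main(void)
    {
        unsigned const ids[] = { 3, 11, 19, 5 };   /* 11 and 19 collide with 3 */
        size_t i;
        for (i = 0; i < sizeof(ids)/sizeof(ids[0]); i++) emplace(ids[i]);
        printf("count=%zu overloaded=%d\n", count, overLoadFactor());   /* 4, 0 */
        return 0;
    }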
 /*-*************************************************************
 *   Context management
 ***************************************************************/
@@ -94,11 +233,19 @@ static size_t ZSTD_startingInputLength(ZSTD_format_e format)
     return startingInputLength;
 }
 
+static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx)
+{
+    assert(dctx->streamStage == zdss_init);
+    dctx->format = ZSTD_f_zstd1;
+    dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
+    dctx->outBufferMode = ZSTD_bm_buffered;
+    dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum;
+    dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict;
+}
+
 static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
 {
-    dctx->format = ZSTD_f_zstd1;  /* ZSTD_decompressBegin() invokes ZSTD_startingInputLength() with argument dctx->format */
     dctx->staticSize  = 0;
-    dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
     dctx->ddict       = NULL;
     dctx->ddictLocal  = NULL;
     dctx->dictEnd     = NULL;
@@ -113,7 +260,8 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
     dctx->noForwardProgress = 0;
     dctx->oversizedDuration = 0;
     dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
-    dctx->outBufferMode = ZSTD_obm_buffered;
+    dctx->ddictSet = NULL;
+    ZSTD_DCtx_resetParameters(dctx);
 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
     dctx->dictContentEndForFuzzing = NULL;
 #endif
@@ -134,9 +282,9 @@ ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize)
 
 ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
 {
-    if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
+    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
 
-    {   ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_malloc(sizeof(*dctx), customMem);
+    {   ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_customMalloc(sizeof(*dctx), customMem);
         if (!dctx) return NULL;
         dctx->customMem = customMem;
         ZSTD_initDCtx_internal(dctx);
@@ -164,13 +312,17 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
     RETURN_ERROR_IF(dctx->staticSize, memory_allocation, "not compatible with static DCtx");
     {   ZSTD_customMem const cMem = dctx->customMem;
         ZSTD_clearDict(dctx);
-        ZSTD_free(dctx->inBuff, cMem);
+        ZSTD_customFree(dctx->inBuff, cMem);
         dctx->inBuff = NULL;
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
         if (dctx->legacyContext)
             ZSTD_freeLegacyStreamContext(dctx->legacyContext, dctx->previousLegacyVersion);
 #endif
-        ZSTD_free(dctx, cMem);
+        if (dctx->ddictSet) {
+            ZSTD_freeDDictHashSet(dctx->ddictSet, cMem);
+            dctx->ddictSet = NULL;
+        }
+        ZSTD_customFree(dctx, cMem);
         return 0;
     }
 }
@@ -179,7 +331,30 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
 void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
 {
     size_t const toCopy = (size_t)((char*)(&dstDCtx->inBuff) - (char*)dstDCtx);
-    memcpy(dstDCtx, srcDCtx, toCopy);  /* no need to copy workspace */
+    ZSTD_memcpy(dstDCtx, srcDCtx, toCopy);  /* no need to copy workspace */
+}
+
+/* Given a dctx with digested frame params, re-selects the correct ZSTD_DDict based on
+ * the requested dict ID from the frame. If there exists a reference to the correct ZSTD_DDict, then
+ * accordingly sets the ddict to be used to decompress the frame.
+ *
+ * If no DDict is found, then no action is taken, and the ZSTD_DCtx::ddict remains as-is.
+ *
+ * ZSTD_d_refMultipleDDicts must be enabled for this function to be called.
+ */
+static void ZSTD_DCtx_selectFrameDDict(ZSTD_DCtx* dctx) {
+    assert(dctx->refMultipleDDicts && dctx->ddictSet);
+    DEBUGLOG(4, "Adjusting DDict based on requested dict ID from frame");
+    if (dctx->ddict) {
+        const ZSTD_DDict* frameDDict = ZSTD_DDictHashSet_getDDict(dctx->ddictSet, dctx->fParams.dictID);
+        if (frameDDict) {
+            DEBUGLOG(4, "DDict found!");
+            ZSTD_clearDict(dctx);
+            dctx->dictID = dctx->fParams.dictID;
+            dctx->ddict = frameDDict;
+            dctx->dictUses = ZSTD_use_indefinitely;
+        }
+    }
 }
 
 
@@ -246,7 +421,7 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s
     const BYTE* ip = (const BYTE*)src;
     size_t const minInputSize = ZSTD_startingInputLength(format);
 
-    memset(zfhPtr, 0, sizeof(*zfhPtr));   /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */
+    ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr));   /* not strictly necessary, but static analyzers do not understand that zfhPtr will only be read if the return value is zero, since they are two different signals */
     if (srcSize < minInputSize) return minInputSize;
     RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter");
 
@@ -256,7 +431,7 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s
             /* skippable frame */
             if (srcSize < ZSTD_SKIPPABLEHEADERSIZE)
                 return ZSTD_SKIPPABLEHEADERSIZE; /* magic number + frame length */
-            memset(zfhPtr, 0, sizeof(*zfhPtr));
+            ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr));
             zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE);
             zfhPtr->frameType = ZSTD_skippableFrame;
             return 0;
@@ -433,12 +608,19 @@ unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize)
 
 /** ZSTD_decodeFrameHeader() :
  * `headerSize` must be the size provided by ZSTD_frameHeaderSize().
+ * If multiple DDict references are enabled, this also selects the correct DDict to use.
  * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */
 static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize)
 {
     size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format);
     if (ZSTD_isError(result)) return result;    /* invalid header */
     RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small");
+
+    /* Reference DDict requested by frame if dctx references multiple ddicts */
+    if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts && dctx->ddictSet) {
+        ZSTD_DCtx_selectFrameDDict(dctx);
+    }
+
 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
     /* Skip the dictID check in fuzzing mode, because it makes the search
      * harder.
@@ -446,7 +628,9 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t he
     RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID),
                     dictionary_wrong, "");
 #endif
-    if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0);
+    dctx->validateChecksum = (dctx->fParams.checksumFlag && !dctx->forceIgnoreChecksum) ? 1 : 0;
+    if (dctx->validateChecksum) XXH64_reset(&dctx->xxhState, 0);
+    dctx->processedCSize += headerSize;
     return 0;
 }
 
@@ -461,7 +645,7 @@ static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret)
 static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize)
 {
     ZSTD_frameSizeInfo frameSizeInfo;
-    memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo));
+    ZSTD_memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo));
 
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
     if (ZSTD_isLegacy(src, srcSize))
@@ -516,7 +700,7 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize
             ip += 4;
         }
 
-        frameSizeInfo.compressedSize = ip - ipstart;
+        frameSizeInfo.compressedSize = (size_t)(ip - ipstart);
         frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN)
                                         ? zfh.frameContentSize
                                         : nbBlocks * zfh.blockSizeMax;
@@ -569,7 +753,7 @@ unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
 size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize)
 {
     DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize);
-    ZSTD_checkContinuity(dctx, blockStart);
+    ZSTD_checkContinuity(dctx, blockStart, blockSize);
     dctx->previousDstEnd = (const char*)blockStart + blockSize;
     return blockSize;
 }
@@ -579,12 +763,12 @@ static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity,
                           const void* src, size_t srcSize)
 {
     DEBUGLOG(5, "ZSTD_copyRawBlock");
+    RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, "");
     if (dst == NULL) {
         if (srcSize == 0) return 0;
         RETURN_ERROR(dstBuffer_null, "");
     }
-    RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, "");
-    memcpy(dst, src, srcSize);
+    ZSTD_memcpy(dst, src, srcSize);
     return srcSize;
 }
 
@@ -592,15 +776,41 @@ static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity,
                                BYTE b,
                                size_t regenSize)
 {
+    RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall, "");
     if (dst == NULL) {
         if (regenSize == 0) return 0;
         RETURN_ERROR(dstBuffer_null, "");
     }
-    RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall, "");
-    memset(dst, b, regenSize);
+    ZSTD_memset(dst, b, regenSize);
     return regenSize;
 }
 
+static void ZSTD_DCtx_trace_end(ZSTD_DCtx const* dctx, U64 uncompressedSize, U64 compressedSize, unsigned streaming)
+{
+#if ZSTD_TRACE
+    if (dctx->traceCtx && ZSTD_trace_decompress_end != NULL) {
+        ZSTD_Trace trace;
+        ZSTD_memset(&trace, 0, sizeof(trace));
+        trace.version = ZSTD_VERSION_NUMBER;
+        trace.streaming = streaming;
+        if (dctx->ddict) {
+            trace.dictionaryID = ZSTD_getDictID_fromDDict(dctx->ddict);
+            trace.dictionarySize = ZSTD_DDict_dictSize(dctx->ddict);
+            trace.dictionaryIsCold = dctx->ddictIsCold;
+        }
+        trace.uncompressedSize = (size_t)uncompressedSize;
+        trace.compressedSize = (size_t)compressedSize;
+        trace.dctx = dctx;
+        ZSTD_trace_decompress_end(dctx->traceCtx, &trace);
+    }
+#else
+    (void)dctx;
+    (void)uncompressedSize;
+    (void)compressedSize;
+    (void)streaming;
+#endif
+}
+
 
 /*! ZSTD_decompressFrame() :
  * @dctx must be properly initialized
@@ -610,8 +820,9 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
                                    void* dst, size_t dstCapacity,
                              const void** srcPtr, size_t *srcSizePtr)
 {
-    const BYTE* ip = (const BYTE*)(*srcPtr);
-    BYTE* const ostart = (BYTE* const)dst;
+    const BYTE* const istart = (const BYTE*)(*srcPtr);
+    const BYTE* ip = istart;
+    BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = dstCapacity != 0 ? ostart + dstCapacity : ostart;
     BYTE* op = ostart;
     size_t remainingSrcSize = *srcSizePtr;
@@ -647,13 +858,13 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
         switch(blockProperties.blockType)
         {
         case bt_compressed:
-            decodedSize = ZSTD_decompressBlock_internal(dctx, op, oend-op, ip, cBlockSize, /* frame */ 1);
+            decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1);
             break;
         case bt_raw :
-            decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize);
+            decodedSize = ZSTD_copyRawBlock(op, (size_t)(oend-op), ip, cBlockSize);
             break;
         case bt_rle :
-            decodedSize = ZSTD_setRleBlock(op, oend-op, *ip, blockProperties.origSize);
+            decodedSize = ZSTD_setRleBlock(op, (size_t)(oend-op), *ip, blockProperties.origSize);
             break;
         case bt_reserved :
         default:
@@ -661,7 +872,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
         }
 
         if (ZSTD_isError(decodedSize)) return decodedSize;
-        if (dctx->fParams.checksumFlag)
+        if (dctx->validateChecksum)
             XXH64_update(&dctx->xxhState, op, decodedSize);
         if (decodedSize != 0)
             op += decodedSize;
@@ -676,19 +887,21 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
                         corruption_detected, "");
     }
     if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */
-        U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState);
-        U32 checkRead;
         RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong, "");
-        checkRead = MEM_readLE32(ip);
-        RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong, "");
+        if (!dctx->forceIgnoreChecksum) {
+            U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState);
+            U32 checkRead;
+            checkRead = MEM_readLE32(ip);
+            RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong, "");
+        }
         ip += 4;
         remainingSrcSize -= 4;
     }
-
+    ZSTD_DCtx_trace_end(dctx, (U64)(op-ostart), (U64)(ip-istart), /* streaming */ 0);
     /* Allow caller to get size read */
     *srcPtr = ip;
     *srcSizePtr = remainingSrcSize;
-    return op-ostart;
+    return (size_t)(op-ostart);
 }
 
 static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
@@ -721,7 +934,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
             decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize);
             if (ZSTD_isError(decodedSize)) return decodedSize;
 
-            assert(decodedSize <=- dstCapacity);
+            assert(decodedSize <= dstCapacity);
             dst = (BYTE*)dst + decodedSize;
             dstCapacity -= decodedSize;
 
@@ -753,7 +966,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
              * use this in all cases but ddict */
             FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize), "");
         }
-        ZSTD_checkContinuity(dctx, dst);
+        ZSTD_checkContinuity(dctx, dst, dstCapacity);
 
         {   const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity,
                                                     &src, &srcSize);
@@ -761,15 +974,13 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
                 (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown)
              && (moreThan1Frame==1),
                 srcSize_wrong,
-                "at least one frame successfully completed, but following "
-                "bytes are garbage: it's more likely to be a srcSize error, "
-                "specifying more bytes than compressed size of frame(s). This "
-                "error message replaces ERROR(prefix_unknown), which would be "
-                "confusing, as the first header is actually correct. Note that "
-                "one could be unlucky, it might be a corruption error instead, "
-                "happening right at the place where we expect zstd magic "
-                "bytes. But this is _much_ less likely than a srcSize field "
-                "error.");
+                "At least one frame successfully completed, "
+                "but following bytes are garbage: "
+                "it's more likely to be a srcSize error, "
+                "specifying more input bytes than size of frame(s). "
+                "Note: one could be unlucky, it might be a corruption error instead, "
+                "happening right at the place where we expect zstd magic bytes. "
+                "But this is _much_ less likely than a srcSize field error.");
             if (ZSTD_isError(res)) return res;
             assert(res <= dstCapacity);
             if (res != 0)
@@ -781,7 +992,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
 
     RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed");
 
-    return (BYTE*)dst - (BYTE*)dststart;
+    return (size_t)((BYTE*)dst - (BYTE*)dststart);
 }
 
 size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
@@ -890,7 +1101,9 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
     DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize);
     /* Sanity check */
     RETURN_ERROR_IF(srcSize != ZSTD_nextSrcSizeToDecompressWithInputSize(dctx, srcSize), srcSize_wrong, "not allowed");
-    if (dstCapacity) ZSTD_checkContinuity(dctx, dst);
+    ZSTD_checkContinuity(dctx, dst, dstCapacity);
+
+    dctx->processedCSize += srcSize;
 
     switch (dctx->stage)
     {
@@ -899,21 +1112,21 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
         if (dctx->format == ZSTD_f_zstd1) {  /* allows header */
             assert(srcSize >= ZSTD_FRAMEIDSIZE);  /* to read skippable magic number */
             if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {        /* skippable frame */
-                memcpy(dctx->headerBuffer, src, srcSize);
+                ZSTD_memcpy(dctx->headerBuffer, src, srcSize);
                 dctx->expected = ZSTD_SKIPPABLEHEADERSIZE - srcSize;  /* remaining to load to get full skippable frame header */
                 dctx->stage = ZSTDds_decodeSkippableHeader;
                 return 0;
         }   }
         dctx->headerSize = ZSTD_frameHeaderSize_internal(src, srcSize, dctx->format);
         if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize;
-        memcpy(dctx->headerBuffer, src, srcSize);
+        ZSTD_memcpy(dctx->headerBuffer, src, srcSize);
         dctx->expected = dctx->headerSize - srcSize;
         dctx->stage = ZSTDds_decodeFrameHeader;
         return 0;
 
     case ZSTDds_decodeFrameHeader:
         assert(src != NULL);
-        memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize);
+        ZSTD_memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize);
         FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize), "");
         dctx->expected = ZSTD_blockHeaderSize;
         dctx->stage = ZSTDds_decodeBlockHeader;
@@ -977,7 +1190,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
             RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum");
             DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize);
             dctx->decodedSize += rSize;
-            if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize);
+            if (dctx->validateChecksum) XXH64_update(&dctx->xxhState, dst, rSize);
             dctx->previousDstEnd = (char*)dst + rSize;
 
             /* Stay on the same stage until we are finished streaming the block. */
@@ -995,6 +1208,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
                     dctx->expected = 4;
                     dctx->stage = ZSTDds_checkChecksum;
                 } else {
+                    ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1);
                     dctx->expected = 0;   /* ends here */
                     dctx->stage = ZSTDds_getFrameHeaderSize;
                 }
@@ -1007,10 +1221,14 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
 
     case ZSTDds_checkChecksum:
         assert(srcSize == 4);  /* guaranteed by dctx->expected */
-        {   U32 const h32 = (U32)XXH64_digest(&dctx->xxhState);
-            U32 const check32 = MEM_readLE32(src);
-            DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32);
-            RETURN_ERROR_IF(check32 != h32, checksum_wrong, "");
+        {
+            if (dctx->validateChecksum) {
+                U32 const h32 = (U32)XXH64_digest(&dctx->xxhState);
+                U32 const check32 = MEM_readLE32(src);
+                DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32);
+                RETURN_ERROR_IF(check32 != h32, checksum_wrong, "");
+            }
+            ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1);
             dctx->expected = 0;
             dctx->stage = ZSTDds_getFrameHeaderSize;
             return 0;
@@ -1019,7 +1237,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
     case ZSTDds_decodeSkippableHeader:
         assert(src != NULL);
         assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE);
-        memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize);   /* complete skippable header */
+        ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize);   /* complete skippable header */
         dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE);   /* note : dctx->expected can grow seriously large, beyond local buffer size */
         dctx->stage = ZSTDds_skipFrame;
         return 0;
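
The ZSTDds_* stages above implement the buffer-less streaming contract: the caller feeds exactly ZSTD_nextSrcSizeToDecompress() bytes per call, and each call advances the state machine by one header, block, or checksum. A minimal driver loop against that long-standing advanced API (error handling abbreviated; skippable frames and legacy formats ignored):

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>

    size_t demo_bufferless(ZSTD_DCtx* dctx,
                           const unsigned char* src, size_t srcSize,
                           unsigned char* dst, size_t dstCap)
    {
        size_t written = 0;
        size_t const initR = ZSTD_decompressBegin(dctx);
        if (ZSTD_isError(initR)) return initR;
        for (;;) {
            size_t const need = ZSTD_nextSrcSizeToDecompress(dctx);
            size_t out;
            if (need == 0) break;                   /* frame finished */
            if (need > srcSize) return (size_t)-1;  /* demo-only sentinel: truncated input */
            out = ZSTD_decompressContinue(dctx, dst + written, dstCap - written, src, need);
            if (ZSTD_isError(out)) return out;
            src += need; srcSize -= need;
            written += out;
        }
        return written;
    }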
@@ -1075,7 +1293,7 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
                                                 workspace, workspaceSize);
 #else
         size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable,
-                                                dictPtr, dictEnd - dictPtr,
+                                                dictPtr, (size_t)(dictEnd - dictPtr),
                                                 workspace, workspaceSize);
 #endif
         RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, "");
@@ -1084,40 +1302,46 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
 
     {   short offcodeNCount[MaxOff+1];
         unsigned offcodeMaxValue = MaxOff, offcodeLog;
-        size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
+        size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, (size_t)(dictEnd-dictPtr));
         RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, "");
         RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted, "");
         RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, "");
         ZSTD_buildFSETable( entropy->OFTable,
                             offcodeNCount, offcodeMaxValue,
                             OF_base, OF_bits,
-                            offcodeLog);
+                            offcodeLog,
+                            entropy->workspace, sizeof(entropy->workspace),
+                            /* bmi2 */0);
         dictPtr += offcodeHeaderSize;
     }
 
     {   short matchlengthNCount[MaxML+1];
         unsigned matchlengthMaxValue = MaxML, matchlengthLog;
-        size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
+        size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, (size_t)(dictEnd-dictPtr));
         RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, "");
         RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted, "");
         RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, "");
         ZSTD_buildFSETable( entropy->MLTable,
                             matchlengthNCount, matchlengthMaxValue,
                             ML_base, ML_bits,
-                            matchlengthLog);
+                            matchlengthLog,
+                            entropy->workspace, sizeof(entropy->workspace),
+                            /* bmi2 */ 0);
         dictPtr += matchlengthHeaderSize;
     }
 
     {   short litlengthNCount[MaxLL+1];
         unsigned litlengthMaxValue = MaxLL, litlengthLog;
-        size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
+        size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, (size_t)(dictEnd-dictPtr));
         RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, "");
         RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted, "");
         RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, "");
         ZSTD_buildFSETable( entropy->LLTable,
                             litlengthNCount, litlengthMaxValue,
                             LL_base, LL_bits,
-                            litlengthLog);
+                            litlengthLog,
+                            entropy->workspace, sizeof(entropy->workspace),
+                            /* bmi2 */ 0);
         dictPtr += litlengthHeaderSize;
     }
 
@@ -1131,7 +1355,7 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
             entropy->rep[i] = rep;
     }   }
 
-    return dictPtr - (const BYTE*)dict;
+    return (size_t)(dictPtr - (const BYTE*)dict);
 }
 
 static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
@@ -1158,8 +1382,12 @@ static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict
 size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
 {
     assert(dctx != NULL);
+#if ZSTD_TRACE
+    dctx->traceCtx = (ZSTD_trace_decompress_begin != NULL) ? ZSTD_trace_decompress_begin(dctx) : 0;
+#endif
     dctx->expected = ZSTD_startingInputLength(dctx->format);  /* dctx->format must be properly set */
     dctx->stage = ZSTDds_getFrameHeaderSize;
+    dctx->processedCSize = 0;
     dctx->decodedSize = 0;
     dctx->previousDstEnd = NULL;
     dctx->prefixStart = NULL;
@@ -1170,7 +1398,7 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
     dctx->dictID = 0;
     dctx->bType = bt_reserved;
     ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue));
-    memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue));  /* initial repcodes */
+    ZSTD_memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue));  /* initial repcodes */
     dctx->LLTptr = dctx->entropy.LLTable;
     dctx->MLTptr = dctx->entropy.MLTable;
     dctx->OFTptr = dctx->entropy.OFTable;
@@ -1373,6 +1601,16 @@ size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
     if (ddict) {
         dctx->ddict = ddict;
         dctx->dictUses = ZSTD_use_indefinitely;
+        if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts) {
+            if (dctx->ddictSet == NULL) {
+                dctx->ddictSet = ZSTD_createDDictHashSet(dctx->customMem);
+                if (!dctx->ddictSet) {
+                    RETURN_ERROR(memory_allocation, "Failed to allocate memory for hash set!");
+                }
+            }
+            assert(!dctx->staticSize);  /* Impossible: ddictSet cannot have been allocated if static dctx */
+            FORWARD_IF_ERROR(ZSTD_DDictHashSet_addDDict(dctx->ddictSet, ddict, dctx->customMem), "");
+        }
     }
     return 0;
 }
@@ -1394,7 +1632,7 @@ size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize)
 
 size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format)
 {
-    return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, format);
+    return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, (int)format);
 }
 
 ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam)
@@ -1411,8 +1649,16 @@ ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam)
             ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless);
             return bounds;
         case ZSTD_d_stableOutBuffer:
-            bounds.lowerBound = (int)ZSTD_obm_buffered;
-            bounds.upperBound = (int)ZSTD_obm_stable;
+            bounds.lowerBound = (int)ZSTD_bm_buffered;
+            bounds.upperBound = (int)ZSTD_bm_stable;
+            return bounds;
+        case ZSTD_d_forceIgnoreChecksum:
+            bounds.lowerBound = (int)ZSTD_d_validateChecksum;
+            bounds.upperBound = (int)ZSTD_d_ignoreChecksum;
+            return bounds;
+        case ZSTD_d_refMultipleDDicts:
+            bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict;
+            bounds.upperBound = (int)ZSTD_rmd_refMultipleDDicts;
             return bounds;
         default:;
     }
@@ -1436,6 +1682,29 @@ static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value)
     RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound, ""); \
 }
 
+size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value)
+{
+    switch (param) {
+        case ZSTD_d_windowLogMax:
+            *value = (int)ZSTD_highbit32((U32)dctx->maxWindowSize);
+            return 0;
+        case ZSTD_d_format:
+            *value = (int)dctx->format;
+            return 0;
+        case ZSTD_d_stableOutBuffer:
+            *value = (int)dctx->outBufferMode;
+            return 0;
+        case ZSTD_d_forceIgnoreChecksum:
+            *value = (int)dctx->forceIgnoreChecksum;
+            return 0;
+        case ZSTD_d_refMultipleDDicts:
+            *value = (int)dctx->refMultipleDDicts;
+            return 0;
+        default:;
+    }
+    RETURN_ERROR(parameter_unsupported, "");
+}
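+
+ZSTD_DCtx_getParameter adds symmetric read-back for the decompression parameters; note that ZSTD_d_windowLogMax is reported as the log2 of the stored maxWindowSize. A short round trip, assuming the v1.5.0 experimental API:
+
+    #define ZSTD_STATIC_LINKING_ONLY
+    #include <zstd.h>
+    #include <stdio.h>
+
+    int main(void)
+    {
+        ZSTD_DCtx* dctx = ZSTD_createDCtx();
+        int windowLog = 0;
+        ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, 27);
+        ZSTD_DCtx_getParameter(dctx, ZSTD_d_windowLogMax, &windowLog);
+        printf("windowLogMax=%d\n", windowLog);   /* 27 */
+        ZSTD_freeDCtx(dctx);
+        return 0;
+    }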
+
 size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value)
 {
     RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
@@ -1451,7 +1720,18 @@ size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value
             return 0;
         case ZSTD_d_stableOutBuffer:
             CHECK_DBOUNDS(ZSTD_d_stableOutBuffer, value);
-            dctx->outBufferMode = (ZSTD_outBufferMode_e)value;
+            dctx->outBufferMode = (ZSTD_bufferMode_e)value;
+            return 0;
+        case ZSTD_d_forceIgnoreChecksum:
+            CHECK_DBOUNDS(ZSTD_d_forceIgnoreChecksum, value);
+            dctx->forceIgnoreChecksum = (ZSTD_forceIgnoreChecksum_e)value;
+            return 0;
+        case ZSTD_d_refMultipleDDicts:
+            CHECK_DBOUNDS(ZSTD_d_refMultipleDDicts, value);
+            if (dctx->staticSize != 0) {
+                RETURN_ERROR(parameter_unsupported, "Static dctx does not support multiple DDicts!");
+            }
+            dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value;
             return 0;
         default:;
     }
@@ -1469,8 +1749,7 @@ size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset)
       || (reset == ZSTD_reset_session_and_parameters) ) {
         RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
         ZSTD_clearDict(dctx);
-        dctx->format = ZSTD_f_zstd1;
-        dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
+        ZSTD_DCtx_resetParameters(dctx);
     }
     return 0;
 }
@@ -1524,7 +1803,7 @@ static void ZSTD_DCtx_updateOversizedDuration(ZSTD_DStream* zds, size_t const ne
 {
     if (ZSTD_DCtx_isOverflow(zds, neededInBuffSize, neededOutBuffSize))
         zds->oversizedDuration++;
-    else 
+    else
         zds->oversizedDuration = 0;
 }
 
@@ -1538,7 +1817,7 @@ static size_t ZSTD_checkOutBuffer(ZSTD_DStream const* zds, ZSTD_outBuffer const*
 {
     ZSTD_outBuffer const expect = zds->expectedOutBuffer;
     /* No requirement when ZSTD_obm_stable is not enabled. */
-    if (zds->outBufferMode != ZSTD_obm_stable)
+    if (zds->outBufferMode != ZSTD_bm_stable)
         return 0;
     /* Any buffer is allowed in zdss_init, this must be the same for every other call until
      * the context is reset.
@@ -1548,7 +1827,7 @@ static size_t ZSTD_checkOutBuffer(ZSTD_DStream const* zds, ZSTD_outBuffer const*
     /* The buffer must match our expectation exactly. */
     if (expect.dst == output->dst && expect.pos == output->pos && expect.size == output->size)
         return 0;
-    RETURN_ERROR(dstBuffer_wrong, "ZSTD_obm_stable enabled but output differs!");
+    RETURN_ERROR(dstBuffer_wrong, "ZSTD_d_stableOutBuffer enabled but output differs!");
 }
 
 /* Calls ZSTD_decompressContinue() with the right parameters for ZSTD_decompressStream()
@@ -1560,7 +1839,7 @@ static size_t ZSTD_decompressContinueStream(
             ZSTD_DStream* zds, char** op, char* oend,
             void const* src, size_t srcSize) {
     int const isSkipFrame = ZSTD_isSkipFrame(zds);
-    if (zds->outBufferMode == ZSTD_obm_buffered) {
+    if (zds->outBufferMode == ZSTD_bm_buffered) {
         size_t const dstSize = isSkipFrame ? 0 : zds->outBuffSize - zds->outStart;
         size_t const decodedSize = ZSTD_decompressContinue(zds,
                 zds->outBuff + zds->outStart, dstSize, src, srcSize);
@@ -1573,14 +1852,14 @@ static size_t ZSTD_decompressContinueStream(
         }
     } else {
         /* Write directly into the output buffer */
-        size_t const dstSize = isSkipFrame ? 0 : oend - *op;
+        size_t const dstSize = isSkipFrame ? 0 : (size_t)(oend - *op);
         size_t const decodedSize = ZSTD_decompressContinue(zds, *op, dstSize, src, srcSize);
         FORWARD_IF_ERROR(decodedSize, "");
         *op += decodedSize;
         /* Flushing is not needed. */
         zds->streamStage = zdss_read;
         assert(*op <= oend);
-        assert(zds->outBufferMode == ZSTD_obm_stable);
+        assert(zds->outBufferMode == ZSTD_bm_stable);
     }
     return 0;
 }
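
ZSTD_bm_stable drops the internal output buffer entirely: the branch above writes straight into the caller's buffer, which is why ZSTD_checkOutBuffer insists the exact same ZSTD_outBuffer comes back on every call. A streaming sketch under that contract, assuming the experimental ZSTD_d_stableOutBuffer parameter:

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>

    /* dst must be large enough for the whole frame and must not move between calls. */
    size_t demo_stableOut(ZSTD_DCtx* dctx, void* dst, size_t dstCap,
                          const void* src, size_t srcSize)
    {
        ZSTD_outBuffer out = { dst, dstCap, 0 };   /* same struct every call */
        ZSTD_inBuffer  in  = { src, srcSize, 0 };
        ZSTD_DCtx_setParameter(dctx, ZSTD_d_stableOutBuffer, 1);
        while (in.pos < in.size) {
            size_t const r = ZSTD_decompressStream(dctx, &out, &in);
            if (ZSTD_isError(r)) return r;
            if (r == 0) break;   /* frame complete */
        }
        return out.pos;          /* total bytes decompressed into dst */
    }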
@@ -1635,6 +1914,9 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
             }   }
 #endif
             {   size_t const hSize = ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format);
+                if (zds->refMultipleDDicts && zds->ddictSet) {
+                    ZSTD_DCtx_selectFrameDDict(zds);
+                }
                 DEBUGLOG(5, "header size : %u", (U32)hSize);
                 if (ZSTD_isError(hSize)) {
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
@@ -1663,14 +1945,14 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
                     assert(iend >= ip);
                     if (toLoad > remainingInput) {   /* not enough input to load full header */
                         if (remainingInput > 0) {
-                            memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput);
+                            ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput);
                             zds->lhSize += remainingInput;
                         }
                         input->pos = input->size;
                         return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize;   /* remaining header bytes + next block header */
                     }
                     assert(ip != NULL);
-                    memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad;
+                    ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad;
                     break;
             }   }
 
@@ -1678,10 +1960,10 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
             if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
                 && zds->fParams.frameType != ZSTD_skippableFrame
                 && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) {
-                size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend-istart);
+                size_t const cSize = ZSTD_findFrameCompressedSize(istart, (size_t)(iend-istart));
                 if (cSize <= (size_t)(iend-istart)) {
                     /* shortcut : using single-pass mode */
-                    size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, oend-op, istart, cSize, ZSTD_getDDict(zds));
+                    size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds));
                     if (ZSTD_isError(decompressedSize)) return decompressedSize;
                     DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()")
                     ip = istart + cSize;
@@ -1693,7 +1975,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
             }   }
 
             /* Check output buffer is large enough for ZSTD_d_stableOutBuffer. */
-            if (zds->outBufferMode == ZSTD_obm_stable
+            if (zds->outBufferMode == ZSTD_bm_stable
                 && zds->fParams.frameType != ZSTD_skippableFrame
                 && zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
                 && (U64)(size_t)(oend-op) < zds->fParams.frameContentSize) {
@@ -1723,7 +2005,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
 
             /* Adapt buffer sizes to frame header instructions */
             {   size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */);
-                size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_obm_buffered
+                size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_bm_buffered
                         ? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize)
                         : 0;
 
@@ -1731,7 +2013,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
 
                 {   int const tooSmall = (zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize);
                     int const tooLarge = ZSTD_DCtx_isOversizedTooLong(zds);
-                    
+
                     if (tooSmall || tooLarge) {
                         size_t const bufferSize = neededInBuffSize + neededOutBuffSize;
                         DEBUGLOG(4, "inBuff  : from %u to %u",
@@ -1745,10 +2027,10 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
                                 bufferSize > zds->staticSize - sizeof(ZSTD_DCtx),
                                 memory_allocation, "");
                         } else {
-                            ZSTD_free(zds->inBuff, zds->customMem);
+                            ZSTD_customFree(zds->inBuff, zds->customMem);
                             zds->inBuffSize = 0;
                             zds->outBuffSize = 0;
-                            zds->inBuff = (char*)ZSTD_malloc(bufferSize, zds->customMem);
+                            zds->inBuff = (char*)ZSTD_customMalloc(bufferSize, zds->customMem);
                             RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation, "");
                         }
                         zds->inBuffSize = neededInBuffSize;
@@ -1760,7 +2042,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
 
         case zdss_read:
             DEBUGLOG(5, "stage zdss_read");
-            {   size_t const neededInSize = ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip);
+            {   size_t const neededInSize = ZSTD_nextSrcSizeToDecompressWithInputSize(zds, (size_t)(iend - ip));
                 DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize);
                 if (neededInSize==0) {  /* end of frame */
                     zds->streamStage = zdss_init;
@@ -1790,7 +2072,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
                     RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos,
                                     corruption_detected,
                                     "should never happen");
-                    loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend-ip);
+                    loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, (size_t)(iend-ip));
                 }
                 ip += loadedSize;
                 zds->inPos += loadedSize;
@@ -1804,7 +2086,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
             }
         case zdss_flush:
             {   size_t const toFlushSize = zds->outEnd - zds->outStart;
-                size_t const flushedSize = ZSTD_limitCopy(op, oend-op, zds->outBuff + zds->outStart, toFlushSize);
+                size_t const flushedSize = ZSTD_limitCopy(op, (size_t)(oend-op), zds->outBuff + zds->outStart, toFlushSize);
                 op += flushedSize;
                 zds->outStart += flushedSize;
                 if (flushedSize == toFlushSize) {  /* flush completed */

+ 191 - 75
Utilities/cmzstd/lib/decompress/zstd_decompress_block.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -14,7 +14,7 @@
 /*-*******************************************************
 *  Dependencies
 *********************************************************/
-#include <string.h>      /* memcpy, memmove, memset */
+#include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
 #include "../common/compiler.h"    /* prefetch */
 #include "../common/cpu.h"         /* bmi2 */
 #include "../common/mem.h"         /* low level memory routines */
@@ -44,7 +44,7 @@
 /*_*******************************************************
 *  Memory operations
 **********************************************************/
-static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
+static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); }
 
 
 /*-*************************************************************
@@ -166,7 +166,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                 dctx->litSize = litSize;
                 dctx->litEntropy = 1;
                 if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
-                memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
+                ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
                 return litCSize + lhSize;
             }
 
@@ -191,10 +191,10 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
 
                 if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) {  /* risk reading beyond src buffer with wildcopy */
                     RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
-                    memcpy(dctx->litBuffer, istart+lhSize, litSize);
+                    ZSTD_memcpy(dctx->litBuffer, istart+lhSize, litSize);
                     dctx->litPtr = dctx->litBuffer;
                     dctx->litSize = litSize;
-                    memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
+                    ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
                     return lhSize+litSize;
                 }
                 /* direct reference into compressed stream */
@@ -223,7 +223,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                     break;
                 }
                 RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
-                memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
+                ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
                 dctx->litPtr = dctx->litBuffer;
                 dctx->litSize = litSize;
                 return lhSize+1;
@@ -236,7 +236,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
 
 /* Default FSE distribution tables.
  * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
- * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#default-distributions
+ * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
  * They were generated programmatically with following method :
  * - start from default distributions, present in /lib/common/zstd_internal.h
  * - generate tables normally, using ZSTD_buildFSETable()
@@ -364,23 +364,26 @@ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddB
  * generate FSE decoding table for one symbol (ll, ml or off)
  * cannot fail if input is valid =>
  * all inputs are presumed validated at this stage */
-void
-ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
+FORCE_INLINE_TEMPLATE
+void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
             const short* normalizedCounter, unsigned maxSymbolValue,
             const U32* baseValue, const U32* nbAdditionalBits,
-            unsigned tableLog)
+            unsigned tableLog, void* wksp, size_t wkspSize)
 {
     ZSTD_seqSymbol* const tableDecode = dt+1;
-    U16 symbolNext[MaxSeq+1];
-
     U32 const maxSV1 = maxSymbolValue + 1;
     U32 const tableSize = 1 << tableLog;
-    U32 highThreshold = tableSize-1;
+
+    U16* symbolNext = (U16*)wksp;
+    BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1);
+    U32 highThreshold = tableSize - 1;
+
 
     /* Sanity Checks */
     assert(maxSymbolValue <= MaxSeq);
     assert(tableLog <= MaxFSELog);
-
+    assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
+    (void)wkspSize;
     /* Init, lay down lowprob symbols */
     {   ZSTD_seqSymbol_header DTableH;
         DTableH.tableLog = tableLog;
@@ -396,16 +399,69 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
                     assert(normalizedCounter[s]>=0);
                     symbolNext[s] = (U16)normalizedCounter[s];
         }   }   }
-        memcpy(dt, &DTableH, sizeof(DTableH));
+        ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
     }
 
     /* Spread symbols */
-    {   U32 const tableMask = tableSize-1;
+    assert(tableSize <= 512);
+    /* Specialized symbol spreading for the case when there are
+     * no low probability (-1 count) symbols. When compressing
+     * small blocks we avoid low probability symbols so that we hit this
+     * case, since header decoding speed matters more.
+     */
+    if (highThreshold == tableSize - 1) {
+        size_t const tableMask = tableSize-1;
+        size_t const step = FSE_TABLESTEP(tableSize);
+        /* First lay down the symbols in order.
+         * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
+         * misses since small blocks generally have small table logs, so nearly
+         * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
+         * our buffer to handle the over-write.
+         */
+        {
+            U64 const add = 0x0101010101010101ull;
+            size_t pos = 0;
+            U64 sv = 0;
+            U32 s;
+            for (s=0; s<maxSV1; ++s, sv += add) {
+                int i;
+                int const n = normalizedCounter[s];
+                MEM_write64(spread + pos, sv);
+                for (i = 8; i < n; i += 8) {
+                    MEM_write64(spread + pos + i, sv);
+                }
+                pos += n;
+            }
+        }
+        /* Now we spread those positions across the table.
+         * The benefit of doing it in two stages is that we avoid the
+         * variable size inner loop, which caused lots of branch misses.
+         * Now we can run through all the positions without any branch misses.
+         * We unroll the loop twice, since that is what empirically worked best.
+         */
+        {
+            size_t position = 0;
+            size_t s;
+            size_t const unroll = 2;
+            assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
+            for (s = 0; s < (size_t)tableSize; s += unroll) {
+                size_t u;
+                for (u = 0; u < unroll; ++u) {
+                    size_t const uPosition = (position + (u * step)) & tableMask;
+                    tableDecode[uPosition].baseValue = spread[s + u];
+                }
+                position = (position + (unroll * step)) & tableMask;
+            }
+            assert(position == 0);
+        }
+    } else {
+        U32 const tableMask = tableSize-1;
         U32 const step = FSE_TABLESTEP(tableSize);
         U32 s, position = 0;
         for (s=0; s<maxSV1; s++) {
             int i;
-            for (i=0; i<normalizedCounter[s]; i++) {
+            int const n = normalizedCounter[s];
+            for (i=0; i<n; i++) {
                 tableDecode[position].baseValue = s;
                 position = (position + step) & tableMask;
                 while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
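
To make the two-stage spread concrete, here is a self-contained toy version of
the same technique with illustrative constants (tableLog 6, three symbols whose
counts sum to the table size); it mirrors the approach, not zstd's exact code:

    #include <stdint.h>
    #include <stddef.h>
    #include <string.h>
    #include <stdio.h>

    #define TABLELOG  6
    #define TABLESIZE (1u << TABLELOG)
    #define TABLESTEP ((TABLESIZE >> 1) + (TABLESIZE >> 3) + 3)  /* FSE_TABLESTEP */

    int main(void)
    {
        short const counts[3] = { 40, 16, 8 };  /* no low-probability (-1) symbols */
        uint8_t spread[TABLESIZE + 8];          /* +8 : slack for 8-byte over-writes */
        uint8_t table[TABLESIZE];
        size_t pos = 0;
        /* Stage 1 : lay each symbol down 'count' times, 8 bytes per store. */
        for (uint8_t s = 0; s < 3; ++s) {
            uint64_t sv;
            memset(&sv, s, sizeof(sv));              /* 8 copies of the symbol byte */
            for (int i = 0; i < counts[s]; i += 8)
                memcpy(spread + pos + i, &sv, 8);    /* predictable wide writes */
            pos += (size_t)counts[s];
        }
        /* Stage 2 : scatter the contiguous run across the table with a fixed
         * step that is coprime with the table size, so every slot is visited. */
        {   size_t position = 0;
            for (size_t i = 0; i < TABLESIZE; ++i) {
                table[position] = spread[i];
                position = (position + TABLESTEP) & (TABLESIZE - 1);
            }
        }
        printf("table[0..7] = %u %u %u %u %u %u %u %u\n",
               table[0], table[1], table[2], table[3],
               table[4], table[5], table[6], table[7]);
        return 0;
    }
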
@@ -414,7 +470,8 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
     }
 
     /* Build Decoding table */
-    {   U32 u;
+    {
+        U32 u;
         for (u=0; u<tableSize; u++) {
             U32 const symbol = tableDecode[u].baseValue;
             U32 const nextState = symbolNext[symbol]++;
@@ -423,7 +480,46 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
             assert(nbAdditionalBits[symbol] < 255);
             tableDecode[u].nbAdditionalBits = (BYTE)nbAdditionalBits[symbol];
             tableDecode[u].baseValue = baseValue[symbol];
-    }   }
+        }
+    }
+}
+
+/* Avoids the FORCE_INLINE of the _body() function. */
+static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
+            const short* normalizedCounter, unsigned maxSymbolValue,
+            const U32* baseValue, const U32* nbAdditionalBits,
+            unsigned tableLog, void* wksp, size_t wkspSize)
+{
+    ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
+            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
+}
+
+#if DYNAMIC_BMI2
+TARGET_ATTRIBUTE("bmi2") static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
+            const short* normalizedCounter, unsigned maxSymbolValue,
+            const U32* baseValue, const U32* nbAdditionalBits,
+            unsigned tableLog, void* wksp, size_t wkspSize)
+{
+    ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
+            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
+}
+#endif
+
+void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
+            const short* normalizedCounter, unsigned maxSymbolValue,
+            const U32* baseValue, const U32* nbAdditionalBits,
+            unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
+{
+#if DYNAMIC_BMI2
+    if (bmi2) {
+        ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
+                baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
+        return;
+    }
+#endif
+    (void)bmi2;
+    ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
+            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
 }
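
The _default/_bmi2 pair above is an instance of a general dispatch pattern:
compile the force-inlined body twice, once under a target attribute, and branch
once on the cached CPU feature bit rather than at every call site. A generic
sketch with hypothetical identifiers:

    #include <stdint.h>

    static inline uint32_t popcount_body(uint32_t v)  /* shared, inlined body */
    {
        uint32_t c = 0;
        while (v) { v &= v - 1; ++c; }
        return c;
    }

    static uint32_t popcount_default(uint32_t v) { return popcount_body(v); }

    #if defined(__GNUC__) && defined(__x86_64__)
    __attribute__((target("bmi2")))   /* this copy may use BMI2 instructions */
    static uint32_t popcount_bmi2(uint32_t v) { return popcount_body(v); }
    #endif

    uint32_t popcount_dispatch(uint32_t v, int bmi2 /* cached CPUID result */)
    {
    #if defined(__GNUC__) && defined(__x86_64__)
        if (bmi2) return popcount_bmi2(v);
    #endif
        (void)bmi2;
        return popcount_default(v);
    }
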
 
 
@@ -435,7 +531,8 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
                                  const void* src, size_t srcSize,
                                  const U32* baseValue, const U32* nbAdditionalBits,
                                  const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
-                                 int ddictIsCold, int nbSeq)
+                                 int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
+                                 int bmi2)
 {
     switch(type)
     {
@@ -467,7 +564,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
             size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
             RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
             RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
-            ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
+            ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
             *DTablePtr = DTableSpace;
             return headerSize;
         }
@@ -480,7 +577,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
 size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                              const void* src, size_t srcSize)
 {
-    const BYTE* const istart = (const BYTE* const)src;
+    const BYTE* const istart = (const BYTE*)src;
     const BYTE* const iend = istart + srcSize;
     const BYTE* ip = istart;
     int nbSeq;
@@ -499,7 +596,8 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
     if (nbSeq > 0x7F) {
         if (nbSeq == 0xFF) {
             RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
-            nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
+            nbSeq = MEM_readLE16(ip) + LONGNBSEQ;
+            ip+=2;
         } else {
             RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
             nbSeq = ((nbSeq-0x80)<<8) + *ip++;
@@ -520,7 +618,9 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                                                       ip, iend-ip,
                                                       LL_base, LL_bits,
                                                       LL_defaultDTable, dctx->fseEntropy,
-                                                      dctx->ddictIsCold, nbSeq);
+                                                      dctx->ddictIsCold, nbSeq,
+                                                      dctx->workspace, sizeof(dctx->workspace),
+                                                      dctx->bmi2);
             RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
             ip += llhSize;
         }
@@ -530,7 +630,9 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                                                       ip, iend-ip,
                                                       OF_base, OF_bits,
                                                       OF_defaultDTable, dctx->fseEntropy,
-                                                      dctx->ddictIsCold, nbSeq);
+                                                      dctx->ddictIsCold, nbSeq,
+                                                      dctx->workspace, sizeof(dctx->workspace),
+                                                      dctx->bmi2);
             RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
             ip += ofhSize;
         }
@@ -540,7 +642,9 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                                                       ip, iend-ip,
                                                       ML_base, ML_bits,
                                                       ML_defaultDTable, dctx->fseEntropy,
-                                                      dctx->ddictIsCold, nbSeq);
+                                                      dctx->ddictIsCold, nbSeq,
+                                                      dctx->workspace, sizeof(dctx->workspace),
+                                                      dctx->bmi2);
             RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
             ip += mlhSize;
         }
@@ -554,7 +658,6 @@ typedef struct {
     size_t litLength;
     size_t matchLength;
     size_t offset;
-    const BYTE* match;
 } seq_t;
 
 typedef struct {
@@ -568,9 +671,6 @@ typedef struct {
     ZSTD_fseState stateOffb;
     ZSTD_fseState stateML;
     size_t prevOffset[ZSTD_REP_NUM];
-    const BYTE* prefixStart;
-    const BYTE* dictEnd;
-    size_t pos;
 } seqState_t;
 
 /*! ZSTD_overlapCopy8() :
@@ -686,12 +786,12 @@ size_t ZSTD_execSequenceEnd(BYTE* op,
         RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
         match = dictEnd - (prefixStart-match);
         if (match + sequence.matchLength <= dictEnd) {
-            memmove(oLitEnd, match, sequence.matchLength);
+            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
             return sequenceLength;
         }
         /* span extDict & currentPrefixSegment */
         {   size_t const length1 = dictEnd - match;
-            memmove(oLitEnd, match, length1);
+            ZSTD_memmove(oLitEnd, match, length1);
             op = oLitEnd + length1;
             sequence.matchLength -= length1;
             match = prefixStart;
@@ -752,12 +852,12 @@ size_t ZSTD_execSequence(BYTE* op,
         RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
         match = dictEnd + (match - prefixStart);
         if (match + sequence.matchLength <= dictEnd) {
-            memmove(oLitEnd, match, sequence.matchLength);
+            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
             return sequenceLength;
         }
         /* span extDict & currentPrefixSegment */
         {   size_t const length1 = dictEnd - match;
-            memmove(oLitEnd, match, length1);
+            ZSTD_memmove(oLitEnd, match, length1);
             op = oLitEnd + length1;
             sequence.matchLength -= length1;
             match = prefixStart;
@@ -832,10 +932,9 @@ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD
         : 0)
 
 typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
-typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e;
 
 FORCE_INLINE_TEMPLATE seq_t
-ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch)
+ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
 {
     seq_t seq;
     ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state];
@@ -910,14 +1009,6 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, c
     DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
                 (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
 
-    if (prefetch == ZSTD_p_prefetch) {
-        size_t const pos = seqState->pos + seq.litLength;
-        const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
-        seq.match = matchBase + pos - seq.offset;  /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
-                                                    * No consequence though : no memory access will occur, offset is only used for prefetching */
-        seqState->pos = pos + seq.matchLength;
-    }
-
     /* ANS state update
      * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo().
      * clang-9.2.0 does 7% worse with ZSTD_updateFseState().
@@ -948,7 +1039,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, c
 }
 
 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
-static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
+MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
 {
     size_t const windowSize = dctx->fParams.windowSize;
     /* No dictionary used. */
@@ -969,6 +1060,7 @@ MEM_STATIC void ZSTD_assertValidSequence(
         seq_t const seq,
         BYTE const* prefixStart, BYTE const* virtualStart)
 {
+#if DEBUGLEVEL >= 1
     size_t const windowSize = dctx->fParams.windowSize;
     size_t const sequenceSize = seq.litLength + seq.matchLength;
     BYTE const* const oLitEnd = op + seq.litLength;
@@ -986,6 +1078,9 @@ MEM_STATIC void ZSTD_assertValidSequence(
         /* Offset must be within our window. */
         assert(seq.offset <= windowSize);
     }
+#else
+    (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
+#endif
 }
 #endif
 
@@ -1000,7 +1095,7 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
 {
     const BYTE* ip = (const BYTE*)seqStart;
     const BYTE* const iend = ip + seqSize;
-    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + maxDstSize;
     BYTE* op = ostart;
     const BYTE* litPtr = dctx->litPtr;
@@ -1014,7 +1109,6 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
     /* Regen sequences */
     if (nbSeq) {
         seqState_t seqState;
-        size_t error = 0;
         dctx->fseEntropy = 1;
         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
         RETURN_ERROR_IF(
@@ -1048,13 +1142,14 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
          * If you see most cycles served out of the DSB you've hit the good case.
          * If it is pretty even then you may be in an okay case.
          *
-         * I've been able to reproduce this issue on the following CPUs:
+         * This issue has been reproduced on the following CPUs:
          *   - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
          *               Use Instruments->Counters to get DSB/MITE cycles.
          *               I never got performance swings, but I was able to
          *               go from the good case of mostly DSB to half of the
          *               cycles served from MITE.
          *   - Coffeelake: Intel i9-9900k
+         *   - Coffeelake: Intel i7-9700k
          *
          * I haven't been able to reproduce the instability or DSB misses on any
          * of the following CPUs:
@@ -1067,33 +1162,35 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
          *
          *   https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
          */
+        __asm__(".p2align 6");
+        __asm__("nop");
         __asm__(".p2align 5");
         __asm__("nop");
+#  if __GNUC__ >= 9
+        /* better for gcc-9 and gcc-10, worse for clang and gcc-8 */
+        __asm__(".p2align 3");
+#  else
         __asm__(".p2align 4");
+#  endif
 #endif
         for ( ; ; ) {
-            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch);
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
             size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
             assert(!ZSTD_isError(oneSeqSize));
             if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
 #endif
+            if (UNLIKELY(ZSTD_isError(oneSeqSize)))
+                return oneSeqSize;
             DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
+            op += oneSeqSize;
+            if (UNLIKELY(!--nbSeq))
+                break;
             BIT_reloadDStream(&(seqState.DStream));
-            /* gcc and clang both don't like early returns in this loop.
-             * gcc doesn't like early breaks either.
-             * Instead save an error and report it at the end.
-             * When there is an error, don't increment op, so we don't
-             * overwrite.
-             */
-            if (UNLIKELY(ZSTD_isError(oneSeqSize))) error = oneSeqSize;
-            else op += oneSeqSize;
-            if (UNLIKELY(!--nbSeq)) break;
         }
 
         /* check if reached exact end */
         DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
-        if (ZSTD_isError(error)) return error;
         RETURN_ERROR_IF(nbSeq, corruption_detected, "");
         RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
         /* save reps for next block */
@@ -1104,7 +1201,7 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
     {   size_t const lastLLSize = litEnd - litPtr;
         RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
         if (op != NULL) {
-            memcpy(op, litPtr, lastLLSize);
+            ZSTD_memcpy(op, litPtr, lastLLSize);
             op += lastLLSize;
         }
     }
@@ -1124,6 +1221,24 @@ ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
 
 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
+                   const BYTE* const prefixStart, const BYTE* const dictEnd)
+{
+    prefetchPos += sequence.litLength;
+    {   const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
+        const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
+                                                                              * No consequence though : memory address is only used for prefetching, not for dereferencing */
+        PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE);   /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
+    }
+    return prefetchPos + sequence.matchLength;
+}
+
+/* This decoding function employs prefetching
+ * to reduce latency impact of cache misses.
+ * It's generally employed when a block contains a significant portion of long-distance matches
+ * or when coupled with a "cold" dictionary */
 FORCE_INLINE_TEMPLATE size_t
 ZSTD_decompressSequencesLong_body(
                                ZSTD_DCtx* dctx,
@@ -1134,7 +1249,7 @@ ZSTD_decompressSequencesLong_body(
 {
     const BYTE* ip = (const BYTE*)seqStart;
     const BYTE* const iend = ip + seqSize;
-    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + maxDstSize;
     BYTE* op = ostart;
     const BYTE* litPtr = dctx->litPtr;
@@ -1146,18 +1261,17 @@ ZSTD_decompressSequencesLong_body(
 
     /* Regen sequences */
     if (nbSeq) {
-#define STORED_SEQS 4
+#define STORED_SEQS 8
 #define STORED_SEQS_MASK (STORED_SEQS-1)
-#define ADVANCED_SEQS 4
+#define ADVANCED_SEQS STORED_SEQS
         seq_t sequences[STORED_SEQS];
         int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
         seqState_t seqState;
         int seqNb;
+        size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */
+
         dctx->fseEntropy = 1;
         { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
-        seqState.prefixStart = prefixStart;
-        seqState.pos = (size_t)(op-prefixStart);
-        seqState.dictEnd = dictEnd;
         assert(dst != NULL);
         assert(iend >= ip);
         RETURN_ERROR_IF(
@@ -1169,21 +1283,23 @@ ZSTD_decompressSequencesLong_body(
 
         /* prepare in advance */
         for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
-            sequences[seqNb] = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
-            PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+            prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
+            sequences[seqNb] = sequence;
         }
         RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
 
         /* decode and decompress */
         for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
-            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
             size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
             assert(!ZSTD_isError(oneSeqSize));
             if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
 #endif
             if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
-            PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
+
+            prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
             sequences[seqNb & STORED_SEQS_MASK] = sequence;
             op += oneSeqSize;
         }
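
The loop above is a software pipeline: a ring of STORED_SEQS decoded sequences
keeps each match prefetch roughly eight sequences ahead of the copy that will
consume it, so the cache miss overlaps useful work. A schematic, self-contained
version of the pattern (the function pointers stand in for the real
decode/execute steps):

    #include <stddef.h>

    typedef struct { const char* match; size_t len; } seq_t;

    static void prefetch(const void* p)
    {
    #if defined(__GNUC__)
        __builtin_prefetch(p);
    #else
        (void)p;
    #endif
    }

    size_t run_pipeline(seq_t (*decode_next)(void*), void* state,
                        size_t (*execute)(seq_t), int nbSeq)
    {
        enum { STORED = 8, MASK = STORED - 1 };
        seq_t ring[STORED];
        size_t total = 0;
        int const advance = nbSeq < STORED ? nbSeq : STORED;
        int i;
        for (i = 0; i < advance; ++i) {   /* fill : decode ahead, prefetch match */
            ring[i] = decode_next(state);
            prefetch(ring[i].match);
        }
        for (; i < nbSeq; ++i) {          /* steady state : decode i, execute i-STORED */
            seq_t const s = decode_next(state);
            total += execute(ring[(i - STORED) & MASK]);
            prefetch(s.match);
            ring[i & MASK] = s;
        }
        for (i = nbSeq < STORED ? 0 : nbSeq - STORED; i < nbSeq; ++i)
            total += execute(ring[i & MASK]);   /* drain the ring */
        return total;
    }
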
@@ -1209,7 +1325,7 @@ ZSTD_decompressSequencesLong_body(
     {   size_t const lastLLSize = litEnd - litPtr;
         RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
         if (op != NULL) {
-            memcpy(op, litPtr, lastLLSize);
+            ZSTD_memcpy(op, litPtr, lastLLSize);
             op += lastLLSize;
         }
     }
@@ -1409,9 +1525,9 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
 }
 
 
-void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
+void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
 {
-    if (dst != dctx->previousDstEnd) {   /* not contiguous */
+    if (dst != dctx->previousDstEnd && dstSize > 0) {   /* not contiguous */
         dctx->dictEnd = dctx->previousDstEnd;
         dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
         dctx->prefixStart = dst;
@@ -1425,7 +1541,7 @@ size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
                       const void* src, size_t srcSize)
 {
     size_t dSize;
-    ZSTD_checkContinuity(dctx, dst);
+    ZSTD_checkContinuity(dctx, dst, dstCapacity);
     dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0);
     dctx->previousDstEnd = (char*)dst + dSize;
     return dSize;

+ 6 - 3
Utilities/cmzstd/lib/decompress/zstd_decompress_block.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,7 +15,7 @@
 /*-*******************************************************
  *  Dependencies
  *********************************************************/
-#include <stddef.h>   /* size_t */
+#include "../common/zstd_deps.h"   /* size_t */
 #include "../zstd.h"    /* DCtx, and some public functions */
 #include "../common/zstd_internal.h"  /* blockProperties_t, and some public functions */
 #include "zstd_decompress_internal.h"  /* ZSTD_seqSymbol */
@@ -48,12 +48,15 @@ size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
  * this function must be called with valid parameters only
  * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.)
  * in which case it cannot fail.
+ * The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is
+ * defined in zstd_decompress_internal.h.
  * Internal use only.
  */
 void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
              const short* normalizedCounter, unsigned maxSymbolValue,
              const U32* baseValue, const U32* nbAdditionalBits,
-                   unsigned tableLog);
+                   unsigned tableLog, void* wksp, size_t wkspSize,
+                   int bmi2);
 
 
 #endif /* ZSTD_DEC_BLOCK_H */

+ 27 - 11
Utilities/cmzstd/lib/decompress/zstd_decompress_internal.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -27,26 +27,26 @@
 /*-*******************************************************
  *  Constants
  *********************************************************/
-static const U32 LL_base[MaxLL+1] = {
+static UNUSED_ATTR const U32 LL_base[MaxLL+1] = {
                  0,    1,    2,     3,     4,     5,     6,      7,
                  8,    9,   10,    11,    12,    13,    14,     15,
                 16,   18,   20,    22,    24,    28,    32,     40,
                 48,   64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
                 0x2000, 0x4000, 0x8000, 0x10000 };
 
-static const U32 OF_base[MaxOff+1] = {
+static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
                  0,        1,       1,       5,     0xD,     0x1D,     0x3D,     0x7D,
                  0xFD,   0x1FD,   0x3FD,   0x7FD,   0xFFD,   0x1FFD,   0x3FFD,   0x7FFD,
                  0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
                  0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
 
-static const U32 OF_bits[MaxOff+1] = {
+static UNUSED_ATTR const U32 OF_bits[MaxOff+1] = {
                      0,  1,  2,  3,  4,  5,  6,  7,
                      8,  9, 10, 11, 12, 13, 14, 15,
                     16, 17, 18, 19, 20, 21, 22, 23,
                     24, 25, 26, 27, 28, 29, 30, 31 };
 
-static const U32 ML_base[MaxML+1] = {
+static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
                      3,  4,  5,    6,     7,     8,     9,    10,
                     11, 12, 13,   14,    15,    16,    17,    18,
                     19, 20, 21,   22,    23,    24,    25,    26,
@@ -73,12 +73,16 @@ static const U32 ML_base[MaxML+1] = {
 
  #define SEQSYMBOL_TABLE_SIZE(log)   (1 + (1 << (log)))
 
+#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
+#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
+
 typedef struct {
     ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)];    /* Note : Space reserved for FSE Tables */
     ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)];   /* is also used as temporary workspace while building hufTable during DDict creation */
     ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)];    /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
     HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)];  /* can accommodate HUF_decompress4X */
     U32 rep[ZSTD_REP_NUM];
+    U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
 } ZSTD_entropyDTables_t;
 
 typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
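
For scale: assuming MaxSeq == 52 and MaxFSELog == 9 (the values implied by
zstd_internal.h), the workspace above expands to 106 + 512 + 8 = 626 bytes,
i.e. 157 U32 words. A hypothetical self-check of that arithmetic:

    #include <stdint.h>
    #include <stddef.h>
    #include <assert.h>

    int main(void)
    {
        enum { MaxSeq = 52, MaxFSELog = 9 };                /* assumed constants */
        size_t const wksp = sizeof(int16_t) * (MaxSeq + 1)  /* symbolNext[] : 106 */
                          + (1u << MaxFSELog)               /* spread[]     : 512 */
                          + sizeof(uint64_t);               /* write slack  :   8 */
        assert(wksp == 626);
        assert((wksp + sizeof(uint32_t) - 1) / sizeof(uint32_t) == 157);
        return 0;
    }
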
@@ -95,10 +99,12 @@ typedef enum {
     ZSTD_use_once = 1            /* Use the dictionary once and set to ZSTD_dont_use */
 } ZSTD_dictUses_e;
 
-typedef enum {
-    ZSTD_obm_buffered = 0,  /* Buffer the output */
-    ZSTD_obm_stable = 1     /* ZSTD_outBuffer is stable */
-} ZSTD_outBufferMode_e;
+/* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */
+typedef struct {
+    const ZSTD_DDict** ddictPtrTable;
+    size_t ddictPtrTableSize;
+    size_t ddictPtrCount;
+} ZSTD_DDictHashSet;
 
 struct ZSTD_DCtx_s
 {
@@ -114,6 +120,7 @@ struct ZSTD_DCtx_s
     const void* dictEnd;          /* end of previous segment */
     size_t expected;
     ZSTD_frameHeader fParams;
+    U64 processedCSize;
     U64 decodedSize;
     blockType_e bType;            /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
     ZSTD_dStage stage;
@@ -122,6 +129,8 @@ struct ZSTD_DCtx_s
     XXH64_state_t xxhState;
     size_t headerSize;
     ZSTD_format_e format;
+    ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum;   /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */
+    U32 validateChecksum;         /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */
     const BYTE* litPtr;
     ZSTD_customMem customMem;
     size_t litSize;
@@ -135,6 +144,8 @@ struct ZSTD_DCtx_s
     U32 dictID;
     int ddictIsCold;             /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
     ZSTD_dictUses_e dictUses;
+    ZSTD_DDictHashSet* ddictSet;                    /* Hash set for multiple ddicts */
+    ZSTD_refMultipleDDicts_e refMultipleDDicts;     /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
 
     /* streaming */
     ZSTD_dStreamStage streamStage;
@@ -152,7 +163,7 @@ struct ZSTD_DCtx_s
     U32 legacyVersion;
     U32 hostageByte;
     int noForwardProgress;
-    ZSTD_outBufferMode_e outBufferMode;
+    ZSTD_bufferMode_e outBufferMode;
     ZSTD_outBuffer expectedOutBuffer;
 
     /* workspace */
@@ -165,6 +176,11 @@ struct ZSTD_DCtx_s
     void const* dictContentBeginForFuzzing;
     void const* dictContentEndForFuzzing;
 #endif
+
+    /* Tracing */
+#if ZSTD_TRACE
+    ZSTD_TraceCtx traceCtx;
+#endif
 };  /* typedef'd to ZSTD_DCtx within "zstd.h" */
 
 
@@ -183,7 +199,7 @@ size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
  *  If yes, do nothing (continue on current segment).
  *  If not, classify previous segment as "external dictionary", and start a new segment.
  *  This function cannot fail. */
-void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst);
+void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize);
 
 
 #endif /* ZSTD_DECOMPRESS_INTERNAL_H */
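
The ZSTD_DDictHashSet declared above suggests an open-addressed table of DDict
pointers keyed by dictID. A minimal sketch of that shape (the real
implementation lives in zstd_decompress.c and may differ; resizing and the
under-full invariant that keeps probing finite are elided):

    #include <stddef.h>

    typedef struct { unsigned dictID; } DDictStub;   /* stand-in for ZSTD_DDict */

    typedef struct {
        const DDictStub** tab;   /* power-of-two sized, kept under-full */
        size_t size;
        size_t count;
    } DDictSet;

    static size_t slotFor(const DDictSet* s, unsigned dictID)
    {
        size_t idx = (size_t)(dictID * 2654435761U) & (s->size - 1);  /* Knuth hash */
        while (s->tab[idx] && s->tab[idx]->dictID != dictID)
            idx = (idx + 1) & (s->size - 1);                          /* linear probe */
        return idx;
    }

    void ddictSet_add(DDictSet* s, const DDictStub* d)
    {
        size_t const idx = slotFor(s, d->dictID);
        if (s->tab[idx] == NULL) s->count++;
        s->tab[idx] = d;                     /* same dictID : replace reference */
    }

    const DDictStub* ddictSet_find(const DDictSet* s, unsigned dictID)
    {
        return s->tab[slotFor(s, dictID)];   /* NULL if absent */
    }
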

+ 1 - 1
Utilities/cmzstd/lib/deprecated/zbuff.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the

+ 1 - 1
Utilities/cmzstd/lib/deprecated/zbuff_common.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the

+ 1 - 1
Utilities/cmzstd/lib/deprecated/zbuff_compress.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the

+ 1 - 1
Utilities/cmzstd/lib/deprecated/zbuff_decompress.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the

+ 46 - 36
Utilities/cmzstd/lib/dictBuilder/cover.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -26,47 +26,57 @@
 #include <string.h> /* memset */
 #include <time.h>   /* clock */
 
+#ifndef ZDICT_STATIC_LINKING_ONLY
+#  define ZDICT_STATIC_LINKING_ONLY
+#endif
+
 #include "../common/mem.h" /* read */
 #include "../common/pool.h"
 #include "../common/threading.h"
-#include "cover.h"
 #include "../common/zstd_internal.h" /* includes zstd.h */
-#ifndef ZDICT_STATIC_LINKING_ONLY
-#define ZDICT_STATIC_LINKING_ONLY
-#endif
-#include "zdict.h"
+#include "../zdict.h"
+#include "cover.h"
 
 /*-*************************************
 *  Constants
 ***************************************/
 #define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
-#define DEFAULT_SPLITPOINT 1.0
+#define COVER_DEFAULT_SPLITPOINT 1.0
 
 /*-*************************************
 *  Console display
 ***************************************/
+#ifndef LOCALDISPLAYLEVEL
 static int g_displayLevel = 2;
+#endif
+#undef  DISPLAY
 #define DISPLAY(...)                                                           \
   {                                                                            \
     fprintf(stderr, __VA_ARGS__);                                              \
     fflush(stderr);                                                            \
   }
+#undef  LOCALDISPLAYLEVEL
 #define LOCALDISPLAYLEVEL(displayLevel, l, ...)                                \
   if (displayLevel >= l) {                                                     \
     DISPLAY(__VA_ARGS__);                                                      \
   } /* 0 : no display;   1: errors;   2: default;  3: details;  4: debug */
+#undef  DISPLAYLEVEL
 #define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
 
+#ifndef LOCALDISPLAYUPDATE
+static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;
+static clock_t g_time = 0;
+#endif
+#undef  LOCALDISPLAYUPDATE
 #define LOCALDISPLAYUPDATE(displayLevel, l, ...)                               \
   if (displayLevel >= l) {                                                     \
-    if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) {             \
+    if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) {             \
       g_time = clock();                                                        \
       DISPLAY(__VA_ARGS__);                                                    \
     }                                                                          \
   }
+#undef  DISPLAYUPDATE
 #define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
-static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
-static clock_t g_time = 0;
 
 /*-*************************************
 * Hash table
@@ -120,9 +130,9 @@ static int COVER_map_init(COVER_map_t *map, U32 size) {
 /**
  * Internal hash function
  */
-static const U32 prime4bytes = 2654435761U;
+static const U32 COVER_prime4bytes = 2654435761U;
 static U32 COVER_map_hash(COVER_map_t *map, U32 key) {
-  return (key * prime4bytes) >> (32 - map->sizeLog);
+  return (key * COVER_prime4bytes) >> (32 - map->sizeLog);
 }
 
 /**
@@ -215,7 +225,7 @@ typedef struct {
 } COVER_ctx_t;
 
 /* We need a global context for qsort... */
-static COVER_ctx_t *g_ctx = NULL;
+static COVER_ctx_t *g_coverCtx = NULL;
 
 /*-*************************************
 *  Helper functions
@@ -258,11 +268,11 @@ static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) {
 
 /**
  * Same as COVER_cmp() except ties are broken by pointer value
- * NOTE: g_ctx must be set to call this function.  A global is required because
+ * NOTE: g_coverCtx must be set to call this function.  A global is required because
  * qsort doesn't take an opaque pointer.
  */
-static int COVER_strict_cmp(const void *lp, const void *rp) {
-  int result = COVER_cmp(g_ctx, lp, rp);
+static int WIN_CDECL COVER_strict_cmp(const void *lp, const void *rp) {
+  int result = COVER_cmp(g_coverCtx, lp, rp);
   if (result == 0) {
     result = lp < rp ? -1 : 1;
   }
@@ -271,8 +281,8 @@ static int COVER_strict_cmp(const void *lp, const void *rp) {
 /**
  * Faster version for d <= 8.
  */
-static int COVER_strict_cmp8(const void *lp, const void *rp) {
-  int result = COVER_cmp8(g_ctx, lp, rp);
+static int WIN_CDECL COVER_strict_cmp8(const void *lp, const void *rp) {
+  int result = COVER_cmp8(g_coverCtx, lp, rp);
   if (result == 0) {
     result = lp < rp ? -1 : 1;
   }
@@ -603,7 +613,7 @@ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
     /* qsort doesn't take an opaque pointer, so pass as a global.
      * On OpenBSD qsort() is not guaranteed to be stable; their mergesort() is.
      */
-    g_ctx = ctx;
+    g_coverCtx = ctx;
 #if defined(__OpenBSD__)
     mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32),
           (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
@@ -946,7 +956,7 @@ void COVER_dictSelectionFree(COVER_dictSelection_t selection){
   free(selection.dictContent);
 }
 
-COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
+COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
         size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
         size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) {
 
@@ -954,8 +964,8 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
   size_t largestCompressed = 0;
   BYTE* customDictContentEnd = customDictContent + dictContentSize;
 
-  BYTE * largestDictbuffer = (BYTE *)malloc(dictContentSize);
-  BYTE * candidateDictBuffer = (BYTE *)malloc(dictContentSize);
+  BYTE * largestDictbuffer = (BYTE *)malloc(dictBufferCapacity);
+  BYTE * candidateDictBuffer = (BYTE *)malloc(dictBufferCapacity);
   double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;
 
   if (!largestDictbuffer || !candidateDictBuffer) {
@@ -967,7 +977,7 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
   /* Initial dictionary size and compressed size */
   memcpy(largestDictbuffer, customDictContent, dictContentSize);
   dictContentSize = ZDICT_finalizeDictionary(
-    largestDictbuffer, dictContentSize, customDictContent, dictContentSize,
+    largestDictbuffer, dictBufferCapacity, customDictContent, dictContentSize,
     samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
 
   if (ZDICT_isError(dictContentSize)) {
@@ -1001,7 +1011,7 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
   while (dictContentSize < largestDict) {
     memcpy(candidateDictBuffer, largestDictbuffer, largestDict);
     dictContentSize = ZDICT_finalizeDictionary(
-      candidateDictBuffer, dictContentSize, customDictContentEnd - dictContentSize, dictContentSize,
+      candidateDictBuffer, dictBufferCapacity, customDictContentEnd - dictContentSize, dictContentSize,
       samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
 
     if (ZDICT_isError(dictContentSize)) {
@@ -1053,18 +1063,19 @@ typedef struct COVER_tryParameters_data_s {
  * This function is thread safe if zstd is compiled with multithreaded support.
  * It takes its parameters as an *OWNING* opaque pointer to support threading.
  */
-static void COVER_tryParameters(void *opaque) {
+static void COVER_tryParameters(void *opaque)
+{
   /* Save parameters as local variables */
-  COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
+  COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t*)opaque;
   const COVER_ctx_t *const ctx = data->ctx;
   const ZDICT_cover_params_t parameters = data->parameters;
   size_t dictBufferCapacity = data->dictBufferCapacity;
   size_t totalCompressedSize = ERROR(GENERIC);
   /* Allocate space for hash table, dict, and freqs */
   COVER_map_t activeDmers;
-  BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
+  BYTE* const dict = (BYTE*)malloc(dictBufferCapacity);
   COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
-  U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
+  U32* const freqs = (U32*)malloc(ctx->suffixSize * sizeof(U32));
   if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
     DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
     goto _cleanup;
@@ -1079,7 +1090,7 @@ static void COVER_tryParameters(void *opaque) {
   {
     const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
                                               dictBufferCapacity, parameters);
-    selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
+    selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,
         ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
         totalCompressedSize);
 
@@ -1094,19 +1105,18 @@ _cleanup:
   free(data);
   COVER_map_destroy(&activeDmers);
   COVER_dictSelectionFree(selection);
-  if (freqs) {
-    free(freqs);
-  }
+  free(freqs);
 }
 
 ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
-    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
-    const size_t *samplesSizes, unsigned nbSamples,
-    ZDICT_cover_params_t *parameters) {
+    void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer,
+    const size_t* samplesSizes, unsigned nbSamples,
+    ZDICT_cover_params_t* parameters)
+{
   /* constants */
   const unsigned nbThreads = parameters->nbThreads;
   const double splitPoint =
-      parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint;
+      parameters->splitPoint <= 0.0 ? COVER_DEFAULT_SPLITPOINT : parameters->splitPoint;
   const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
   const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
   const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;

+ 7 - 6
Utilities/cmzstd/lib/dictBuilder/cover.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -8,6 +8,10 @@
  * You may select, at your option, one of the above-listed licenses.
  */
 
+#ifndef ZDICT_STATIC_LINKING_ONLY
+#  define ZDICT_STATIC_LINKING_ONLY
+#endif
+
 #include <stdio.h>  /* fprintf */
 #include <stdlib.h> /* malloc, free, qsort */
 #include <string.h> /* memset */
@@ -16,10 +20,7 @@
 #include "../common/pool.h"
 #include "../common/threading.h"
 #include "../common/zstd_internal.h" /* includes zstd.h */
-#ifndef ZDICT_STATIC_LINKING_ONLY
-#define ZDICT_STATIC_LINKING_ONLY
-#endif
-#include "zdict.h"
+#include "../zdict.h"
 
 /**
  * COVER_best_t is used for two purposes:
@@ -152,6 +153,6 @@ void COVER_dictSelectionFree(COVER_dictSelection_t selection);
  * smallest dictionary within a specified regression of the compressed size
  * from the largest dictionary.
  */
- COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
+ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
                        size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
                        size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize);

+ 1 - 1
Utilities/cmzstd/lib/dictBuilder/divsufsort.c

@@ -1576,7 +1576,7 @@ note:
     /* Construct the inverse suffix array of type B* suffixes using trsort. */
     trsort(ISAb, SA, m, 1);
 
-    /* Set the sorted order of tyoe B* suffixes. */
+    /* Set the sorted order of type B* suffixes. */
     for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
       for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
       if(0 <= i) {

+ 32 - 30
Utilities/cmzstd/lib/dictBuilder/fastcover.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -16,15 +16,17 @@
 #include <string.h> /* memset */
 #include <time.h>   /* clock */
 
+#ifndef ZDICT_STATIC_LINKING_ONLY
+#  define ZDICT_STATIC_LINKING_ONLY
+#endif
+
 #include "../common/mem.h" /* read */
 #include "../common/pool.h"
 #include "../common/threading.h"
-#include "cover.h"
 #include "../common/zstd_internal.h" /* includes zstd.h */
-#ifndef ZDICT_STATIC_LINKING_ONLY
-#define ZDICT_STATIC_LINKING_ONLY
-#endif
-#include "zdict.h"
+#include "../compress/zstd_compress_internal.h" /* ZSTD_hash*() */
+#include "../zdict.h"
+#include "cover.h"
 
 
 /*-*************************************
@@ -33,7 +35,7 @@
 #define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
 #define FASTCOVER_MAX_F 31
 #define FASTCOVER_MAX_ACCEL 10
-#define DEFAULT_SPLITPOINT 0.75
+#define FASTCOVER_DEFAULT_SPLITPOINT 0.75
 #define DEFAULT_F 20
 #define DEFAULT_ACCEL 1
 
@@ -41,50 +43,50 @@
 /*-*************************************
 *  Console display
 ***************************************/
+#ifndef LOCALDISPLAYLEVEL
 static int g_displayLevel = 2;
+#endif
+#undef  DISPLAY
 #define DISPLAY(...)                                                           \
   {                                                                            \
     fprintf(stderr, __VA_ARGS__);                                              \
     fflush(stderr);                                                            \
   }
+#undef  LOCALDISPLAYLEVEL
 #define LOCALDISPLAYLEVEL(displayLevel, l, ...)                                \
   if (displayLevel >= l) {                                                     \
     DISPLAY(__VA_ARGS__);                                                      \
   } /* 0 : no display;   1: errors;   2: default;  3: details;  4: debug */
+#undef  DISPLAYLEVEL
 #define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
 
+#ifndef LOCALDISPLAYUPDATE
+static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;
+static clock_t g_time = 0;
+#endif
+#undef  LOCALDISPLAYUPDATE
 #define LOCALDISPLAYUPDATE(displayLevel, l, ...)                               \
   if (displayLevel >= l) {                                                     \
-    if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) {             \
+    if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) {             \
       g_time = clock();                                                        \
       DISPLAY(__VA_ARGS__);                                                    \
     }                                                                          \
   }
+#undef  DISPLAYUPDATE
 #define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
-static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
-static clock_t g_time = 0;
 
 
 /*-*************************************
 * Hash Functions
 ***************************************/
-static const U64 prime6bytes = 227718039650203ULL;
-static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u  << (64-48)) * prime6bytes) >> (64-h)) ; }
-static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
-
-static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
-static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
-static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
-
-
 /**
- * Hash the d-byte value pointed to by p and mod 2^f
+ * Hash the d-byte value pointed to by p and mod 2^f into the frequency vector
  */
-static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 h, unsigned d) {
+static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 f, unsigned d) {
   if (d == 6) {
-    return ZSTD_hash6Ptr(p, h) & ((1 << h) - 1);
+    return ZSTD_hash6Ptr(p, f);
   }
-  return ZSTD_hash8Ptr(p, h) & ((1 << h) - 1);
+  return ZSTD_hash8Ptr(p, f);
 }
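
fastcover.c drops its private copies of the hash functions and reuses ZSTD_hash6Ptr()/ZSTD_hash8Ptr() from zstd_compress_internal.h, passing f directly as the hash width. The old `& ((1 << h) - 1)` mask was redundant: the hash shifts its 64-bit product right by (64 - f), so the result already fits in f bits. A self-contained sketch of that argument, with the prime copied from the removed lines above (assumes 0 < f < 64; FASTCOVER_MAX_F is 31 anyway):

    #include <stdint.h>
    #include <stddef.h>

    static const uint64_t prime8bytes = 0xCF1BBCDCB7A56463ULL;

    /* Same computation as ZSTD_hash8(): after shifting right by (64 - f),
     * at most f significant bits remain, so the value is strictly below
     * ((uint64_t)1 << f) and masking it with ((1 << f) - 1) was a no-op. */
    static size_t hash8_sketch(uint64_t u, uint32_t f)
    {
        return (size_t)((u * prime8bytes) >> (64 - f));
    }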
 
 
@@ -461,20 +463,20 @@ typedef struct FASTCOVER_tryParameters_data_s {
  * This function is thread safe if zstd is compiled with multithreaded support.
  * It takes its parameters as an *OWNING* opaque pointer to support threading.
  */
-static void FASTCOVER_tryParameters(void *opaque)
+static void FASTCOVER_tryParameters(void* opaque)
 {
   /* Save parameters as local variables */
-  FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t *)opaque;
+  FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t*)opaque;
   const FASTCOVER_ctx_t *const ctx = data->ctx;
   const ZDICT_cover_params_t parameters = data->parameters;
   size_t dictBufferCapacity = data->dictBufferCapacity;
   size_t totalCompressedSize = ERROR(GENERIC);
   /* Initialize array to keep track of frequency of dmer within activeSegment */
-  U16* segmentFreqs = (U16 *)calloc(((U64)1 << ctx->f), sizeof(U16));
+  U16* segmentFreqs = (U16*)calloc(((U64)1 << ctx->f), sizeof(U16));
   /* Allocate space for hash table, dict, and freqs */
-  BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
+  BYTE *const dict = (BYTE*)malloc(dictBufferCapacity);
   COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
-  U32 *freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
+  U32* freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
   if (!segmentFreqs || !dict || !freqs) {
     DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
     goto _cleanup;
@@ -486,7 +488,7 @@ static void FASTCOVER_tryParameters(void *opaque)
                                                     parameters, segmentFreqs);
 
     const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100);
-    selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
+    selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,
          ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
          totalCompressedSize);
 
@@ -617,7 +619,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
     /* constants */
     const unsigned nbThreads = parameters->nbThreads;
     const double splitPoint =
-        parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint;
+        parameters->splitPoint <= 0.0 ? FASTCOVER_DEFAULT_SPLITPOINT : parameters->splitPoint;
     const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
     const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
     const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;

+ 29 - 30
Utilities/cmzstd/lib/dictBuilder/zdict.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -23,9 +23,13 @@
 /* Unix Large Files support (>4GB) */
 #define _FILE_OFFSET_BITS 64
 #if (defined(__sun__) && (!defined(__LP64__)))   /* Sun Solaris 32-bits requires specific definitions */
+#  ifndef _LARGEFILE_SOURCE
 #  define _LARGEFILE_SOURCE
+#  endif
 #elif ! defined(__LP64__)                        /* No point defining Large file for 64 bit */
+#  ifndef _LARGEFILE64_SOURCE
 #  define _LARGEFILE64_SOURCE
+#  endif
 #endif
 
 
@@ -37,18 +41,19 @@
 #include <stdio.h>         /* fprintf, fopen, ftello64 */
 #include <time.h>          /* clock */
 
+#ifndef ZDICT_STATIC_LINKING_ONLY
+#  define ZDICT_STATIC_LINKING_ONLY
+#endif
+#define HUF_STATIC_LINKING_ONLY
+
 #include "../common/mem.h"           /* read */
 #include "../common/fse.h"           /* FSE_normalizeCount, FSE_writeNCount */
-#define HUF_STATIC_LINKING_ONLY
 #include "../common/huf.h"           /* HUF_buildCTable, HUF_writeCTable */
 #include "../common/zstd_internal.h" /* includes zstd.h */
 #include "../common/xxhash.h"        /* XXH64 */
-#include "divsufsort.h"
-#ifndef ZDICT_STATIC_LINKING_ONLY
-#  define ZDICT_STATIC_LINKING_ONLY
-#endif
-#include "zdict.h"
 #include "../compress/zstd_compress_internal.h" /* ZSTD_loadCEntropy() */
+#include "../zdict.h"
+#include "divsufsort.h"
 
 
 /*-*************************************
@@ -62,14 +67,15 @@
 
 #define NOISELENGTH 32
 
-static const int g_compressionLevel_default = 3;
 static const U32 g_selectivity_default = 9;
 
 
 /*-*************************************
 *  Console display
 ***************************************/
+#undef  DISPLAY
 #define DISPLAY(...)         { fprintf(stderr, __VA_ARGS__); fflush( stderr ); }
+#undef  DISPLAYLEVEL
 #define DISPLAYLEVEL(l, ...) if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); }    /* 0 : no display;   1: errors;   2: default;  3: details;  4: debug */
 
 static clock_t ZDICT_clockSpan(clock_t nPrevious) { return clock() - nPrevious; }
@@ -105,20 +111,17 @@ size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize)
     size_t headerSize;
     if (dictSize <= 8 || MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return ERROR(dictionary_corrupted);
 
-    {   unsigned offcodeMaxValue = MaxOff;
-        ZSTD_compressedBlockState_t* bs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t));
+    {   ZSTD_compressedBlockState_t* bs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t));
         U32* wksp = (U32*)malloc(HUF_WORKSPACE_SIZE);
-        short* offcodeNCount = (short*)malloc((MaxOff+1)*sizeof(short));
-        if (!bs || !wksp || !offcodeNCount) {
+        if (!bs || !wksp) {
             headerSize = ERROR(memory_allocation);
         } else {
             ZSTD_reset_compressedBlockState(bs);
-            headerSize = ZSTD_loadCEntropy(bs, wksp, offcodeNCount, &offcodeMaxValue, dictBuffer, dictSize);
+            headerSize = ZSTD_loadCEntropy(bs, wksp, dictBuffer, dictSize);
         }
 
         free(bs);
         free(wksp);
-        free(offcodeNCount);
     }
 
     return headerSize;
@@ -532,6 +535,7 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
     clock_t displayClock = 0;
     clock_t const refreshRate = CLOCKS_PER_SEC * 3 / 10;
 
+#   undef  DISPLAYUPDATE
 #   define DISPLAYUPDATE(l, ...) if (notificationLevel>=l) { \
             if (ZDICT_clockSpan(displayClock) > refreshRate)  \
             { displayClock = clock(); DISPLAY(__VA_ARGS__); \
@@ -706,7 +710,7 @@ static void ZDICT_flatLit(unsigned* countLit)
 
 #define OFFCODE_MAX 30  /* only applicable to first block */
 static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
-                                   unsigned compressionLevel,
+                                   int compressionLevel,
                              const void*  srcBuffer, const size_t* fileSizes, unsigned nbFiles,
                              const void* dictBuffer, size_t  dictBufferSize,
                                    unsigned notificationLevel)
@@ -741,7 +745,7 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
     memset(repOffset, 0, sizeof(repOffset));
     repOffset[1] = repOffset[4] = repOffset[8] = 1;
     memset(bestRepOffset, 0, sizeof(bestRepOffset));
-    if (compressionLevel==0) compressionLevel = g_compressionLevel_default;
+    if (compressionLevel==0) compressionLevel = ZSTD_CLEVEL_DEFAULT;
     params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize);
 
     esr.dict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent, params.cParams, ZSTD_defaultCMem);
@@ -786,7 +790,7 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
     /* note : the result of this phase should be used to better appreciate the impact on statistics */
 
     total=0; for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u];
-    errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax);
+    errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax, /* useLowProbCount */ 1);
     if (FSE_isError(errorCode)) {
         eSize = errorCode;
         DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n");
@@ -795,7 +799,7 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
     Offlog = (U32)errorCode;
 
     total=0; for (u=0; u<=MaxML; u++) total+=matchLengthCount[u];
-    errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML);
+    errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML, /* useLowProbCount */ 1);
     if (FSE_isError(errorCode)) {
         eSize = errorCode;
         DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n");
@@ -804,7 +808,7 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
     mlLog = (U32)errorCode;
 
     total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u];
-    errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL);
+    errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL, /* useLowProbCount */ 1);
     if (FSE_isError(errorCode)) {
         eSize = errorCode;
         DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n");
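
All three call sites gain the trailing useLowProbCount argument new in zstd 1.5.0; passing 1, as here, lets FSE_normalizeCount() assign rare symbols the special low-probability count, which (as we read upstream fse.h) trades a little table-building speed for a slightly better ratio. A hedged fragment showing the updated shape; count, total, and maxSymbol stand in for the locals in the hunks above, and the table log is illustrative:

    short ncount[OFFCODE_MAX + 1];  /* OFFCODE_MAX as defined earlier in zdict.c */
    unsigned tableLog = 7;          /* illustrative */
    size_t const err = FSE_normalizeCount(ncount, tableLog, count, total,
                                          maxSymbol, /* useLowProbCount */ 1);
    if (FSE_isError(err)) { /* report, as the callers above do */ }
    else tableLog = (unsigned)err;  /* return value is the table log used */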
@@ -893,7 +897,7 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
     size_t hSize;
 #define HBUFFSIZE 256   /* should prove large enough for all entropy headers */
     BYTE header[HBUFFSIZE];
-    int const compressionLevel = (params.compressionLevel == 0) ? g_compressionLevel_default : params.compressionLevel;
+    int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel;
     U32 const notificationLevel = params.notificationLevel;
 
     /* check conditions */
@@ -939,7 +943,7 @@ static size_t ZDICT_addEntropyTablesFromBuffer_advanced(
         const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
         ZDICT_params_t params)
 {
-    int const compressionLevel = (params.compressionLevel == 0) ? g_compressionLevel_default : params.compressionLevel;
+    int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel;
     U32 const notificationLevel = params.notificationLevel;
     size_t hSize = 8;
 
@@ -968,16 +972,11 @@ static size_t ZDICT_addEntropyTablesFromBuffer_advanced(
     return MIN(dictBufferCapacity, hSize+dictContentSize);
 }
 
-/* Hidden declaration for dbio.c */
-size_t ZDICT_trainFromBuffer_unsafe_legacy(
-                            void* dictBuffer, size_t maxDictSize,
-                            const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
-                            ZDICT_legacy_params_t params);
 /*! ZDICT_trainFromBuffer_unsafe_legacy() :
-*   Warning : `samplesBuffer` must be followed by noisy guard band.
+*   Warning : `samplesBuffer` must be followed by noisy guard band !!!
 *   @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
 */
-size_t ZDICT_trainFromBuffer_unsafe_legacy(
+static size_t ZDICT_trainFromBuffer_unsafe_legacy(
                             void* dictBuffer, size_t maxDictSize,
                             const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
                             ZDICT_legacy_params_t params)
@@ -1114,8 +1113,8 @@ size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
     memset(&params, 0, sizeof(params));
     params.d = 8;
     params.steps = 4;
-    /* Default to level 6 since no compression level information is available */
-    params.zParams.compressionLevel = 3;
+    /* Use default level since no compression level information is available */
+    params.zParams.compressionLevel = ZSTD_CLEVEL_DEFAULT;
 #if defined(DEBUGLEVEL) && (DEBUGLEVEL>=1)
     params.zParams.notificationLevel = DEBUGLEVEL;
 #endif
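
With g_compressionLevel_default removed, the zero-means-default convention now resolves through the public ZSTD_CLEVEL_DEFAULT from zstd.h (3, at this writing), so every caller shares one definition. The pattern at each call site reduces to:

    int const level = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT
                                                     : params.compressionLevel;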

+ 152 - 5
Utilities/cmzstd/lib/dictBuilder/zdict.h → Utilities/cmzstd/lib/zdict.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -36,6 +36,145 @@ extern "C" {
 #  define ZDICTLIB_API ZDICTLIB_VISIBILITY
 #endif
 
+/*******************************************************************************
+ * Zstd dictionary builder
+ *
+ * FAQ
+ * ===
+ * Why should I use a dictionary?
+ * ------------------------------
+ *
+ * Zstd can use dictionaries to improve compression ratio of small data.
+ * Traditionally small files don't compress well because there is very little
+ * repetition in a single sample, since it is small. But, if you are compressing
+ * many similar files, like a bunch of JSON records that share the same
+ * structure, you can train a dictionary ahead of time on some samples of
+ * these files. Then, zstd can use the dictionary to find repetitions that are
+ * present across samples. This can vastly improve compression ratio.
+ *
+ * When is a dictionary useful?
+ * ----------------------------
+ *
+ * Dictionaries are useful when compressing many small files that are similar.
+ * The larger a file is, the less benefit a dictionary will have. Generally,
+ * we don't expect dictionary compression to be effective past 100KB. And the
+ * smaller a file is, the more we would expect the dictionary to help.
+ *
+ * How do I use a dictionary?
+ * --------------------------
+ *
+ * Simply pass the dictionary to the zstd compressor with
+ * `ZSTD_CCtx_loadDictionary()`. The same dictionary must then be passed to
+ * the decompressor, using `ZSTD_DCtx_loadDictionary()`. There are other
+ * more advanced functions that allow selecting some options, see zstd.h for
+ * complete documentation.
+ *
+ * What is a zstd dictionary?
+ * --------------------------
+ *
+ * A zstd dictionary has two pieces: Its header, and its content. The header
+ * contains a magic number, the dictionary ID, and entropy tables. These
+ * entropy tables allow zstd to save on header costs in the compressed file,
+ * which really matters for small data. The content is just bytes: repeated
+ * content that is common across many samples.
+ *
+ * What is a raw content dictionary?
+ * ---------------------------------
+ *
+ * A raw content dictionary is just bytes. It doesn't have a zstd dictionary
+ * header, a dictionary ID, or entropy tables. Any buffer is a valid raw
+ * content dictionary.
+ *
+ * How do I train a dictionary?
+ * ----------------------------
+ *
+ * Gather samples from your use case. These samples should be similar to each
+ * other. If you have several use cases, you could try to train one dictionary
+ * per use case.
+ *
+ * Pass those samples to `ZDICT_trainFromBuffer()` and that will train your
+ * dictionary. There are a few advanced versions of this function, but this
+ * is a great starting point. If you want to further tune your dictionary
+ * you could try `ZDICT_optimizeTrainFromBuffer_cover()`. If that is too slow
+ * you can try `ZDICT_optimizeTrainFromBuffer_fastCover()`.
+ *
+ * If the dictionary training function fails, that is likely because you
+ * either passed too few samples, or a dictionary would not be effective
+ * for your data. Look at the messages that the dictionary trainer printed;
+ * if it doesn't say too few samples, then a dictionary would not be effective.
+ *
+ * How large should my dictionary be?
+ * ----------------------------------
+ *
+ * A reasonable dictionary size, the `dictBufferCapacity`, is about 100KB.
+ * The zstd CLI defaults to a 110KB dictionary. You likely don't need a
+ * dictionary larger than that. But, most use cases can get away with a
+ * smaller dictionary. The advanced dictionary builders can automatically
+ * shrink the dictionary for you, and select the smallest size that
+ * doesn't hurt compression ratio too much. See the `shrinkDict` parameter.
+ * A smaller dictionary can save memory, and potentially speed up
+ * compression.
+ *
+ * How many samples should I provide to the dictionary builder?
+ * ------------------------------------------------------------
+ *
+ * We generally recommend passing ~100x the size of the dictionary
+ * in samples. A few thousand should suffice. Having too few samples
+ * can hurt the dictionary's effectiveness. Having more samples will
+ * only improve the dictionary's effectiveness. But having too many
+ * samples can slow down the dictionary builder.
+ *
+ * How do I determine if a dictionary will be effective?
+ * -----------------------------------------------------
+ *
+ * Simply train a dictionary and try it out. You can use zstd's built-in
+ * benchmarking tool to test the dictionary effectiveness.
+ *
+ *   # Benchmark levels 1-3 without a dictionary
+ *   zstd -b1e3 -r /path/to/my/files
+ *   # Benchmark levels 1-3 with a dictioanry
+ *   zstd -b1e3 -r /path/to/my/files -D /path/to/my/dictionary
+ *
+ * When should I retrain a dictionary?
+ * -----------------------------------
+ *
+ * You should retrain a dictionary when its effectiveness drops. Dictionary
+ * effectiveness drops as the data you are compressing changes. Generally, we do
+ * expect dictionaries to "decay" over time, as your data changes, but the rate
+ * at which they decay depends on your use case. Internally, we regularly
+ * retrain dictionaries, and if the new dictionary performs significantly
+ * better than the old dictionary, we will ship the new dictionary.
+ *
+ * I have a raw content dictionary, how do I turn it into a zstd dictionary?
+ * -------------------------------------------------------------------------
+ *
+ * If you have a raw content dictionary, e.g. by manually constructing it, or
+ * using a third-party dictionary builder, you can turn it into a zstd
+ * dictionary by using `ZDICT_finalizeDictionary()`. You'll also have to
+ * provide some samples of the data. It will add the zstd header to the
+ * raw content, which contains a dictionary ID and entropy tables, which
+ * will improve compression ratio, and allow zstd to write the dictionary ID
+ * into the frame, if you so choose.
+ *
+ * Do I have to use zstd's dictionary builder?
+ * -------------------------------------------
+ *
+ * No! You can construct dictionary content however you please; it is just
+ * bytes. It will always be valid as a raw content dictionary. If you want
+ * a zstd dictionary, which can improve compression ratio, use
+ * `ZDICT_finalizeDictionary()`.
+ *
+ * What is the attack surface of a zstd dictionary?
+ * ------------------------------------------------
+ *
+ * Zstd is heavily fuzz tested, including loading fuzzed dictionaries, so
+ * zstd should never crash or access out-of-bounds memory, no matter what
+ * the dictionary is. However, if an attacker can control the dictionary
+ * during decompression, they can cause zstd to generate arbitrary bytes,
+ * just like if they controlled the compressed data.
+ *
+ ******************************************************************************/
+
 
 /*! ZDICT_trainFromBuffer():
  *  Train a dictionary from an array of samples.
@@ -64,7 +203,14 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCap
 typedef struct {
     int      compressionLevel;   /*< optimize for a specific zstd compression level; 0 means default */
     unsigned notificationLevel;  /*< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
-    unsigned dictID;             /*< force dictID value; 0 means auto mode (32-bits random value) */
+    unsigned dictID;             /*< force dictID value; 0 means auto mode (32-bits random value)
+                                  *   NOTE: The zstd format reserves some dictionary IDs for future use.
+                                  *         You may use them in private settings, but be warned that they
+                                  *         may be used by zstd in a public dictionary registry in the future.
+                                  *         These dictionary IDs are:
+                                  *           - low range  : <= 32767
+                                  *           - high range : >= (2^31)
+                                  */
 } ZDICT_params_t;
 
 /*! ZDICT_finalizeDictionary():
@@ -264,10 +410,11 @@ typedef struct {
  *  Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
  */
 ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
-    void *dictBuffer, size_t dictBufferCapacity,
-    const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
+    void* dictBuffer, size_t dictBufferCapacity,
+    const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
     ZDICT_legacy_params_t parameters);
 
+
 /* Deprecation warnings */
 /* It is generally possible to disable deprecation warnings from compiler,
    for example with -Wno-deprecated-declarations for gcc
@@ -279,7 +426,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
 #  define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
 #  if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
 #    define ZDICT_DEPRECATED(message) [[deprecated(message)]] ZDICTLIB_API
-#  elif (ZDICT_GCC_VERSION >= 405) || defined(__clang__)
+#  elif defined(__clang__) || (ZDICT_GCC_VERSION >= 405)
 #    define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message)))
 #  elif (ZDICT_GCC_VERSION >= 301)
 #    define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated))
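
The new FAQ above describes the whole workflow in prose: train with ZDICT_trainFromBuffer(), then attach the result with ZSTD_CCtx_loadDictionary() before compressing. A minimal self-contained sketch using only those public APIs; the ~100KB capacity follows the FAQ's sizing advice, buffer names are illustrative, and error handling is abbreviated:

    #include <stdlib.h>
    #include <zstd.h>
    #include <zdict.h>

    /* Train a dictionary from samples, then compress one buffer with it.
     * Returns the compressed size, or 0 on any failure. */
    static size_t train_and_compress(const void* samplesBuffer,
                                     const size_t* samplesSizes, unsigned nbSamples,
                                     const void* src, size_t srcSize,
                                     void* dst, size_t dstCapacity)
    {
        size_t const dictCapacity = 100 * 1024;  /* ~100KB, per the FAQ */
        void* const dict = malloc(dictCapacity);
        size_t dictSize, cSize = 0;
        if (dict == NULL) return 0;

        dictSize = ZDICT_trainFromBuffer(dict, dictCapacity,
                                         samplesBuffer, samplesSizes, nbSamples);
        if (!ZDICT_isError(dictSize)) {
            ZSTD_CCtx* const cctx = ZSTD_createCCtx();
            if (cctx != NULL) {
                size_t const r = ZSTD_CCtx_loadDictionary(cctx, dict, dictSize);
                if (!ZSTD_isError(r)) {
                    size_t const c = ZSTD_compress2(cctx, dst, dstCapacity,
                                                    src, srcSize);
                    if (!ZSTD_isError(c)) cSize = c;
                }
                ZSTD_freeCCtx(cctx);
            }
        }
        free(dict);
        return cSize;
    }

As the FAQ notes, the same dictionary bytes must then be loaded on the decompression side, with ZSTD_DCtx_loadDictionary().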

File changes will not be shown because they are too large
+ 524 - 90
Utilities/cmzstd/lib/zstd.h


+ 2 - 1
Utilities/cmzstd/lib/common/zstd_errors.h → Utilities/cmzstd/lib/zstd_errors.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -77,6 +77,7 @@ typedef enum {
   ZSTD_error_frameIndex_tooLarge = 100,
   ZSTD_error_seekableIO          = 102,
   ZSTD_error_dstBuffer_wrong     = 104,
+  ZSTD_error_srcBuffer_wrong     = 105,
   ZSTD_error_maxCode = 120  /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
 } ZSTD_ErrorCode;
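
zstd_errors.h already exposes ZSTD_getErrorCode() for stable error inspection, so callers can distinguish the new code without string matching. A small sketch; the helper name is ours:

    #include <zstd.h>         /* ZSTD_isError */
    #include <zstd_errors.h>  /* ZSTD_getErrorCode, ZSTD_error_srcBuffer_wrong */

    /* Nonzero if `ret`, a size_t result from a zstd call, failed with the
     * new source-buffer error (the input-side counterpart of
     * ZSTD_error_dstBuffer_wrong just above it). */
    static int is_src_buffer_error(size_t ret)
    {
        return ZSTD_isError(ret)
            && (ZSTD_getErrorCode(ret) == ZSTD_error_srcBuffer_wrong);
    }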
 

Some files were not shown because the diff is too large