bitstream.h 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465
  1. /* ******************************************************************
  2. * bitstream
  3. * Part of FSE library
  4. * Copyright (c) Yann Collet, Facebook, Inc.
  5. *
  6. * You can contact the author at :
  7. * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
  8. *
  9. * This source code is licensed under both the BSD-style license (found in the
  10. * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  11. * in the COPYING file in the root directory of this source tree).
  12. * You may select, at your option, one of the above-listed licenses.
  13. ****************************************************************** */
  14. #ifndef BITSTREAM_H_MODULE
  15. #define BITSTREAM_H_MODULE
  16. #include <assert.h>
  17. #if defined (__cplusplus)
  18. extern "C" {
  19. #endif
  20. /*
  21. * This API consists of small unitary functions, which must be inlined for best performance.
  22. * Since link-time-optimization is not available for all compilers,
  23. * these functions are defined into a .h to be included.
  24. */
  25. /*-****************************************
  26. * Dependencies
  27. ******************************************/
  28. #include "mem.h" /* unaligned access routines */
  29. #include "compiler.h" /* UNLIKELY() */
  30. #include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
  31. #include "error_private.h" /* error codes and messages */
  32. /*=========================================
  33. * Target specific
  34. =========================================*/
  35. #ifndef ZSTD_NO_INTRINSICS
  36. # if defined(__BMI__) && defined(__GNUC__)
  37. # include <immintrin.h> /* support for bextr (experimental) */
  38. # elif defined(__ICCARM__)
  39. # include <intrinsics.h>
  40. # endif
  41. #endif
  /* Accumulator sizing constants, selected by register width.
   * The values 25 and 57 equal (8 * 4) - 7 and (8 * 8) - 7, which matches the
   * minimum bit count documented for a successful BIT_reloadDStream().
   * NOTE(review): MEM_32bits() is defined in mem.h; presumably non-zero on
   * 32-bit targets -- confirm there. */
  #define STREAM_ACCUMULATOR_MIN_32 25
  #define STREAM_ACCUMULATOR_MIN_64 57
  #define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
  45. /*-******************************************
  46. * bitStream encoding API (write forward)
  47. ********************************************/
  48. /* bitStream can mix input from multiple sources.
  49. * A critical property of these streams is that they encode and decode in **reverse** direction.
  50. * So the first bit sequence you add will be the last to be read, like a LIFO stack.
  51. */
  /* Write-side bitstream state, set up by BIT_initCStream(). */
  typedef struct {
      size_t bitContainer;   /* local register accumulating bits not yet flushed */
      unsigned bitPos;       /* number of bits currently held in bitContainer */
      char* startPtr;        /* beginning of the destination buffer */
      char* ptr;             /* next write position inside the destination buffer */
      char* endPtr;          /* startPtr + dstCapacity - sizeof(bitContainer) : write position is clamped to this by BIT_flushBits() */
  } BIT_CStream_t;
  59. MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);
  60. MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
  61. MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC);
  62. MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
  /* Start with initCStream, providing the size of buffer to write into.
   * bitStream will never write outside of this buffer.
   * `dstCapacity` must be > sizeof(bitC->bitContainer), otherwise @return will be an error code.
   *
   * Bits are first added to a local register.
   * Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
   * Writing data into memory is an explicit operation, performed by the flushBits function.
   * Hence keep track of how many bits are potentially stored into local register to avoid register overflow.
   * After a flushBits, a maximum of 7 bits might still be stored into local register.
   *
   * Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.
   *
   * Last operation is to close the bitStream.
   * The function returns the final size of CStream in bytes.
   * If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)
   */
  79. /*-********************************************
  80. * bitStream decoding API (read backward)
  81. **********************************************/
  /* Read-side bitstream state, set up by BIT_initDStream(). */
  typedef struct {
      size_t bitContainer;     /* local register holding the bits most recently loaded from memory */
      unsigned bitsConsumed;   /* number of bits of bitContainer already consumed */
      const char* ptr;         /* address bitContainer was last loaded from; moves toward `start` on reload */
      const char* start;       /* beginning of the source buffer */
      const char* limitPtr;    /* start + sizeof(bitContainer) : at or above this, the fast reload path is used */
  } BIT_DStream_t;
  /* Status reported by the reload functions.
   *  - unfinished  : register was reloaded by the full amount consumed.
   *  - endOfBuffer : `ptr` has reached (or was clamped to) `start`, bits may still remain in the register.
   *  - completed   : `ptr` is at `start` and every bit of the register has been consumed.
   *  - overflow    : more bits were consumed than the register holds
   *                  (BIT_reloadDStreamFast() also returns it when `ptr` is below `limitPtr`). */
  typedef enum { BIT_DStream_unfinished = 0,
                 BIT_DStream_endOfBuffer = 1,
                 BIT_DStream_completed = 2,
                 BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */
  /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
  94. MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
  95. MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
  96. MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
  97. MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
  /* Start by invoking BIT_initDStream().
   * A chunk of the bitStream is then stored into a local register.
   * Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
   * You can then retrieve bitFields stored into the local register, **in reverse order**.
   * Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
   * A reload guarantees a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
   * Otherwise, it can be less than that, so proceed accordingly.
   * Checking if DStream has reached its end can be performed with BIT_endOfDStream().
   */
  107. /*-****************************************
  108. * unsafe API
  109. ******************************************/
  110. MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
  111. /* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
  112. MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
  113. /* unsafe version; does not check buffer overflow */
  114. MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
  115. /* faster, but works only if nbBits >= 1 */
  116. /*-**************************************************************
  117. * Internal functions
  118. ****************************************************************/
  119. MEM_STATIC unsigned BIT_highbit32 (U32 val)
  120. {
  121. assert(val != 0);
  122. {
  123. # if defined(_MSC_VER) /* Visual */
  124. # if STATIC_BMI2 == 1
  125. return _lzcnt_u32(val) ^ 31;
  126. # else
  127. unsigned long r = 0;
  128. return _BitScanReverse(&r, val) ? (unsigned)r : 0;
  129. # endif
  130. # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
  131. return __builtin_clz (val) ^ 31;
  132. # elif defined(__ICCARM__) /* IAR Intrinsic */
  133. return 31 - __CLZ(val);
  134. # else /* Software version */
  135. static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29,
  136. 11, 14, 16, 18, 22, 25, 3, 30,
  137. 8, 12, 20, 28, 15, 17, 24, 7,
  138. 19, 27, 23, 6, 26, 5, 4, 31 };
  139. U32 v = val;
  140. v |= v >> 1;
  141. v |= v >> 2;
  142. v |= v >> 4;
  143. v |= v >> 8;
  144. v |= v >> 16;
  145. return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
  146. # endif
  147. }
  148. }
  /*===== Local Constants =====*/
  /* BIT_mask[n] has its n lowest bits set, for n = 0 .. 31. */
  static const unsigned BIT_mask[] = {
      0x00000000, 0x00000001, 0x00000003, 0x00000007,
      0x0000000F, 0x0000001F, 0x0000003F, 0x0000007F,
      0x000000FF, 0x000001FF, 0x000003FF, 0x000007FF,
      0x00000FFF, 0x00001FFF, 0x00003FFF, 0x00007FFF,
      0x0000FFFF, 0x0001FFFF, 0x0003FFFF, 0x0007FFFF,
      0x000FFFFF, 0x001FFFFF, 0x003FFFFF, 0x007FFFFF,
      0x00FFFFFF, 0x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF,
      0x0FFFFFFF, 0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF };   /* up to 31 bits */
  #define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0]))
  158. /*-**************************************************************
  159. * bitStream encoding
  160. ****************************************************************/
  161. /*! BIT_initCStream() :
  162. * `dstCapacity` must be > sizeof(size_t)
  163. * @return : 0 if success,
  164. * otherwise an error code (can be tested using ERR_isError()) */
  165. MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
  166. void* startPtr, size_t dstCapacity)
  167. {
  168. bitC->bitContainer = 0;
  169. bitC->bitPos = 0;
  170. bitC->startPtr = (char*)startPtr;
  171. bitC->ptr = bitC->startPtr;
  172. bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer);
  173. if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall);
  174. return 0;
  175. }
  176. /*! BIT_addBits() :
  177. * can add up to 31 bits into `bitC`.
  178. * Note : does not check for register overflow ! */
  179. MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
  180. size_t value, unsigned nbBits)
  181. {
  182. DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
  183. assert(nbBits < BIT_MASK_SIZE);
  184. assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
  185. bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
  186. bitC->bitPos += nbBits;
  187. }
  188. /*! BIT_addBitsFast() :
  189. * works only if `value` is _clean_,
  190. * meaning all high bits above nbBits are 0 */
  191. MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
  192. size_t value, unsigned nbBits)
  193. {
  194. assert((value>>nbBits) == 0);
  195. assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
  196. bitC->bitContainer |= value << bitC->bitPos;
  197. bitC->bitPos += nbBits;
  198. }
  199. /*! BIT_flushBitsFast() :
  200. * assumption : bitContainer has not overflowed
  201. * unsafe version; does not check buffer overflow */
  202. MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
  203. {
  204. size_t const nbBytes = bitC->bitPos >> 3;
  205. assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
  206. assert(bitC->ptr <= bitC->endPtr);
  207. MEM_writeLEST(bitC->ptr, bitC->bitContainer);
  208. bitC->ptr += nbBytes;
  209. bitC->bitPos &= 7;
  210. bitC->bitContainer >>= nbBytes*8;
  211. }
  212. /*! BIT_flushBits() :
  213. * assumption : bitContainer has not overflowed
  214. * safe version; check for buffer overflow, and prevents it.
  215. * note : does not signal buffer overflow.
  216. * overflow will be revealed later on using BIT_closeCStream() */
  217. MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
  218. {
  219. size_t const nbBytes = bitC->bitPos >> 3;
  220. assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
  221. assert(bitC->ptr <= bitC->endPtr);
  222. MEM_writeLEST(bitC->ptr, bitC->bitContainer);
  223. bitC->ptr += nbBytes;
  224. if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
  225. bitC->bitPos &= 7;
  226. bitC->bitContainer >>= nbBytes*8;
  227. }
  228. /*! BIT_closeCStream() :
  229. * @return : size of CStream, in bytes,
  230. * or 0 if it could not fit into dstBuffer */
  231. MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
  232. {
  233. BIT_addBitsFast(bitC, 1, 1); /* endMark */
  234. BIT_flushBits(bitC);
  235. if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
  236. return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
  237. }
  238. /*-********************************************************
  239. * bitStream decoding
  240. **********************************************************/
  241. /*! BIT_initDStream() :
  242. * Initialize a BIT_DStream_t.
  243. * `bitD` : a pointer to an already allocated BIT_DStream_t structure.
  244. * `srcSize` must be the *exact* size of the bitStream, in bytes.
  245. * @return : size of stream (== srcSize), or an errorCode if a problem is detected
  246. */
  247. MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
  248. {
  249. if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
  250. bitD->start = (const char*)srcBuffer;
  251. bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
  252. if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */
  253. bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
  254. bitD->bitContainer = MEM_readLEST(bitD->ptr);
  255. { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
  256. bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
  257. if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
  258. } else {
  259. bitD->ptr = bitD->start;
  260. bitD->bitContainer = *(const BYTE*)(bitD->start);
  261. switch(srcSize)
  262. {
  263. case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
  264. /* fall-through */
  265. case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
  266. /* fall-through */
  267. case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
  268. /* fall-through */
  269. case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
  270. /* fall-through */
  271. case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
  272. /* fall-through */
  273. case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8;
  274. /* fall-through */
  275. default: break;
  276. }
  277. { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
  278. bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
  279. if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */
  280. }
  281. bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
  282. }
  283. return srcSize;
  284. }
  285. MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
  286. {
  287. return bitContainer >> start;
  288. }
  289. MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
  290. {
  291. U32 const regMask = sizeof(bitContainer)*8 - 1;
  292. /* if start > regMask, bitstream is corrupted, and result is undefined */
  293. assert(nbBits < BIT_MASK_SIZE);
  294. return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
  295. }
  296. MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
  297. {
  298. #if defined(STATIC_BMI2) && STATIC_BMI2 == 1
  299. return _bzhi_u64(bitContainer, nbBits);
  300. #else
  301. assert(nbBits < BIT_MASK_SIZE);
  302. return bitContainer & BIT_mask[nbBits];
  303. #endif
  304. }
  305. /*! BIT_lookBits() :
  306. * Provides next n bits from local register.
  307. * local register is not modified.
  308. * On 32-bits, maxNbBits==24.
  309. * On 64-bits, maxNbBits==56.
  310. * @return : value extracted */
  311. MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
  312. {
  313. /* arbitrate between double-shift and shift+mask */
  314. #if 1
  315. /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8,
  316. * bitstream is likely corrupted, and result is undefined */
  317. return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
  318. #else
  319. /* this code path is slower on my os-x laptop */
  320. U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
  321. return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
  322. #endif
  323. }
  324. /*! BIT_lookBitsFast() :
  325. * unsafe version; only works if nbBits >= 1 */
  326. MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
  327. {
  328. U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
  329. assert(nbBits >= 1);
  330. return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
  331. }
  332. MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
  333. {
  334. bitD->bitsConsumed += nbBits;
  335. }
  336. /*! BIT_readBits() :
  337. * Read (consume) next n bits from local register and update.
  338. * Pay attention to not read more than nbBits contained into local register.
  339. * @return : extracted value. */
  340. MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
  341. {
  342. size_t const value = BIT_lookBits(bitD, nbBits);
  343. BIT_skipBits(bitD, nbBits);
  344. return value;
  345. }
  346. /*! BIT_readBitsFast() :
  347. * unsafe version; only works only if nbBits >= 1 */
  348. MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
  349. {
  350. size_t const value = BIT_lookBitsFast(bitD, nbBits);
  351. assert(nbBits >= 1);
  352. BIT_skipBits(bitD, nbBits);
  353. return value;
  354. }
  355. /*! BIT_reloadDStreamFast() :
  356. * Similar to BIT_reloadDStream(), but with two differences:
  357. * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
  358. * 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this
  359. * point you must use BIT_reloadDStream() to reload.
  360. */
  361. MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
  362. {
  363. if (UNLIKELY(bitD->ptr < bitD->limitPtr))
  364. return BIT_DStream_overflow;
  365. assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
  366. bitD->ptr -= bitD->bitsConsumed >> 3;
  367. bitD->bitsConsumed &= 7;
  368. bitD->bitContainer = MEM_readLEST(bitD->ptr);
  369. return BIT_DStream_unfinished;
  370. }
  371. /*! BIT_reloadDStream() :
  372. * Refill `bitD` from buffer previously set in BIT_initDStream() .
  373. * This function is safe, it guarantees it will not read beyond src buffer.
  374. * @return : status of `BIT_DStream_t` internal register.
  375. * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
  376. MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
  377. {
  378. if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */
  379. return BIT_DStream_overflow;
  380. if (bitD->ptr >= bitD->limitPtr) {
  381. return BIT_reloadDStreamFast(bitD);
  382. }
  383. if (bitD->ptr == bitD->start) {
  384. if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
  385. return BIT_DStream_completed;
  386. }
  387. /* start < ptr < limitPtr */
  388. { U32 nbBytes = bitD->bitsConsumed >> 3;
  389. BIT_DStream_status result = BIT_DStream_unfinished;
  390. if (bitD->ptr - nbBytes < bitD->start) {
  391. nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */
  392. result = BIT_DStream_endOfBuffer;
  393. }
  394. bitD->ptr -= nbBytes;
  395. bitD->bitsConsumed -= nbBytes*8;
  396. bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */
  397. return result;
  398. }
  399. }
  400. /*! BIT_endOfDStream() :
  401. * @return : 1 if DStream has _exactly_ reached its end (all bits consumed).
  402. */
  403. MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
  404. {
  405. return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
  406. }
  407. #if defined (__cplusplus)
  408. }
  409. #endif
  410. #endif /* BITSTREAM_H_MODULE */