  1. ///////////////////////////////////////////////////////////////////////////////
  2. //
  3. /// \file lz_encoder.c
  4. /// \brief LZ in window
  5. ///
  6. // Authors: Igor Pavlov
  7. // Lasse Collin
  8. //
  9. // This file has been put into the public domain.
  10. // You can do whatever you want with this file.
  11. //
  12. ///////////////////////////////////////////////////////////////////////////////
  13. #include "lz_encoder.h"
  14. #include "lz_encoder_hash.h"
  15. // See lz_encoder_hash.h. This is a bit hackish but avoids making
  16. // endianness a conditional in makefiles.
  17. #if defined(WORDS_BIGENDIAN) && !defined(HAVE_SMALL)
  18. # include "lz_encoder_hash_table.h"
  19. #endif
  20. #include "memcmplen.h"
/// \brief      Private state of the generic LZ encoder wrapper
typedef struct {
	/// LZ-based encoder e.g. LZMA
	lzma_lz_encoder lz;

	/// History buffer and match finder
	lzma_mf mf;

	/// Next coder in the chain
	lzma_next_coder next;
} lzma_coder;
  29. /// \brief Moves the data in the input window to free space for new data
  30. ///
  31. /// mf->buffer is a sliding input window, which keeps mf->keep_size_before
  32. /// bytes of input history available all the time. Now and then we need to
  33. /// "slide" the buffer to make space for the new data to the end of the
  34. /// buffer. At the same time, data older than keep_size_before is dropped.
  35. ///
  36. static void
  37. move_window(lzma_mf *mf)
  38. {
  39. // Align the move to a multiple of 16 bytes. Some LZ-based encoders
  40. // like LZMA use the lowest bits of mf->read_pos to know the
  41. // alignment of the uncompressed data. We also get better speed
  42. // for memmove() with aligned buffers.
  43. assert(mf->read_pos > mf->keep_size_before);
  44. const uint32_t move_offset
  45. = (mf->read_pos - mf->keep_size_before) & ~UINT32_C(15);
  46. assert(mf->write_pos > move_offset);
  47. const size_t move_size = mf->write_pos - move_offset;
  48. assert(move_offset + move_size <= mf->size);
  49. memmove(mf->buffer, mf->buffer + move_offset, move_size);
  50. mf->offset += move_offset;
  51. mf->read_pos -= move_offset;
  52. mf->read_limit -= move_offset;
  53. mf->write_pos -= move_offset;
  54. return;
  55. }
/// \brief      Tries to fill the input window (mf->buffer)
///
/// If we are the last encoder in the chain, our input data is in in[].
/// Otherwise we call the next filter in the chain to process in[] and
/// write its output to mf->buffer.
///
/// This function must not be called once it has returned LZMA_STREAM_END.
///
static lzma_ret
fill_window(lzma_coder *coder, const lzma_allocator *allocator,
		const uint8_t *in, size_t *in_pos, size_t in_size,
		lzma_action action)
{
	assert(coder->mf.read_pos <= coder->mf.write_pos);

	// Move the sliding window if needed.
	if (coder->mf.read_pos >= coder->mf.size - coder->mf.keep_size_after)
		move_window(&coder->mf);

	// Maybe this is ugly, but lzma_mf uses uint32_t for most things
	// (which I find cleanest), but we need size_t here when filling
	// the history window.
	size_t write_pos = coder->mf.write_pos;
	lzma_ret ret;
	if (coder->next.code == NULL) {
		// Not using a filter, simply memcpy() as much as possible.
		lzma_bufcpy(in, in_pos, in_size, coder->mf.buffer,
				&write_pos, coder->mf.size);

		// Without a chained filter we decide end-of-input ourselves:
		// finished only when flushing/finishing and all input is read.
		ret = action != LZMA_RUN && *in_pos == in_size
				? LZMA_STREAM_END : LZMA_OK;

	} else {
		// Let the next filter in the chain produce our input.
		ret = coder->next.code(coder->next.coder, allocator,
				in, in_pos, in_size,
				coder->mf.buffer, &write_pos,
				coder->mf.size, action);
	}

	coder->mf.write_pos = write_pos;

	// Silence Valgrind. lzma_memcmplen() can read extra bytes
	// and Valgrind will give warnings if those bytes are uninitialized
	// because Valgrind cannot see that the values of the uninitialized
	// bytes are eventually ignored.
	memzero(coder->mf.buffer + write_pos, LZMA_MEMCMPLEN_EXTRA);

	// If end of stream has been reached or flushing completed, we allow
	// the encoder to process all the input (that is, read_pos is allowed
	// to reach write_pos). Otherwise we keep keep_size_after bytes
	// available as prebuffer.
	if (ret == LZMA_STREAM_END) {
		assert(*in_pos == in_size);
		ret = LZMA_OK;
		coder->mf.action = action;
		coder->mf.read_limit = coder->mf.write_pos;

	} else if (coder->mf.write_pos > coder->mf.keep_size_after) {
		// This needs to be done conditionally, because if we got
		// only little new input, there may be too little input
		// to do any encoding yet.
		coder->mf.read_limit = coder->mf.write_pos
				- coder->mf.keep_size_after;
	}

	// Restart the match finder after finished LZMA_SYNC_FLUSH.
	if (coder->mf.pending > 0
			&& coder->mf.read_pos < coder->mf.read_limit) {
		// Match finder may update coder->pending and expects it to
		// start from zero, so use a temporary variable.
		const uint32_t pending = coder->mf.pending;
		coder->mf.pending = 0;

		// Rewind read_pos so that the match finder can hash
		// the pending bytes.
		assert(coder->mf.read_pos >= pending);
		coder->mf.read_pos -= pending;

		// Call the skip function directly instead of using
		// mf_skip(), since we don't want to touch mf->read_ahead.
		coder->mf.skip(&coder->mf, pending);
	}

	return ret;
}
/// \brief      Generic LZ encode loop: fill the window, then let the
///             LZ-based encoder (e.g. LZMA) produce output
///
/// Loops until the output buffer is full or, when action is LZMA_RUN,
/// until all input has been consumed.
static lzma_ret
lz_encode(void *coder_ptr, const lzma_allocator *allocator,
		const uint8_t *restrict in, size_t *restrict in_pos,
		size_t in_size,
		uint8_t *restrict out, size_t *restrict out_pos,
		size_t out_size, lzma_action action)
{
	lzma_coder *coder = coder_ptr;

	while (*out_pos < out_size
			&& (*in_pos < in_size || action != LZMA_RUN)) {
		// Read more data to coder->mf.buffer if needed.
		if (coder->mf.action == LZMA_RUN && coder->mf.read_pos
				>= coder->mf.read_limit)
			return_if_error(fill_window(coder, allocator,
					in, in_pos, in_size, action));

		// Encode
		const lzma_ret ret = coder->lz.code(coder->lz.coder,
				&coder->mf, out, out_pos, out_size);
		if (ret != LZMA_OK) {
			// Setting this to LZMA_RUN for cases when we are
			// flushing. It doesn't matter when finishing or if
			// an error occurred.
			coder->mf.action = LZMA_RUN;
			return ret;
		}
	}

	return LZMA_OK;
}
/// \brief      Validates options and computes buffer/table sizes into *mf
///
/// Fills in the size fields of *mf from lz_options and sets up the match
/// finder function pointers. Frees old buffers whose size no longer
/// matches; allocation of the new ones happens later in lz_encoder_init().
///
/// \return     true on error (invalid options), false on success.
static bool
lz_encoder_prepare(lzma_mf *mf, const lzma_allocator *allocator,
		const lzma_lz_options *lz_options)
{
	// For now, the dictionary size is limited to 1.5 GiB. This may grow
	// in the future if needed, but it needs a little more work than just
	// changing this check.
	if (lz_options->dict_size < LZMA_DICT_SIZE_MIN
			|| lz_options->dict_size
				> (UINT32_C(1) << 30) + (UINT32_C(1) << 29)
			|| lz_options->nice_len > lz_options->match_len_max)
		return true;

	mf->keep_size_before = lz_options->before_size + lz_options->dict_size;

	mf->keep_size_after = lz_options->after_size
			+ lz_options->match_len_max;

	// To avoid constant memmove()s, allocate some extra space. Since
	// memmove()s become more expensive when the size of the buffer
	// increases, we reserve more space when a large dictionary is
	// used to make the memmove() calls rarer.
	//
	// This works with dictionaries up to about 3 GiB. If bigger
	// dictionary is wanted, some extra work is needed:
	//   - Several variables in lzma_mf have to be changed from uint32_t
	//     to size_t.
	//   - Memory usage calculation needs something too, e.g. use uint64_t
	//     for mf->size.
	uint32_t reserve = lz_options->dict_size / 2;
	if (reserve > (UINT32_C(1) << 30))
		reserve /= 2;

	reserve += (lz_options->before_size + lz_options->match_len_max
			+ lz_options->after_size) / 2 + (UINT32_C(1) << 19);

	const uint32_t old_size = mf->size;
	mf->size = mf->keep_size_before + reserve + mf->keep_size_after;

	// Deallocate the old history buffer if it exists but has different
	// size than what is needed now.
	if (mf->buffer != NULL && old_size != mf->size) {
		lzma_free(mf->buffer, allocator);
		mf->buffer = NULL;
	}

	// Match finder options
	mf->match_len_max = lz_options->match_len_max;
	mf->nice_len = lz_options->nice_len;

	// cyclic_size has to stay smaller than 2 Gi. Note that this doesn't
	// mean limiting dictionary size to less than 2 GiB. With a match
	// finder that uses multibyte resolution (hashes start at e.g. every
	// fourth byte), cyclic_size would stay below 2 Gi even when
	// dictionary size is greater than 2 GiB.
	//
	// It would be possible to allow cyclic_size >= 2 Gi, but then we
	// would need to be careful to use 64-bit types in various places
	// (size_t could do since we would need bigger than 32-bit address
	// space anyway). It would also require either zeroing a multigigabyte
	// buffer at initialization (waste of time and RAM) or allow
	// normalization in lz_encoder_mf.c to access uninitialized
	// memory to keep the code simpler. The current way is simple and
	// still allows pretty big dictionaries, so I don't expect these
	// limits to change.
	mf->cyclic_size = lz_options->dict_size + 1;

	// Validate the match finder ID and setup the function pointers.
	switch (lz_options->match_finder) {
#ifdef HAVE_MF_HC3
	case LZMA_MF_HC3:
		mf->find = &lzma_mf_hc3_find;
		mf->skip = &lzma_mf_hc3_skip;
		break;
#endif
#ifdef HAVE_MF_HC4
	case LZMA_MF_HC4:
		mf->find = &lzma_mf_hc4_find;
		mf->skip = &lzma_mf_hc4_skip;
		break;
#endif
#ifdef HAVE_MF_BT2
	case LZMA_MF_BT2:
		mf->find = &lzma_mf_bt2_find;
		mf->skip = &lzma_mf_bt2_skip;
		break;
#endif
#ifdef HAVE_MF_BT3
	case LZMA_MF_BT3:
		mf->find = &lzma_mf_bt3_find;
		mf->skip = &lzma_mf_bt3_skip;
		break;
#endif
#ifdef HAVE_MF_BT4
	case LZMA_MF_BT4:
		mf->find = &lzma_mf_bt4_find;
		mf->skip = &lzma_mf_bt4_skip;
		break;
#endif

	default:
		return true;
	}

	// Calculate the sizes of mf->hash and mf->son and check that
	// nice_len is big enough for the selected match finder.
	// The low nibble of the match finder ID encodes how many bytes
	// are hashed; bit 0x10 marks the binary tree match finders.
	const uint32_t hash_bytes = lz_options->match_finder & 0x0F;
	if (hash_bytes > mf->nice_len)
		return true;

	const bool is_bt = (lz_options->match_finder & 0x10) != 0;
	uint32_t hs;

	if (hash_bytes == 2) {
		hs = 0xFFFF;
	} else {
		// Round dictionary size up to the next 2^n - 1 so it can
		// be used as a hash mask.
		hs = lz_options->dict_size - 1;
		hs |= hs >> 1;
		hs |= hs >> 2;
		hs |= hs >> 4;
		hs |= hs >> 8;
		hs >>= 1;
		hs |= 0xFFFF;

		if (hs > (UINT32_C(1) << 24)) {
			if (hash_bytes == 3)
				hs = (UINT32_C(1) << 24) - 1;
			else
				hs >>= 1;
		}
	}

	mf->hash_mask = hs;

	// hs becomes the total element count of mf->hash: the main table
	// plus the smaller 2-byte and 3-byte hash tables when used.
	++hs;
	if (hash_bytes > 2)
		hs += HASH_2_SIZE;
	if (hash_bytes > 3)
		hs += HASH_3_SIZE;
	/*
	No match finder uses this at the moment.
	if (mf->hash_bytes > 4)
		hs += HASH_4_SIZE;
	*/

	const uint32_t old_hash_count = mf->hash_count;
	const uint32_t old_sons_count = mf->sons_count;
	mf->hash_count = hs;
	mf->sons_count = mf->cyclic_size;
	if (is_bt)
		mf->sons_count *= 2;

	// Deallocate the old hash array if it exists and has different size
	// than what is needed now.
	if (old_hash_count != mf->hash_count
			|| old_sons_count != mf->sons_count) {
		lzma_free(mf->hash, allocator);
		mf->hash = NULL;

		lzma_free(mf->son, allocator);
		mf->son = NULL;
	}

	// Maximum number of match finder cycles
	mf->depth = lz_options->depth;
	if (mf->depth == 0) {
		if (is_bt)
			mf->depth = 16 + mf->nice_len / 2;
		else
			mf->depth = 4 + mf->nice_len / 4;
	}

	return false;
}
/// \brief      Allocates remaining buffers and resets the match finder state
///
/// Must be called after lz_encoder_prepare() has set up the size fields.
/// Also loads the preset dictionary, if one was given.
///
/// \return     true on memory allocation failure, false on success.
static bool
lz_encoder_init(lzma_mf *mf, const lzma_allocator *allocator,
		const lzma_lz_options *lz_options)
{
	// Allocate the history buffer.
	if (mf->buffer == NULL) {
		// lzma_memcmplen() is used for the dictionary buffer
		// so we need to allocate a few extra bytes to prevent
		// it from reading past the end of the buffer.
		mf->buffer = lzma_alloc(mf->size + LZMA_MEMCMPLEN_EXTRA,
				allocator);
		if (mf->buffer == NULL)
			return true;

		// Keep Valgrind happy with lzma_memcmplen() and initialize
		// the extra bytes whose value may get read but which will
		// effectively get ignored.
		memzero(mf->buffer + mf->size, LZMA_MEMCMPLEN_EXTRA);
	}

	// Use cyclic_size as initial mf->offset. This allows
	// avoiding a few branches in the match finders. The downside is
	// that match finder needs to be normalized more often, which may
	// hurt performance with huge dictionaries.
	mf->offset = mf->cyclic_size;
	mf->read_pos = 0;
	mf->read_ahead = 0;
	mf->read_limit = 0;
	mf->write_pos = 0;
	mf->pending = 0;

#if UINT32_MAX >= SIZE_MAX / 4
	// Check for integer overflow. (Huge dictionaries are not
	// possible on 32-bit CPU.)
	if (mf->hash_count > SIZE_MAX / sizeof(uint32_t)
			|| mf->sons_count > SIZE_MAX / sizeof(uint32_t))
		return true;
#endif

	// Allocate and initialize the hash table. Since EMPTY_HASH_VALUE
	// is zero, we can use lzma_alloc_zero() or memzero() for mf->hash.
	//
	// We don't need to initialize mf->son, but not doing that may
	// make Valgrind complain in normalization (see normalize() in
	// lz_encoder_mf.c). Skipping the initialization is *very* good
	// when big dictionary is used but only small amount of data gets
	// actually compressed: most of the mf->son won't get actually
	// allocated by the kernel, so we avoid wasting RAM and improve
	// initialization speed a lot.
	if (mf->hash == NULL) {
		mf->hash = lzma_alloc_zero(mf->hash_count * sizeof(uint32_t),
				allocator);
		mf->son = lzma_alloc(mf->sons_count * sizeof(uint32_t),
				allocator);

		if (mf->hash == NULL || mf->son == NULL) {
			// Free both; free(NULL)-style semantics make this
			// safe even when only one allocation succeeded.
			lzma_free(mf->hash, allocator);
			mf->hash = NULL;

			lzma_free(mf->son, allocator);
			mf->son = NULL;

			return true;
		}
	} else {
		// Reusing the old table: only the hash needs clearing.
		/*
		for (uint32_t i = 0; i < mf->hash_count; ++i)
			mf->hash[i] = EMPTY_HASH_VALUE;
		*/
		memzero(mf->hash, mf->hash_count * sizeof(uint32_t));
	}

	mf->cyclic_pos = 0;

	// Handle preset dictionary.
	if (lz_options->preset_dict != NULL
			&& lz_options->preset_dict_size > 0) {
		// If the preset dictionary is bigger than the actual
		// dictionary, use only the tail.
		mf->write_pos = my_min(lz_options->preset_dict_size, mf->size);
		memcpy(mf->buffer, lz_options->preset_dict
				+ lz_options->preset_dict_size - mf->write_pos,
				mf->write_pos);

		// Hash the preset dictionary; LZMA_SYNC_FLUSH lets the
		// match finder consume all of it (read_limit == write_pos).
		mf->action = LZMA_SYNC_FLUSH;
		mf->skip(mf, mf->write_pos);
	}

	mf->action = LZMA_RUN;

	return false;
}
/// \brief      Calculates the memory usage of an LZ encoder with the
///             given options
///
/// \return     Memory usage in bytes, or UINT64_MAX if the options
///             are invalid.
extern uint64_t
lzma_lz_encoder_memusage(const lzma_lz_options *lz_options)
{
	// Old buffers must not exist when calling lz_encoder_prepare().
	lzma_mf mf = {
		.buffer = NULL,
		.hash = NULL,
		.son = NULL,
		.hash_count = 0,
		.sons_count = 0,
	};

	// Setup the size information into mf.
	if (lz_encoder_prepare(&mf, NULL, lz_options))
		return UINT64_MAX;

	// Calculate the memory usage. Nothing was actually allocated above,
	// so only the size fields of mf are used here.
	return ((uint64_t)(mf.hash_count) + mf.sons_count) * sizeof(uint32_t)
			+ mf.size + sizeof(lzma_coder);
}
/// \brief      Frees the LZ encoder and everything it owns
static void
lz_encoder_end(void *coder_ptr, const lzma_allocator *allocator)
{
	lzma_coder *coder = coder_ptr;

	// End the rest of the filter chain first.
	lzma_next_end(&coder->next, allocator);

	lzma_free(coder->mf.son, allocator);
	lzma_free(coder->mf.hash, allocator);
	lzma_free(coder->mf.buffer, allocator);

	// Let the LZ-based encoder clean up its own state if it has a
	// custom end function; otherwise a plain free is enough.
	if (coder->lz.end != NULL)
		coder->lz.end(coder->lz.coder, allocator);
	else
		lzma_free(coder->lz.coder, allocator);

	lzma_free(coder, allocator);
	return;
}
/// \brief      Updates the filter options mid-stream
///
/// Delegates to the LZ-based encoder's options_update() and then to the
/// next filter in the chain.
static lzma_ret
lz_encoder_update(void *coder_ptr, const lzma_allocator *allocator,
		const lzma_filter *filters_null lzma_attribute((__unused__)),
		const lzma_filter *reversed_filters)
{
	lzma_coder *coder = coder_ptr;

	// Updating options requires support from the LZ-based encoder.
	if (coder->lz.options_update == NULL)
		return LZMA_PROG_ERROR;

	return_if_error(coder->lz.options_update(
			coder->lz.coder, reversed_filters));

	return lzma_next_filter_update(
			&coder->next, allocator, reversed_filters + 1);
}
/// \brief      Initializes the generic LZ encoder
///
/// \param      lz_init  Callback that initializes the LZ-based encoder
///                      (e.g. LZMA) and fills in lz_options.
///
/// Allocates the coder on first use and reuses existing buffers on
/// re-initialization when their sizes still match.
extern lzma_ret
lzma_lz_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
		const lzma_filter_info *filters,
		lzma_ret (*lz_init)(lzma_lz_encoder *lz,
			const lzma_allocator *allocator, const void *options,
			lzma_lz_options *lz_options))
{
#ifdef HAVE_SMALL
	// We need that the CRC32 table has been initialized.
	lzma_crc32_init();
#endif

	// Allocate and initialize the base data structure.
	lzma_coder *coder = next->coder;
	if (coder == NULL) {
		coder = lzma_alloc(sizeof(lzma_coder), allocator);
		if (coder == NULL)
			return LZMA_MEM_ERROR;

		next->coder = coder;
		next->code = &lz_encode;
		next->end = &lz_encoder_end;
		next->update = &lz_encoder_update;

		coder->lz.coder = NULL;
		coder->lz.code = NULL;
		coder->lz.end = NULL;

		// mf.size is initialized to silence Valgrind
		// when used on optimized binaries (GCC may reorder
		// code in a way that Valgrind gets unhappy).
		coder->mf.buffer = NULL;
		coder->mf.size = 0;
		coder->mf.hash = NULL;
		coder->mf.son = NULL;
		coder->mf.hash_count = 0;
		coder->mf.sons_count = 0;

		coder->next = LZMA_NEXT_CODER_INIT;
	}

	// Initialize the LZ-based encoder.
	lzma_lz_options lz_options;
	return_if_error(lz_init(&coder->lz, allocator,
			filters[0].options, &lz_options));

	// Setup the size information into coder->mf and deallocate
	// old buffers if they have wrong size.
	if (lz_encoder_prepare(&coder->mf, allocator, &lz_options))
		return LZMA_OPTIONS_ERROR;

	// Allocate new buffers if needed, and do the rest of
	// the initialization.
	if (lz_encoder_init(&coder->mf, allocator, &lz_options))
		return LZMA_MEM_ERROR;

	// Initialize the next filter in the chain, if any.
	return lzma_next_filter_init(&coder->next, allocator, filters + 1);
}
  488. extern LZMA_API(lzma_bool)
  489. lzma_mf_is_supported(lzma_match_finder mf)
  490. {
  491. bool ret = false;
  492. #ifdef HAVE_MF_HC3
  493. if (mf == LZMA_MF_HC3)
  494. ret = true;
  495. #endif
  496. #ifdef HAVE_MF_HC4
  497. if (mf == LZMA_MF_HC4)
  498. ret = true;
  499. #endif
  500. #ifdef HAVE_MF_BT2
  501. if (mf == LZMA_MF_BT2)
  502. ret = true;
  503. #endif
  504. #ifdef HAVE_MF_BT3
  505. if (mf == LZMA_MF_BT3)
  506. ret = true;
  507. #endif
  508. #ifdef HAVE_MF_BT4
  509. if (mf == LZMA_MF_BT4)
  510. ret = true;
  511. #endif
  512. return ret;
  513. }