lzma_decoder.c 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064
  1. ///////////////////////////////////////////////////////////////////////////////
  2. //
  3. /// \file lzma_decoder.c
  4. /// \brief LZMA decoder
  5. ///
  6. // Authors: Igor Pavlov
  7. // Lasse Collin
  8. //
  9. // This file has been put into the public domain.
  10. // You can do whatever you want with this file.
  11. //
  12. ///////////////////////////////////////////////////////////////////////////////
  13. #include "lz_decoder.h"
  14. #include "lzma_common.h"
  15. #include "lzma_decoder.h"
  16. #include "range_decoder.h"
  17. // The macros unroll loops with switch statements.
  18. // Silence warnings about missing fall-through comments.
  19. #if TUKLIB_GNUC_REQ(7, 0)
  20. # pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
  21. #endif
  22. #ifdef HAVE_SMALL
  23. // Macros for (somewhat) size-optimized code.
  24. #define seq_4(seq) seq
  25. #define seq_6(seq) seq
  26. #define seq_8(seq) seq
  27. #define seq_len(seq) \
  28. seq ## _CHOICE, \
  29. seq ## _CHOICE2, \
  30. seq ## _BITTREE
  31. #define len_decode(target, ld, pos_state, seq) \
  32. do { \
  33. case seq ## _CHOICE: \
  34. rc_if_0(ld.choice, seq ## _CHOICE) { \
  35. rc_update_0(ld.choice); \
  36. probs = ld.low[pos_state];\
  37. limit = LEN_LOW_SYMBOLS; \
  38. target = MATCH_LEN_MIN; \
  39. } else { \
  40. rc_update_1(ld.choice); \
  41. case seq ## _CHOICE2: \
  42. rc_if_0(ld.choice2, seq ## _CHOICE2) { \
  43. rc_update_0(ld.choice2); \
  44. probs = ld.mid[pos_state]; \
  45. limit = LEN_MID_SYMBOLS; \
  46. target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \
  47. } else { \
  48. rc_update_1(ld.choice2); \
  49. probs = ld.high; \
  50. limit = LEN_HIGH_SYMBOLS; \
  51. target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS \
  52. + LEN_MID_SYMBOLS; \
  53. } \
  54. } \
  55. symbol = 1; \
  56. case seq ## _BITTREE: \
  57. do { \
  58. rc_bit(probs[symbol], , , seq ## _BITTREE); \
  59. } while (symbol < limit); \
  60. target += symbol - limit; \
  61. } while (0)
  62. #else // HAVE_SMALL
  63. // Unrolled versions
  64. #define seq_4(seq) \
  65. seq ## 0, \
  66. seq ## 1, \
  67. seq ## 2, \
  68. seq ## 3
  69. #define seq_6(seq) \
  70. seq ## 0, \
  71. seq ## 1, \
  72. seq ## 2, \
  73. seq ## 3, \
  74. seq ## 4, \
  75. seq ## 5
  76. #define seq_8(seq) \
  77. seq ## 0, \
  78. seq ## 1, \
  79. seq ## 2, \
  80. seq ## 3, \
  81. seq ## 4, \
  82. seq ## 5, \
  83. seq ## 6, \
  84. seq ## 7
  85. #define seq_len(seq) \
  86. seq ## _CHOICE, \
  87. seq ## _LOW0, \
  88. seq ## _LOW1, \
  89. seq ## _LOW2, \
  90. seq ## _CHOICE2, \
  91. seq ## _MID0, \
  92. seq ## _MID1, \
  93. seq ## _MID2, \
  94. seq ## _HIGH0, \
  95. seq ## _HIGH1, \
  96. seq ## _HIGH2, \
  97. seq ## _HIGH3, \
  98. seq ## _HIGH4, \
  99. seq ## _HIGH5, \
  100. seq ## _HIGH6, \
  101. seq ## _HIGH7
  102. #define len_decode(target, ld, pos_state, seq) \
  103. do { \
  104. symbol = 1; \
  105. case seq ## _CHOICE: \
  106. rc_if_0(ld.choice, seq ## _CHOICE) { \
  107. rc_update_0(ld.choice); \
  108. rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW0); \
  109. rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW1); \
  110. rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW2); \
  111. target = symbol - LEN_LOW_SYMBOLS + MATCH_LEN_MIN; \
  112. } else { \
  113. rc_update_1(ld.choice); \
  114. case seq ## _CHOICE2: \
  115. rc_if_0(ld.choice2, seq ## _CHOICE2) { \
  116. rc_update_0(ld.choice2); \
  117. rc_bit_case(ld.mid[pos_state][symbol], , , \
  118. seq ## _MID0); \
  119. rc_bit_case(ld.mid[pos_state][symbol], , , \
  120. seq ## _MID1); \
  121. rc_bit_case(ld.mid[pos_state][symbol], , , \
  122. seq ## _MID2); \
  123. target = symbol - LEN_MID_SYMBOLS \
  124. + MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \
  125. } else { \
  126. rc_update_1(ld.choice2); \
  127. rc_bit_case(ld.high[symbol], , , seq ## _HIGH0); \
  128. rc_bit_case(ld.high[symbol], , , seq ## _HIGH1); \
  129. rc_bit_case(ld.high[symbol], , , seq ## _HIGH2); \
  130. rc_bit_case(ld.high[symbol], , , seq ## _HIGH3); \
  131. rc_bit_case(ld.high[symbol], , , seq ## _HIGH4); \
  132. rc_bit_case(ld.high[symbol], , , seq ## _HIGH5); \
  133. rc_bit_case(ld.high[symbol], , , seq ## _HIGH6); \
  134. rc_bit_case(ld.high[symbol], , , seq ## _HIGH7); \
  135. target = symbol - LEN_HIGH_SYMBOLS \
  136. + MATCH_LEN_MIN \
  137. + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; \
  138. } \
  139. } \
  140. } while (0)
  141. #endif // HAVE_SMALL
  142. /// Length decoder probabilities; see comments in lzma_common.h.
  143. typedef struct {
  144. probability choice;
  145. probability choice2;
  146. probability low[POS_STATES_MAX][LEN_LOW_SYMBOLS];
  147. probability mid[POS_STATES_MAX][LEN_MID_SYMBOLS];
  148. probability high[LEN_HIGH_SYMBOLS];
  149. } lzma_length_decoder;
  150. typedef struct {
  151. ///////////////////
  152. // Probabilities //
  153. ///////////////////
  154. /// Literals; see comments in lzma_common.h.
  155. probability literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
  156. /// If 1, it's a match. Otherwise it's a single 8-bit literal.
  157. probability is_match[STATES][POS_STATES_MAX];
  158. /// If 1, it's a repeated match. The distance is one of rep0 .. rep3.
  159. probability is_rep[STATES];
  160. /// If 0, distance of a repeated match is rep0.
  161. /// Otherwise check is_rep1.
  162. probability is_rep0[STATES];
  163. /// If 0, distance of a repeated match is rep1.
  164. /// Otherwise check is_rep2.
  165. probability is_rep1[STATES];
  166. /// If 0, distance of a repeated match is rep2. Otherwise it is rep3.
  167. probability is_rep2[STATES];
  168. /// If 1, the repeated match has length of one byte. Otherwise
  169. /// the length is decoded from rep_len_decoder.
  170. probability is_rep0_long[STATES][POS_STATES_MAX];
  171. /// Probability tree for the highest two bits of the match distance.
  172. /// There is a separate probability tree for match lengths of
  173. /// 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
  174. probability dist_slot[DIST_STATES][DIST_SLOTS];
  175. /// Probability trees for additional bits for match distance when the
  176. /// distance is in the range [4, 127].
  177. probability pos_special[FULL_DISTANCES - DIST_MODEL_END];
  178. /// Probability tree for the lowest four bits of a match distance
  179. /// that is equal to or greater than 128.
  180. probability pos_align[ALIGN_SIZE];
  181. /// Length of a normal match
  182. lzma_length_decoder match_len_decoder;
  183. /// Length of a repeated match
  184. lzma_length_decoder rep_len_decoder;
  185. ///////////////////
  186. // Decoder state //
  187. ///////////////////
  188. // Range coder
  189. lzma_range_decoder rc;
  190. // Types of the most recently seen LZMA symbols
  191. lzma_lzma_state state;
  192. uint32_t rep0; ///< Distance of the latest match
  193. uint32_t rep1; ///< Distance of second latest match
  194. uint32_t rep2; ///< Distance of third latest match
  195. uint32_t rep3; ///< Distance of fourth latest match
  196. uint32_t pos_mask; // (1U << pb) - 1
  197. uint32_t literal_context_bits;
  198. uint32_t literal_pos_mask;
  199. /// Uncompressed size as bytes, or LZMA_VLI_UNKNOWN if end of
  200. /// payload marker is expected.
  201. lzma_vli uncompressed_size;
  202. ////////////////////////////////
  203. // State of incomplete symbol //
  204. ////////////////////////////////
  205. /// Position where to continue the decoder loop
  206. enum {
  207. SEQ_NORMALIZE,
  208. SEQ_IS_MATCH,
  209. seq_8(SEQ_LITERAL),
  210. seq_8(SEQ_LITERAL_MATCHED),
  211. SEQ_LITERAL_WRITE,
  212. SEQ_IS_REP,
  213. seq_len(SEQ_MATCH_LEN),
  214. seq_6(SEQ_DIST_SLOT),
  215. SEQ_DIST_MODEL,
  216. SEQ_DIRECT,
  217. seq_4(SEQ_ALIGN),
  218. SEQ_EOPM,
  219. SEQ_IS_REP0,
  220. SEQ_SHORTREP,
  221. SEQ_IS_REP0_LONG,
  222. SEQ_IS_REP1,
  223. SEQ_IS_REP2,
  224. seq_len(SEQ_REP_LEN),
  225. SEQ_COPY,
  226. } sequence;
  227. /// Base of the current probability tree
  228. probability *probs;
  229. /// Symbol being decoded. This is also used as an index variable in
  230. /// bittree decoders: probs[symbol]
  231. uint32_t symbol;
  232. /// Used as a loop termination condition on bittree decoders and
  233. /// direct bits decoder.
  234. uint32_t limit;
  235. /// Matched literal decoder: 0x100 or 0 to help avoiding branches.
  236. /// Bittree reverse decoders: Offset of the next bit: 1 << offset
  237. uint32_t offset;
  238. /// If decoding a literal: match byte.
  239. /// If decoding a match: length of the match.
  240. uint32_t len;
  241. } lzma_lzma1_decoder;
  242. static lzma_ret
  243. lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
  244. const uint8_t *restrict in,
  245. size_t *restrict in_pos, size_t in_size)
  246. {
  247. lzma_lzma1_decoder *restrict coder = coder_ptr;
  248. ////////////////////
  249. // Initialization //
  250. ////////////////////
  251. {
  252. const lzma_ret ret = rc_read_init(
  253. &coder->rc, in, in_pos, in_size);
  254. if (ret != LZMA_STREAM_END)
  255. return ret;
  256. }
  257. ///////////////
  258. // Variables //
  259. ///////////////
  260. // Making local copies of often-used variables improves both
  261. // speed and readability.
  262. lzma_dict dict = *dictptr;
  263. const size_t dict_start = dict.pos;
  264. // Range decoder
  265. rc_to_local(coder->rc, *in_pos);
  266. // State
  267. uint32_t state = coder->state;
  268. uint32_t rep0 = coder->rep0;
  269. uint32_t rep1 = coder->rep1;
  270. uint32_t rep2 = coder->rep2;
  271. uint32_t rep3 = coder->rep3;
  272. const uint32_t pos_mask = coder->pos_mask;
  273. // These variables are actually needed only if we last time ran
  274. // out of input in the middle of the decoder loop.
  275. probability *probs = coder->probs;
  276. uint32_t symbol = coder->symbol;
  277. uint32_t limit = coder->limit;
  278. uint32_t offset = coder->offset;
  279. uint32_t len = coder->len;
  280. const uint32_t literal_pos_mask = coder->literal_pos_mask;
  281. const uint32_t literal_context_bits = coder->literal_context_bits;
  282. // Temporary variables
  283. uint32_t pos_state = dict.pos & pos_mask;
  284. lzma_ret ret = LZMA_OK;
  285. // If uncompressed size is known, there must be no end of payload
  286. // marker.
  287. const bool no_eopm = coder->uncompressed_size
  288. != LZMA_VLI_UNKNOWN;
  289. if (no_eopm && coder->uncompressed_size < dict.limit - dict.pos)
  290. dict.limit = dict.pos + (size_t)(coder->uncompressed_size);
  291. // The main decoder loop. The "switch" is used to restart the decoder at
  292. // correct location. Once restarted, the "switch" is no longer used.
  293. switch (coder->sequence)
  294. while (true) {
  295. // Calculate new pos_state. This is skipped on the first loop
  296. // since we already calculated it when setting up the local
  297. // variables.
  298. pos_state = dict.pos & pos_mask;
  299. case SEQ_NORMALIZE:
  300. case SEQ_IS_MATCH:
  301. if (unlikely(no_eopm && dict.pos == dict.limit))
  302. break;
  303. rc_if_0(coder->is_match[state][pos_state], SEQ_IS_MATCH) {
  304. rc_update_0(coder->is_match[state][pos_state]);
  305. // It's a literal i.e. a single 8-bit byte.
  306. probs = literal_subcoder(coder->literal,
  307. literal_context_bits, literal_pos_mask,
  308. dict.pos, dict_get(&dict, 0));
  309. symbol = 1;
  310. if (is_literal_state(state)) {
  311. // Decode literal without match byte.
  312. #ifdef HAVE_SMALL
  313. case SEQ_LITERAL:
  314. do {
  315. rc_bit(probs[symbol], , , SEQ_LITERAL);
  316. } while (symbol < (1 << 8));
  317. #else
  318. rc_bit_case(probs[symbol], , , SEQ_LITERAL0);
  319. rc_bit_case(probs[symbol], , , SEQ_LITERAL1);
  320. rc_bit_case(probs[symbol], , , SEQ_LITERAL2);
  321. rc_bit_case(probs[symbol], , , SEQ_LITERAL3);
  322. rc_bit_case(probs[symbol], , , SEQ_LITERAL4);
  323. rc_bit_case(probs[symbol], , , SEQ_LITERAL5);
  324. rc_bit_case(probs[symbol], , , SEQ_LITERAL6);
  325. rc_bit_case(probs[symbol], , , SEQ_LITERAL7);
  326. #endif
  327. } else {
  328. // Decode literal with match byte.
  329. //
  330. // We store the byte we compare against
  331. // ("match byte") to "len" to minimize the
  332. // number of variables we need to store
  333. // between decoder calls.
  334. len = (uint32_t)(dict_get(&dict, rep0)) << 1;
  335. // The usage of "offset" allows omitting some
  336. // branches, which should give tiny speed
  337. // improvement on some CPUs. "offset" gets
  338. // set to zero if match_bit didn't match.
  339. offset = 0x100;
  340. #ifdef HAVE_SMALL
  341. case SEQ_LITERAL_MATCHED:
  342. do {
  343. const uint32_t match_bit
  344. = len & offset;
  345. const uint32_t subcoder_index
  346. = offset + match_bit
  347. + symbol;
  348. rc_bit(probs[subcoder_index],
  349. offset &= ~match_bit,
  350. offset &= match_bit,
  351. SEQ_LITERAL_MATCHED);
  352. // It seems to be faster to do this
  353. // here instead of putting it to the
  354. // beginning of the loop and then
  355. // putting the "case" in the middle
  356. // of the loop.
  357. len <<= 1;
  358. } while (symbol < (1 << 8));
  359. #else
  360. // Unroll the loop.
  361. uint32_t match_bit;
  362. uint32_t subcoder_index;
  363. # define d(seq) \
  364. case seq: \
  365. match_bit = len & offset; \
  366. subcoder_index = offset + match_bit + symbol; \
  367. rc_bit(probs[subcoder_index], \
  368. offset &= ~match_bit, \
  369. offset &= match_bit, \
  370. seq)
  371. d(SEQ_LITERAL_MATCHED0);
  372. len <<= 1;
  373. d(SEQ_LITERAL_MATCHED1);
  374. len <<= 1;
  375. d(SEQ_LITERAL_MATCHED2);
  376. len <<= 1;
  377. d(SEQ_LITERAL_MATCHED3);
  378. len <<= 1;
  379. d(SEQ_LITERAL_MATCHED4);
  380. len <<= 1;
  381. d(SEQ_LITERAL_MATCHED5);
  382. len <<= 1;
  383. d(SEQ_LITERAL_MATCHED6);
  384. len <<= 1;
  385. d(SEQ_LITERAL_MATCHED7);
  386. # undef d
  387. #endif
  388. }
  389. //update_literal(state);
  390. // Use a lookup table to update to literal state,
  391. // since compared to other state updates, this would
  392. // need two branches.
  393. static const lzma_lzma_state next_state[] = {
  394. STATE_LIT_LIT,
  395. STATE_LIT_LIT,
  396. STATE_LIT_LIT,
  397. STATE_LIT_LIT,
  398. STATE_MATCH_LIT_LIT,
  399. STATE_REP_LIT_LIT,
  400. STATE_SHORTREP_LIT_LIT,
  401. STATE_MATCH_LIT,
  402. STATE_REP_LIT,
  403. STATE_SHORTREP_LIT,
  404. STATE_MATCH_LIT,
  405. STATE_REP_LIT
  406. };
  407. state = next_state[state];
  408. case SEQ_LITERAL_WRITE:
  409. if (unlikely(dict_put(&dict, symbol))) {
  410. coder->sequence = SEQ_LITERAL_WRITE;
  411. goto out;
  412. }
  413. continue;
  414. }
  415. // Instead of a new byte we are going to get a byte range
  416. // (distance and length) which will be repeated from our
  417. // output history.
  418. rc_update_1(coder->is_match[state][pos_state]);
  419. case SEQ_IS_REP:
  420. rc_if_0(coder->is_rep[state], SEQ_IS_REP) {
  421. // Not a repeated match
  422. rc_update_0(coder->is_rep[state]);
  423. update_match(state);
  424. // The latest three match distances are kept in
  425. // memory in case there are repeated matches.
  426. rep3 = rep2;
  427. rep2 = rep1;
  428. rep1 = rep0;
  429. // Decode the length of the match.
  430. len_decode(len, coder->match_len_decoder,
  431. pos_state, SEQ_MATCH_LEN);
  432. // Prepare to decode the highest two bits of the
  433. // match distance.
  434. probs = coder->dist_slot[get_dist_state(len)];
  435. symbol = 1;
  436. #ifdef HAVE_SMALL
  437. case SEQ_DIST_SLOT:
  438. do {
  439. rc_bit(probs[symbol], , , SEQ_DIST_SLOT);
  440. } while (symbol < DIST_SLOTS);
  441. #else
  442. rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT0);
  443. rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT1);
  444. rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT2);
  445. rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT3);
  446. rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT4);
  447. rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT5);
  448. #endif
  449. // Get rid of the highest bit that was needed for
  450. // indexing of the probability array.
  451. symbol -= DIST_SLOTS;
  452. assert(symbol <= 63);
  453. if (symbol < DIST_MODEL_START) {
  454. // Match distances [0, 3] have only two bits.
  455. rep0 = symbol;
  456. } else {
  457. // Decode the lowest [1, 29] bits of
  458. // the match distance.
  459. limit = (symbol >> 1) - 1;
  460. assert(limit >= 1 && limit <= 30);
  461. rep0 = 2 + (symbol & 1);
  462. if (symbol < DIST_MODEL_END) {
  463. // Prepare to decode the low bits for
  464. // a distance of [4, 127].
  465. assert(limit <= 5);
  466. rep0 <<= limit;
  467. assert(rep0 <= 96);
  468. // -1 is fine, because we start
  469. // decoding at probs[1], not probs[0].
  470. // NOTE: This violates the C standard,
  471. // since we are doing pointer
  472. // arithmetic past the beginning of
  473. // the array.
  474. assert((int32_t)(rep0 - symbol - 1)
  475. >= -1);
  476. assert((int32_t)(rep0 - symbol - 1)
  477. <= 82);
  478. probs = coder->pos_special + rep0
  479. - symbol - 1;
  480. symbol = 1;
  481. offset = 0;
  482. case SEQ_DIST_MODEL:
  483. #ifdef HAVE_SMALL
  484. do {
  485. rc_bit(probs[symbol], ,
  486. rep0 += 1U << offset,
  487. SEQ_DIST_MODEL);
  488. } while (++offset < limit);
  489. #else
  490. switch (limit) {
  491. case 5:
  492. assert(offset == 0);
  493. rc_bit(probs[symbol], ,
  494. rep0 += 1U,
  495. SEQ_DIST_MODEL);
  496. ++offset;
  497. --limit;
  498. case 4:
  499. rc_bit(probs[symbol], ,
  500. rep0 += 1U << offset,
  501. SEQ_DIST_MODEL);
  502. ++offset;
  503. --limit;
  504. case 3:
  505. rc_bit(probs[symbol], ,
  506. rep0 += 1U << offset,
  507. SEQ_DIST_MODEL);
  508. ++offset;
  509. --limit;
  510. case 2:
  511. rc_bit(probs[symbol], ,
  512. rep0 += 1U << offset,
  513. SEQ_DIST_MODEL);
  514. ++offset;
  515. --limit;
  516. case 1:
  517. // We need "symbol" only for
  518. // indexing the probability
  519. // array, thus we can use
  520. // rc_bit_last() here to omit
  521. // the unneeded updating of
  522. // "symbol".
  523. rc_bit_last(probs[symbol], ,
  524. rep0 += 1U << offset,
  525. SEQ_DIST_MODEL);
  526. }
  527. #endif
  528. } else {
  529. // The distance is >= 128. Decode the
  530. // lower bits without probabilities
  531. // except the lowest four bits.
  532. assert(symbol >= 14);
  533. assert(limit >= 6);
  534. limit -= ALIGN_BITS;
  535. assert(limit >= 2);
  536. case SEQ_DIRECT:
  537. // Not worth manual unrolling
  538. do {
  539. rc_direct(rep0, SEQ_DIRECT);
  540. } while (--limit > 0);
  541. // Decode the lowest four bits using
  542. // probabilities.
  543. rep0 <<= ALIGN_BITS;
  544. symbol = 1;
  545. #ifdef HAVE_SMALL
  546. offset = 0;
  547. case SEQ_ALIGN:
  548. do {
  549. rc_bit(coder->pos_align[
  550. symbol], ,
  551. rep0 += 1U << offset,
  552. SEQ_ALIGN);
  553. } while (++offset < ALIGN_BITS);
  554. #else
  555. case SEQ_ALIGN0:
  556. rc_bit(coder->pos_align[symbol], ,
  557. rep0 += 1, SEQ_ALIGN0);
  558. case SEQ_ALIGN1:
  559. rc_bit(coder->pos_align[symbol], ,
  560. rep0 += 2, SEQ_ALIGN1);
  561. case SEQ_ALIGN2:
  562. rc_bit(coder->pos_align[symbol], ,
  563. rep0 += 4, SEQ_ALIGN2);
  564. case SEQ_ALIGN3:
  565. // Like in SEQ_DIST_MODEL, we don't
  566. // need "symbol" for anything else
  567. // than indexing the probability array.
  568. rc_bit_last(coder->pos_align[symbol], ,
  569. rep0 += 8, SEQ_ALIGN3);
  570. #endif
  571. if (rep0 == UINT32_MAX) {
  572. // End of payload marker was
  573. // found. It must not be
  574. // present if uncompressed
  575. // size is known.
  576. if (coder->uncompressed_size
  577. != LZMA_VLI_UNKNOWN) {
  578. ret = LZMA_DATA_ERROR;
  579. goto out;
  580. }
  581. case SEQ_EOPM:
  582. // LZMA1 stream with
  583. // end-of-payload marker.
  584. rc_normalize(SEQ_EOPM);
  585. ret = LZMA_STREAM_END;
  586. goto out;
  587. }
  588. }
  589. }
  590. // Validate the distance we just decoded.
  591. if (unlikely(!dict_is_distance_valid(&dict, rep0))) {
  592. ret = LZMA_DATA_ERROR;
  593. goto out;
  594. }
  595. } else {
  596. rc_update_1(coder->is_rep[state]);
  597. // Repeated match
  598. //
  599. // The match distance is a value that we have had
  600. // earlier. The latest four match distances are
  601. // available as rep0, rep1, rep2 and rep3. We will
  602. // now decode which of them is the new distance.
  603. //
  604. // There cannot be a match if we haven't produced
  605. // any output, so check that first.
  606. if (unlikely(!dict_is_distance_valid(&dict, 0))) {
  607. ret = LZMA_DATA_ERROR;
  608. goto out;
  609. }
  610. case SEQ_IS_REP0:
  611. rc_if_0(coder->is_rep0[state], SEQ_IS_REP0) {
  612. rc_update_0(coder->is_rep0[state]);
  613. // The distance is rep0.
  614. case SEQ_IS_REP0_LONG:
  615. rc_if_0(coder->is_rep0_long[state][pos_state],
  616. SEQ_IS_REP0_LONG) {
  617. rc_update_0(coder->is_rep0_long[
  618. state][pos_state]);
  619. update_short_rep(state);
  620. case SEQ_SHORTREP:
  621. if (unlikely(dict_put(&dict, dict_get(
  622. &dict, rep0)))) {
  623. coder->sequence = SEQ_SHORTREP;
  624. goto out;
  625. }
  626. continue;
  627. }
  628. // Repeating more than one byte at
  629. // distance of rep0.
  630. rc_update_1(coder->is_rep0_long[
  631. state][pos_state]);
  632. } else {
  633. rc_update_1(coder->is_rep0[state]);
  634. case SEQ_IS_REP1:
  635. // The distance is rep1, rep2 or rep3. Once
  636. // we find out which one of these three, it
  637. // is stored to rep0 and rep1, rep2 and rep3
  638. // are updated accordingly.
  639. rc_if_0(coder->is_rep1[state], SEQ_IS_REP1) {
  640. rc_update_0(coder->is_rep1[state]);
  641. const uint32_t distance = rep1;
  642. rep1 = rep0;
  643. rep0 = distance;
  644. } else {
  645. rc_update_1(coder->is_rep1[state]);
  646. case SEQ_IS_REP2:
  647. rc_if_0(coder->is_rep2[state],
  648. SEQ_IS_REP2) {
  649. rc_update_0(coder->is_rep2[
  650. state]);
  651. const uint32_t distance = rep2;
  652. rep2 = rep1;
  653. rep1 = rep0;
  654. rep0 = distance;
  655. } else {
  656. rc_update_1(coder->is_rep2[
  657. state]);
  658. const uint32_t distance = rep3;
  659. rep3 = rep2;
  660. rep2 = rep1;
  661. rep1 = rep0;
  662. rep0 = distance;
  663. }
  664. }
  665. }
  666. update_long_rep(state);
  667. // Decode the length of the repeated match.
  668. len_decode(len, coder->rep_len_decoder,
  669. pos_state, SEQ_REP_LEN);
  670. }
  671. /////////////////////////////////
  672. // Repeat from history buffer. //
  673. /////////////////////////////////
  674. // The length is always between these limits. There is no way
  675. // to trigger the algorithm to set len outside this range.
  676. assert(len >= MATCH_LEN_MIN);
  677. assert(len <= MATCH_LEN_MAX);
  678. case SEQ_COPY:
  679. // Repeat len bytes from distance of rep0.
  680. if (unlikely(dict_repeat(&dict, rep0, &len))) {
  681. coder->sequence = SEQ_COPY;
  682. goto out;
  683. }
  684. }
  685. rc_normalize(SEQ_NORMALIZE);
  686. coder->sequence = SEQ_IS_MATCH;
  687. out:
  688. // Save state
  689. // NOTE: Must not copy dict.limit.
  690. dictptr->pos = dict.pos;
  691. dictptr->full = dict.full;
  692. rc_from_local(coder->rc, *in_pos);
  693. coder->state = state;
  694. coder->rep0 = rep0;
  695. coder->rep1 = rep1;
  696. coder->rep2 = rep2;
  697. coder->rep3 = rep3;
  698. coder->probs = probs;
  699. coder->symbol = symbol;
  700. coder->limit = limit;
  701. coder->offset = offset;
  702. coder->len = len;
  703. // Update the remaining amount of uncompressed data if uncompressed
  704. // size was known.
  705. if (coder->uncompressed_size != LZMA_VLI_UNKNOWN) {
  706. coder->uncompressed_size -= dict.pos - dict_start;
  707. // Since there cannot be end of payload marker if the
  708. // uncompressed size was known, we check here if we
  709. // finished decoding.
  710. if (coder->uncompressed_size == 0 && ret == LZMA_OK
  711. && coder->sequence != SEQ_NORMALIZE)
  712. ret = coder->sequence == SEQ_IS_MATCH
  713. ? LZMA_STREAM_END : LZMA_DATA_ERROR;
  714. }
  715. // We can do an additional check in the range decoder to catch some
  716. // corrupted files.
  717. if (ret == LZMA_STREAM_END) {
  718. if (!rc_is_finished(coder->rc))
  719. ret = LZMA_DATA_ERROR;
  720. // Reset the range decoder so that it is ready to reinitialize
  721. // for a new LZMA2 chunk.
  722. rc_reset(coder->rc);
  723. }
  724. return ret;
  725. }
  726. static void
  727. lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size)
  728. {
  729. lzma_lzma1_decoder *coder = coder_ptr;
  730. coder->uncompressed_size = uncompressed_size;
  731. }
  732. static void
  733. lzma_decoder_reset(void *coder_ptr, const void *opt)
  734. {
  735. lzma_lzma1_decoder *coder = coder_ptr;
  736. const lzma_options_lzma *options = opt;
  737. // NOTE: We assume that lc/lp/pb are valid since they were
  738. // successfully decoded with lzma_lzma_decode_properties().
  739. // Calculate pos_mask. We don't need pos_bits as is for anything.
  740. coder->pos_mask = (1U << options->pb) - 1;
  741. // Initialize the literal decoder.
  742. literal_init(coder->literal, options->lc, options->lp);
  743. coder->literal_context_bits = options->lc;
  744. coder->literal_pos_mask = (1U << options->lp) - 1;
  745. // State
  746. coder->state = STATE_LIT_LIT;
  747. coder->rep0 = 0;
  748. coder->rep1 = 0;
  749. coder->rep2 = 0;
  750. coder->rep3 = 0;
  751. coder->pos_mask = (1U << options->pb) - 1;
  752. // Range decoder
  753. rc_reset(coder->rc);
  754. // Bit and bittree decoders
  755. for (uint32_t i = 0; i < STATES; ++i) {
  756. for (uint32_t j = 0; j <= coder->pos_mask; ++j) {
  757. bit_reset(coder->is_match[i][j]);
  758. bit_reset(coder->is_rep0_long[i][j]);
  759. }
  760. bit_reset(coder->is_rep[i]);
  761. bit_reset(coder->is_rep0[i]);
  762. bit_reset(coder->is_rep1[i]);
  763. bit_reset(coder->is_rep2[i]);
  764. }
  765. for (uint32_t i = 0; i < DIST_STATES; ++i)
  766. bittree_reset(coder->dist_slot[i], DIST_SLOT_BITS);
  767. for (uint32_t i = 0; i < FULL_DISTANCES - DIST_MODEL_END; ++i)
  768. bit_reset(coder->pos_special[i]);
  769. bittree_reset(coder->pos_align, ALIGN_BITS);
  770. // Len decoders (also bit/bittree)
  771. const uint32_t num_pos_states = 1U << options->pb;
  772. bit_reset(coder->match_len_decoder.choice);
  773. bit_reset(coder->match_len_decoder.choice2);
  774. bit_reset(coder->rep_len_decoder.choice);
  775. bit_reset(coder->rep_len_decoder.choice2);
  776. for (uint32_t pos_state = 0; pos_state < num_pos_states; ++pos_state) {
  777. bittree_reset(coder->match_len_decoder.low[pos_state],
  778. LEN_LOW_BITS);
  779. bittree_reset(coder->match_len_decoder.mid[pos_state],
  780. LEN_MID_BITS);
  781. bittree_reset(coder->rep_len_decoder.low[pos_state],
  782. LEN_LOW_BITS);
  783. bittree_reset(coder->rep_len_decoder.mid[pos_state],
  784. LEN_MID_BITS);
  785. }
  786. bittree_reset(coder->match_len_decoder.high, LEN_HIGH_BITS);
  787. bittree_reset(coder->rep_len_decoder.high, LEN_HIGH_BITS);
  788. coder->sequence = SEQ_IS_MATCH;
  789. coder->probs = NULL;
  790. coder->symbol = 0;
  791. coder->limit = 0;
  792. coder->offset = 0;
  793. coder->len = 0;
  794. return;
  795. }
  796. extern lzma_ret
  797. lzma_lzma_decoder_create(lzma_lz_decoder *lz, const lzma_allocator *allocator,
  798. const void *opt, lzma_lz_options *lz_options)
  799. {
  800. if (lz->coder == NULL) {
  801. lz->coder = lzma_alloc(sizeof(lzma_lzma1_decoder), allocator);
  802. if (lz->coder == NULL)
  803. return LZMA_MEM_ERROR;
  804. lz->code = &lzma_decode;
  805. lz->reset = &lzma_decoder_reset;
  806. lz->set_uncompressed = &lzma_decoder_uncompressed;
  807. }
  808. // All dictionary sizes are OK here. LZ decoder will take care of
  809. // the special cases.
  810. const lzma_options_lzma *options = opt;
  811. lz_options->dict_size = options->dict_size;
  812. lz_options->preset_dict = options->preset_dict;
  813. lz_options->preset_dict_size = options->preset_dict_size;
  814. return LZMA_OK;
  815. }
  816. /// Allocate and initialize LZMA decoder. This is used only via LZ
  817. /// initialization (lzma_lzma_decoder_init() passes function pointer to
  818. /// the LZ initialization).
  819. static lzma_ret
  820. lzma_decoder_init(lzma_lz_decoder *lz, const lzma_allocator *allocator,
  821. const void *options, lzma_lz_options *lz_options)
  822. {
  823. if (!is_lclppb_valid(options))
  824. return LZMA_PROG_ERROR;
  825. return_if_error(lzma_lzma_decoder_create(
  826. lz, allocator, options, lz_options));
  827. lzma_decoder_reset(lz->coder, options);
  828. lzma_decoder_uncompressed(lz->coder, LZMA_VLI_UNKNOWN);
  829. return LZMA_OK;
  830. }
  831. extern lzma_ret
  832. lzma_lzma_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
  833. const lzma_filter_info *filters)
  834. {
  835. // LZMA can only be the last filter in the chain. This is enforced
  836. // by the raw_decoder initialization.
  837. assert(filters[1].init == NULL);
  838. return lzma_lz_decoder_init(next, allocator, filters,
  839. &lzma_decoder_init);
  840. }
  841. extern bool
  842. lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte)
  843. {
  844. if (byte > (4 * 5 + 4) * 9 + 8)
  845. return true;
  846. // See the file format specification to understand this.
  847. options->pb = byte / (9 * 5);
  848. byte -= options->pb * 9 * 5;
  849. options->lp = byte / 9;
  850. options->lc = byte - options->lp * 9;
  851. return options->lc + options->lp > LZMA_LCLP_MAX;
  852. }
  853. extern uint64_t
  854. lzma_lzma_decoder_memusage_nocheck(const void *options)
  855. {
  856. const lzma_options_lzma *const opt = options;
  857. return sizeof(lzma_lzma1_decoder)
  858. + lzma_lz_decoder_memusage(opt->dict_size);
  859. }
  860. extern uint64_t
  861. lzma_lzma_decoder_memusage(const void *options)
  862. {
  863. if (!is_lclppb_valid(options))
  864. return UINT64_MAX;
  865. return lzma_lzma_decoder_memusage_nocheck(options);
  866. }
  867. extern lzma_ret
  868. lzma_lzma_props_decode(void **options, const lzma_allocator *allocator,
  869. const uint8_t *props, size_t props_size)
  870. {
  871. if (props_size != 5)
  872. return LZMA_OPTIONS_ERROR;
  873. lzma_options_lzma *opt
  874. = lzma_alloc(sizeof(lzma_options_lzma), allocator);
  875. if (opt == NULL)
  876. return LZMA_MEM_ERROR;
  877. if (lzma_lzma_lclppb_decode(opt, props[0]))
  878. goto error;
  879. // All dictionary sizes are accepted, including zero. LZ decoder
  880. // will automatically use a dictionary at least a few KiB even if
  881. // a smaller dictionary is requested.
  882. opt->dict_size = read32le(props + 1);
  883. opt->preset_dict = NULL;
  884. opt->preset_dict_size = 0;
  885. *options = opt;
  886. return LZMA_OK;
  887. error:
  888. lzma_free(opt, allocator);
  889. return LZMA_OPTIONS_ERROR;
  890. }