archive_read_support_compression_xz.c 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644
  1. /*-
  2. * Copyright (c) 2009 Michihiro NAKAJIMA
  3. * Copyright (c) 2003-2008 Tim Kientzle and Miklos Vajna
  4. * All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. * 1. Redistributions of source code must retain the above copyright
  10. * notice, this list of conditions and the following disclaimer.
  11. * 2. Redistributions in binary form must reproduce the above copyright
  12. * notice, this list of conditions and the following disclaimer in the
  13. * documentation and/or other materials provided with the distribution.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
  16. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  17. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  18. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
  19. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  20. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  21. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  22. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  23. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  24. * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. #include "archive_platform.h"
  27. __FBSDID("$FreeBSD$");
  28. #ifdef HAVE_ERRNO_H
  29. #include <errno.h>
  30. #endif
  31. #include <stdio.h>
  32. #ifdef HAVE_STDLIB_H
  33. #include <stdlib.h>
  34. #endif
  35. #ifdef HAVE_STRING_H
  36. #include <string.h>
  37. #endif
  38. #ifdef HAVE_UNISTD_H
  39. #include <unistd.h>
  40. #endif
  41. #if HAVE_LZMA_H
  42. #include <lzma.h>
  43. #elif HAVE_LZMADEC_H
  44. #include <lzmadec.h>
  45. #endif
  46. #include "archive.h"
  47. #include "archive_private.h"
  48. #include "archive_read_private.h"
  49. #if HAVE_LZMA_H && HAVE_LIBLZMA
  50. struct private_data {
  51. lzma_stream stream;
  52. unsigned char *out_block;
  53. size_t out_block_size;
  54. int64_t total_out;
  55. char eof; /* True = found end of compressed data. */
  56. };
  57. /* Combined lzma/xz filter */
  58. static ssize_t xz_filter_read(struct archive_read_filter *, const void **);
  59. static int xz_filter_close(struct archive_read_filter *);
  60. static int xz_lzma_bidder_init(struct archive_read_filter *);
  61. #elif HAVE_LZMADEC_H && HAVE_LIBLZMADEC
  62. struct private_data {
  63. lzmadec_stream stream;
  64. unsigned char *out_block;
  65. size_t out_block_size;
  66. int64_t total_out;
  67. char eof; /* True = found end of compressed data. */
  68. };
  69. /* Lzma-only filter */
  70. static ssize_t lzma_filter_read(struct archive_read_filter *, const void **);
  71. static int lzma_filter_close(struct archive_read_filter *);
  72. #endif
  73. /*
  74. * Note that we can detect xz and lzma compressed files even if we
  75. * can't decompress them. (In fact, we like detecting them because we
  76. * can give better error messages.) So the bid framework here gets
  77. * compiled even if no lzma library is available.
  78. */
  79. static int xz_bidder_bid(struct archive_read_filter_bidder *,
  80. struct archive_read_filter *);
  81. static int xz_bidder_init(struct archive_read_filter *);
  82. static int lzma_bidder_bid(struct archive_read_filter_bidder *,
  83. struct archive_read_filter *);
  84. static int lzma_bidder_init(struct archive_read_filter *);
  85. int
  86. archive_read_support_compression_xz(struct archive *_a)
  87. {
  88. struct archive_read *a = (struct archive_read *)_a;
  89. struct archive_read_filter_bidder *bidder = __archive_read_get_bidder(a);
  90. archive_clear_error(_a);
  91. if (bidder == NULL)
  92. return (ARCHIVE_FATAL);
  93. bidder->data = NULL;
  94. bidder->bid = xz_bidder_bid;
  95. bidder->init = xz_bidder_init;
  96. bidder->options = NULL;
  97. bidder->free = NULL;
  98. #if HAVE_LZMA_H && HAVE_LIBLZMA
  99. return (ARCHIVE_OK);
  100. #else
  101. archive_set_error(_a, ARCHIVE_ERRNO_MISC,
  102. "Using external unxz program for xz decompression");
  103. return (ARCHIVE_WARN);
  104. #endif
  105. }
  106. int
  107. archive_read_support_compression_lzma(struct archive *_a)
  108. {
  109. struct archive_read *a = (struct archive_read *)_a;
  110. struct archive_read_filter_bidder *bidder = __archive_read_get_bidder(a);
  111. archive_clear_error(_a);
  112. if (bidder == NULL)
  113. return (ARCHIVE_FATAL);
  114. bidder->data = NULL;
  115. bidder->bid = lzma_bidder_bid;
  116. bidder->init = lzma_bidder_init;
  117. bidder->options = NULL;
  118. bidder->free = NULL;
  119. #if HAVE_LZMA_H && HAVE_LIBLZMA
  120. return (ARCHIVE_OK);
  121. #elif HAVE_LZMADEC_H && HAVE_LIBLZMADEC
  122. return (ARCHIVE_OK);
  123. #else
  124. archive_set_error(_a, ARCHIVE_ERRNO_MISC,
  125. "Using external unlzma program for lzma decompression");
  126. return (ARCHIVE_WARN);
  127. #endif
  128. }
  129. /*
  130. * Test whether we can handle this data.
  131. */
  132. static int
  133. xz_bidder_bid(struct archive_read_filter_bidder *self,
  134. struct archive_read_filter *filter)
  135. {
  136. const unsigned char *buffer;
  137. ssize_t avail;
  138. int bits_checked;
  139. (void)self; /* UNUSED */
  140. buffer = __archive_read_filter_ahead(filter, 6, &avail);
  141. if (buffer == NULL)
  142. return (0);
  143. /*
  144. * Verify Header Magic Bytes : FD 37 7A 58 5A 00
  145. */
  146. bits_checked = 0;
  147. if (buffer[0] != 0xFD)
  148. return (0);
  149. bits_checked += 8;
  150. if (buffer[1] != 0x37)
  151. return (0);
  152. bits_checked += 8;
  153. if (buffer[2] != 0x7A)
  154. return (0);
  155. bits_checked += 8;
  156. if (buffer[3] != 0x58)
  157. return (0);
  158. bits_checked += 8;
  159. if (buffer[4] != 0x5A)
  160. return (0);
  161. bits_checked += 8;
  162. if (buffer[5] != 0x00)
  163. return (0);
  164. bits_checked += 8;
  165. return (bits_checked);
  166. }
  167. /*
  168. * Test whether we can handle this data.
  169. *
  170. * <sigh> LZMA has a rather poor file signature. Zeros do not
  171. * make good signature bytes as a rule, and the only non-zero byte
  172. * here is an ASCII character. For example, an uncompressed tar
  173. * archive whose first file is ']' would satisfy this check. It may
  174. * be necessary to exclude LZMA from compression_all() because of
  175. * this. Clients of libarchive would then have to explicitly enable
  176. * LZMA checking instead of (or in addition to) compression_all() when
  177. * they have other evidence (file name, command-line option) to go on.
  178. */
  179. static int
  180. lzma_bidder_bid(struct archive_read_filter_bidder *self,
  181. struct archive_read_filter *filter)
  182. {
  183. const unsigned char *buffer;
  184. ssize_t avail;
  185. int bits_checked;
  186. (void)self; /* UNUSED */
  187. buffer = __archive_read_filter_ahead(filter, 6, &avail);
  188. if (buffer == NULL)
  189. return (0);
  190. /* First byte of raw LZMA stream is always 0x5d. */
  191. bits_checked = 0;
  192. if (buffer[0] != 0x5d)
  193. return (0);
  194. bits_checked += 8;
  195. /* Second through fifth bytes are dictionary code, stored in
  196. * little-endian order. The two least-significant bytes are
  197. * always zero. */
  198. if (buffer[1] != 0 || buffer[2] != 0)
  199. return (0);
  200. bits_checked += 16;
  201. /* ??? TODO: Fix this. ??? */
  202. /* NSIS format check uses this, but I've seen tar.lzma
  203. * archives where this byte is 0xff, not 0. Can it
  204. * ever be anything other than 0 or 0xff?
  205. */
  206. #if 0
  207. if (buffer[5] != 0)
  208. return (0);
  209. bits_checked += 8;
  210. #endif
  211. /* TODO: The above test is still very weak. It would be
  212. * good to do better. */
  213. return (bits_checked);
  214. }
  215. #if HAVE_LZMA_H && HAVE_LIBLZMA
  216. /*
  217. * liblzma 4.999.7 and later support both lzma and xz streams.
  218. */
  219. static int
  220. xz_bidder_init(struct archive_read_filter *self)
  221. {
  222. self->code = ARCHIVE_COMPRESSION_XZ;
  223. self->name = "xz";
  224. return (xz_lzma_bidder_init(self));
  225. }
  226. static int
  227. lzma_bidder_init(struct archive_read_filter *self)
  228. {
  229. self->code = ARCHIVE_COMPRESSION_LZMA;
  230. self->name = "lzma";
  231. return (xz_lzma_bidder_init(self));
  232. }
  233. /*
  234. * Setup the callbacks.
  235. */
  236. static int
  237. xz_lzma_bidder_init(struct archive_read_filter *self)
  238. {
  239. static const size_t out_block_size = 64 * 1024;
  240. void *out_block;
  241. struct private_data *state;
  242. int ret;
  243. state = (struct private_data *)calloc(sizeof(*state), 1);
  244. out_block = (unsigned char *)malloc(out_block_size);
  245. if (state == NULL || out_block == NULL) {
  246. archive_set_error(&self->archive->archive, ENOMEM,
  247. "Can't allocate data for xz decompression");
  248. free(out_block);
  249. free(state);
  250. return (ARCHIVE_FATAL);
  251. }
  252. self->data = state;
  253. state->out_block_size = out_block_size;
  254. state->out_block = out_block;
  255. self->read = xz_filter_read;
  256. self->skip = NULL; /* not supported */
  257. self->close = xz_filter_close;
  258. state->stream.avail_in = 0;
  259. state->stream.next_out = state->out_block;
  260. state->stream.avail_out = state->out_block_size;
  261. /* Initialize compression library.
  262. * TODO: I don't know what value is best for memlimit.
  263. * maybe, it needs to check memory size which
  264. * running system has.
  265. */
  266. if (self->code == ARCHIVE_COMPRESSION_XZ)
  267. ret = lzma_stream_decoder(&(state->stream),
  268. (1U << 30),/* memlimit */
  269. LZMA_CONCATENATED);
  270. else
  271. ret = lzma_alone_decoder(&(state->stream),
  272. (1U << 30));/* memlimit */
  273. if (ret == LZMA_OK)
  274. return (ARCHIVE_OK);
  275. /* Library setup failed: Choose an error message and clean up. */
  276. switch (ret) {
  277. case LZMA_MEM_ERROR:
  278. archive_set_error(&self->archive->archive, ENOMEM,
  279. "Internal error initializing compression library: "
  280. "Cannot allocate memory");
  281. break;
  282. case LZMA_OPTIONS_ERROR:
  283. archive_set_error(&self->archive->archive,
  284. ARCHIVE_ERRNO_MISC,
  285. "Internal error initializing compression library: "
  286. "Invalid or unsupported options");
  287. break;
  288. default:
  289. archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC,
  290. "Internal error initializing lzma library");
  291. break;
  292. }
  293. free(state->out_block);
  294. free(state);
  295. self->data = NULL;
  296. return (ARCHIVE_FATAL);
  297. }
  298. /*
  299. * Return the next block of decompressed data.
  300. */
  301. static ssize_t
  302. xz_filter_read(struct archive_read_filter *self, const void **p)
  303. {
  304. struct private_data *state;
  305. size_t decompressed;
  306. ssize_t avail_in;
  307. int ret;
  308. state = (struct private_data *)self->data;
  309. /* Empty our output buffer. */
  310. state->stream.next_out = state->out_block;
  311. state->stream.avail_out = state->out_block_size;
  312. /* Try to fill the output buffer. */
  313. while (state->stream.avail_out > 0 && !state->eof) {
  314. state->stream.next_in =
  315. __archive_read_filter_ahead(self->upstream, 1, &avail_in);
  316. if (state->stream.next_in == NULL && avail_in < 0)
  317. return (ARCHIVE_FATAL);
  318. state->stream.avail_in = avail_in;
  319. /* Decompress as much as we can in one pass. */
  320. ret = lzma_code(&(state->stream),
  321. (state->stream.avail_in == 0)? LZMA_FINISH: LZMA_RUN);
  322. switch (ret) {
  323. case LZMA_STREAM_END: /* Found end of stream. */
  324. state->eof = 1;
  325. /* FALL THROUGH */
  326. case LZMA_OK: /* Decompressor made some progress. */
  327. __archive_read_filter_consume(self->upstream,
  328. avail_in - state->stream.avail_in);
  329. break;
  330. case LZMA_MEM_ERROR:
  331. archive_set_error(&self->archive->archive, ENOMEM,
  332. "Lzma library error: Cannot allocate memory");
  333. return (ARCHIVE_FATAL);
  334. case LZMA_MEMLIMIT_ERROR:
  335. archive_set_error(&self->archive->archive, ENOMEM,
  336. "Lzma library error: Out of memory");
  337. return (ARCHIVE_FATAL);
  338. case LZMA_FORMAT_ERROR:
  339. archive_set_error(&self->archive->archive,
  340. ARCHIVE_ERRNO_MISC,
  341. "Lzma library error: format not recognized");
  342. return (ARCHIVE_FATAL);
  343. case LZMA_OPTIONS_ERROR:
  344. archive_set_error(&self->archive->archive,
  345. ARCHIVE_ERRNO_MISC,
  346. "Lzma library error: Invalid options");
  347. return (ARCHIVE_FATAL);
  348. case LZMA_DATA_ERROR:
  349. archive_set_error(&self->archive->archive,
  350. ARCHIVE_ERRNO_MISC,
  351. "Lzma library error: Corrupted input data");
  352. return (ARCHIVE_FATAL);
  353. case LZMA_BUF_ERROR:
  354. archive_set_error(&self->archive->archive,
  355. ARCHIVE_ERRNO_MISC,
  356. "Lzma library error: No progress is possible");
  357. return (ARCHIVE_FATAL);
  358. default:
  359. /* Return an error. */
  360. archive_set_error(&self->archive->archive,
  361. ARCHIVE_ERRNO_MISC,
  362. "Lzma decompression failed: Unknown error");
  363. return (ARCHIVE_FATAL);
  364. }
  365. }
  366. decompressed = state->stream.next_out - state->out_block;
  367. state->total_out += decompressed;
  368. if (decompressed == 0)
  369. *p = NULL;
  370. else
  371. *p = state->out_block;
  372. return (decompressed);
  373. }
  374. /*
  375. * Clean up the decompressor.
  376. */
  377. static int
  378. xz_filter_close(struct archive_read_filter *self)
  379. {
  380. struct private_data *state;
  381. state = (struct private_data *)self->data;
  382. lzma_end(&(state->stream));
  383. free(state->out_block);
  384. free(state);
  385. return (ARCHIVE_OK);
  386. }
  387. #else
  388. #if HAVE_LZMADEC_H && HAVE_LIBLZMADEC
  389. /*
  390. * If we have the older liblzmadec library, then we can handle
  391. * LZMA streams but not XZ streams.
  392. */
  393. /*
  394. * Setup the callbacks.
  395. */
  396. static int
  397. lzma_bidder_init(struct archive_read_filter *self)
  398. {
  399. static const size_t out_block_size = 64 * 1024;
  400. void *out_block;
  401. struct private_data *state;
  402. ssize_t ret, avail_in;
  403. self->code = ARCHIVE_COMPRESSION_LZMA;
  404. self->name = "lzma";
  405. state = (struct private_data *)calloc(sizeof(*state), 1);
  406. out_block = (unsigned char *)malloc(out_block_size);
  407. if (state == NULL || out_block == NULL) {
  408. archive_set_error(&self->archive->archive, ENOMEM,
  409. "Can't allocate data for lzma decompression");
  410. free(out_block);
  411. free(state);
  412. return (ARCHIVE_FATAL);
  413. }
  414. self->data = state;
  415. state->out_block_size = out_block_size;
  416. state->out_block = out_block;
  417. self->read = lzma_filter_read;
  418. self->skip = NULL; /* not supported */
  419. self->close = lzma_filter_close;
  420. /* Prime the lzma library with 18 bytes of input. */
  421. state->stream.next_in = (unsigned char *)(uintptr_t)
  422. __archive_read_filter_ahead(self->upstream, 18, &avail_in);
  423. if (state->stream.next_in == NULL)
  424. return (ARCHIVE_FATAL);
  425. state->stream.avail_in = avail_in;
  426. state->stream.next_out = state->out_block;
  427. state->stream.avail_out = state->out_block_size;
  428. /* Initialize compression library. */
  429. ret = lzmadec_init(&(state->stream));
  430. __archive_read_filter_consume(self->upstream,
  431. avail_in - state->stream.avail_in);
  432. if (ret == LZMADEC_OK)
  433. return (ARCHIVE_OK);
  434. /* Library setup failed: Clean up. */
  435. archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC,
  436. "Internal error initializing lzma library");
  437. /* Override the error message if we know what really went wrong. */
  438. switch (ret) {
  439. case LZMADEC_HEADER_ERROR:
  440. archive_set_error(&self->archive->archive,
  441. ARCHIVE_ERRNO_MISC,
  442. "Internal error initializing compression library: "
  443. "invalid header");
  444. break;
  445. case LZMADEC_MEM_ERROR:
  446. archive_set_error(&self->archive->archive, ENOMEM,
  447. "Internal error initializing compression library: "
  448. "out of memory");
  449. break;
  450. }
  451. free(state->out_block);
  452. free(state);
  453. self->data = NULL;
  454. return (ARCHIVE_FATAL);
  455. }
  456. /*
  457. * Return the next block of decompressed data.
  458. */
  459. static ssize_t
  460. lzma_filter_read(struct archive_read_filter *self, const void **p)
  461. {
  462. struct private_data *state;
  463. size_t decompressed;
  464. ssize_t avail_in, ret;
  465. state = (struct private_data *)self->data;
  466. /* Empty our output buffer. */
  467. state->stream.next_out = state->out_block;
  468. state->stream.avail_out = state->out_block_size;
  469. /* Try to fill the output buffer. */
  470. while (state->stream.avail_out > 0 && !state->eof) {
  471. state->stream.next_in = (unsigned char *)(uintptr_t)
  472. __archive_read_filter_ahead(self->upstream, 1, &avail_in);
  473. if (state->stream.next_in == NULL && avail_in < 0)
  474. return (ARCHIVE_FATAL);
  475. state->stream.avail_in = avail_in;
  476. /* Decompress as much as we can in one pass. */
  477. ret = lzmadec_decode(&(state->stream), avail_in == 0);
  478. switch (ret) {
  479. case LZMADEC_STREAM_END: /* Found end of stream. */
  480. state->eof = 1;
  481. /* FALL THROUGH */
  482. case LZMADEC_OK: /* Decompressor made some progress. */
  483. __archive_read_filter_consume(self->upstream,
  484. avail_in - state->stream.avail_in);
  485. break;
  486. case LZMADEC_BUF_ERROR: /* Insufficient input data? */
  487. archive_set_error(&self->archive->archive,
  488. ARCHIVE_ERRNO_MISC,
  489. "Insufficient compressed data");
  490. return (ARCHIVE_FATAL);
  491. default:
  492. /* Return an error. */
  493. archive_set_error(&self->archive->archive,
  494. ARCHIVE_ERRNO_MISC,
  495. "Lzma decompression failed");
  496. return (ARCHIVE_FATAL);
  497. }
  498. }
  499. decompressed = state->stream.next_out - state->out_block;
  500. state->total_out += decompressed;
  501. if (decompressed == 0)
  502. *p = NULL;
  503. else
  504. *p = state->out_block;
  505. return (decompressed);
  506. }
  507. /*
  508. * Clean up the decompressor.
  509. */
  510. static int
  511. lzma_filter_close(struct archive_read_filter *self)
  512. {
  513. struct private_data *state;
  514. int ret;
  515. state = (struct private_data *)self->data;
  516. ret = ARCHIVE_OK;
  517. switch (lzmadec_end(&(state->stream))) {
  518. case LZMADEC_OK:
  519. break;
  520. default:
  521. archive_set_error(&(self->archive->archive),
  522. ARCHIVE_ERRNO_MISC,
  523. "Failed to clean up %s compressor",
  524. self->archive->archive.compression_name);
  525. ret = ARCHIVE_FATAL;
  526. }
  527. free(state->out_block);
  528. free(state);
  529. return (ret);
  530. }
  531. #else
  532. /*
  533. *
  534. * If we have no suitable library on this system, we can't actually do
  535. * the decompression. We can, however, still detect compressed
  536. * archives and emit a useful message.
  537. *
  538. */
  539. static int
  540. lzma_bidder_init(struct archive_read_filter *self)
  541. {
  542. int r;
  543. r = __archive_read_program(self, "unlzma");
  544. /* Note: We set the format here even if __archive_read_program()
  545. * above fails. We do, after all, know what the format is
  546. * even if we weren't able to read it. */
  547. self->code = ARCHIVE_COMPRESSION_LZMA;
  548. self->name = "lzma";
  549. return (r);
  550. }
  551. #endif /* HAVE_LZMADEC_H */
  552. static int
  553. xz_bidder_init(struct archive_read_filter *self)
  554. {
  555. int r;
  556. r = __archive_read_program(self, "unxz");
  557. /* Note: We set the format here even if __archive_read_program()
  558. * above fails. We do, after all, know what the format is
  559. * even if we weren't able to read it. */
  560. self->code = ARCHIVE_COMPRESSION_XZ;
  561. self->name = "xz";
  562. return (r);
  563. }
  564. #endif /* HAVE_LZMA_H */