archive_write_set_compression_compress.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494
  1. /*-
  2. * Copyright (c) 2008 Joerg Sonnenberger
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
  15. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  16. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  17. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
  18. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  19. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  20. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  21. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  22. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  23. * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  24. */
  25. /*-
  26. * Copyright (c) 1985, 1986, 1992, 1993
  27. * The Regents of the University of California. All rights reserved.
  28. *
  29. * This code is derived from software contributed to Berkeley by
  30. * Diomidis Spinellis and James A. Woods, derived from original
  31. * work by Spencer Thomas and Joseph Orost.
  32. *
  33. * Redistribution and use in source and binary forms, with or without
  34. * modification, are permitted provided that the following conditions
  35. * are met:
  36. * 1. Redistributions of source code must retain the above copyright
  37. * notice, this list of conditions and the following disclaimer.
  38. * 2. Redistributions in binary form must reproduce the above copyright
  39. * notice, this list of conditions and the following disclaimer in the
  40. * documentation and/or other materials provided with the distribution.
  41. * 3. Neither the name of the University nor the names of its contributors
  42. * may be used to endorse or promote products derived from this software
  43. * without specific prior written permission.
  44. *
  45. * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  46. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  47. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  48. * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  49. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  50. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  51. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  52. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  53. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  54. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  55. * SUCH DAMAGE.
  56. */
  57. #include "archive_platform.h"
  58. __FBSDID("$FreeBSD: src/lib/libarchive/archive_write_set_compression_compress.c,v 1.1 2008/03/14 20:35:37 kientzle Exp $");
  59. #ifdef HAVE_ERRNO_H
  60. #include <errno.h>
  61. #endif
  62. #ifdef HAVE_STDLIB_H
  63. #include <stdlib.h>
  64. #endif
  65. #ifdef HAVE_STRING_H
  66. #include <string.h>
  67. #endif
  68. #include "archive.h"
  69. #include "archive_private.h"
  70. #include "archive_write_private.h"
  71. #define HSIZE 69001 /* 95% occupancy */
  72. #define HSHIFT 8 /* 8 - trunc(log2(HSIZE / 65536)) */
  73. #define CHECK_GAP 10000 /* Ratio check interval. */
  74. #define MAXCODE(bits) ((1 << (bits)) - 1)
  75. /*
  76. * the next two codes should not be changed lightly, as they must not
  77. * lie within the contiguous general code space.
  78. */
  79. #define FIRST 257 /* First free entry. */
  80. #define CLEAR 256 /* Table clear output code. */
  81. struct private_data {
  82. off_t in_count, out_count, checkpoint;
  83. int code_len; /* Number of bits/code. */
  84. int cur_maxcode; /* Maximum code, given n_bits. */
  85. int max_maxcode; /* Should NEVER generate this code. */
  86. int hashtab [HSIZE];
  87. unsigned short codetab [HSIZE];
  88. int first_free; /* First unused entry. */
  89. int compress_ratio;
  90. int cur_code, cur_fcode;
  91. int bit_offset;
  92. unsigned char bit_buf;
  93. unsigned char *compressed;
  94. size_t compressed_buffer_size;
  95. size_t compressed_offset;
  96. };
  97. static int archive_compressor_compress_finish(struct archive_write *);
  98. static int archive_compressor_compress_init(struct archive_write *);
  99. static int archive_compressor_compress_write(struct archive_write *,
  100. const void *, size_t);
  101. /*
  102. * Allocate, initialize and return a archive object.
  103. */
  104. int
  105. archive_write_set_compression_compress(struct archive *_a)
  106. {
  107. struct archive_write *a = (struct archive_write *)_a;
  108. __archive_check_magic(&a->archive, ARCHIVE_WRITE_MAGIC,
  109. ARCHIVE_STATE_NEW, "archive_write_set_compression_compress");
  110. a->compressor.init = &archive_compressor_compress_init;
  111. a->archive.compression_code = ARCHIVE_COMPRESSION_COMPRESS;
  112. a->archive.compression_name = "compress";
  113. return (ARCHIVE_OK);
  114. }
  115. /*
  116. * Setup callback.
  117. */
  118. static int
  119. archive_compressor_compress_init(struct archive_write *a)
  120. {
  121. int ret;
  122. struct private_data *state;
  123. a->archive.compression_code = ARCHIVE_COMPRESSION_COMPRESS;
  124. a->archive.compression_name = "compress";
  125. if (a->bytes_per_block < 4) {
  126. archive_set_error(&a->archive, EINVAL,
  127. "Can't write Compress header as single block");
  128. return (ARCHIVE_FATAL);
  129. }
  130. if (a->client_opener != NULL) {
  131. ret = (a->client_opener)(&a->archive, a->client_data);
  132. if (ret != ARCHIVE_OK)
  133. return (ret);
  134. }
  135. state = (struct private_data *)malloc(sizeof(*state));
  136. if (state == NULL) {
  137. archive_set_error(&a->archive, ENOMEM,
  138. "Can't allocate data for compression");
  139. return (ARCHIVE_FATAL);
  140. }
  141. memset(state, 0, sizeof(*state));
  142. state->compressed_buffer_size = a->bytes_per_block;
  143. state->compressed = malloc(state->compressed_buffer_size);
  144. if (state->compressed == NULL) {
  145. archive_set_error(&a->archive, ENOMEM,
  146. "Can't allocate data for compression buffer");
  147. free(state);
  148. return (ARCHIVE_FATAL);
  149. }
  150. a->compressor.write = archive_compressor_compress_write;
  151. a->compressor.finish = archive_compressor_compress_finish;
  152. state->max_maxcode = 0x10000; /* Should NEVER generate this code. */
  153. state->in_count = 0; /* Length of input. */
  154. state->bit_buf = 0;
  155. state->bit_offset = 0;
  156. state->out_count = 3; /* Includes 3-byte header mojo. */
  157. state->compress_ratio = 0;
  158. state->checkpoint = CHECK_GAP;
  159. state->code_len = 9;
  160. state->cur_maxcode = MAXCODE(state->code_len);
  161. state->first_free = FIRST;
  162. memset(state->hashtab, 0xff, sizeof(state->hashtab));
  163. /* Prime output buffer with a gzip header. */
  164. state->compressed[0] = 0x1f; /* Compress */
  165. state->compressed[1] = 0x9d;
  166. state->compressed[2] = 0x90; /* Block mode, 16bit max */
  167. state->compressed_offset = 3;
  168. a->compressor.data = state;
  169. return (0);
  170. }
  171. /*-
  172. * Output the given code.
  173. * Inputs:
  174. * code: A n_bits-bit integer. If == -1, then EOF. This assumes
  175. * that n_bits =< (long)wordsize - 1.
  176. * Outputs:
  177. * Outputs code to the file.
  178. * Assumptions:
  179. * Chars are 8 bits long.
  180. * Algorithm:
  181. * Maintain a BITS character long buffer (so that 8 codes will
  182. * fit in it exactly). Use the VAX insv instruction to insert each
  183. * code in turn. When the buffer fills up empty it and start over.
  184. */
  185. static unsigned char rmask[9] =
  186. {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
  187. static int
  188. output_byte(struct archive_write *a, unsigned char c)
  189. {
  190. struct private_data *state = a->compressor.data;
  191. ssize_t bytes_written;
  192. state->compressed[state->compressed_offset++] = c;
  193. ++state->out_count;
  194. if (state->compressed_buffer_size == state->compressed_offset) {
  195. bytes_written = (a->client_writer)(&a->archive,
  196. a->client_data,
  197. state->compressed, state->compressed_buffer_size);
  198. if (bytes_written <= 0)
  199. return ARCHIVE_FATAL;
  200. a->archive.raw_position += bytes_written;
  201. state->compressed_offset = 0;
  202. }
  203. return ARCHIVE_OK;
  204. }
  205. static int
  206. output_code(struct archive_write *a, int ocode)
  207. {
  208. struct private_data *state = a->compressor.data;
  209. int bits, ret, clear_flg, bit_offset;
  210. clear_flg = ocode == CLEAR;
  211. bits = state->code_len;
  212. /*
  213. * Since ocode is always >= 8 bits, only need to mask the first
  214. * hunk on the left.
  215. */
  216. bit_offset = state->bit_offset % 8;
  217. state->bit_buf |= (ocode << bit_offset) & 0xff;
  218. output_byte(a, state->bit_buf);
  219. bits = state->code_len - (8 - bit_offset);
  220. ocode >>= 8 - bit_offset;
  221. /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
  222. if (bits >= 8) {
  223. output_byte(a, ocode & 0xff);
  224. ocode >>= 8;
  225. bits -= 8;
  226. }
  227. /* Last bits. */
  228. state->bit_offset += state->code_len;
  229. state->bit_buf = ocode & rmask[bits];
  230. if (state->bit_offset == state->code_len * 8)
  231. state->bit_offset = 0;
  232. /*
  233. * If the next entry is going to be too big for the ocode size,
  234. * then increase it, if possible.
  235. */
  236. if (clear_flg || state->first_free > state->cur_maxcode) {
  237. /*
  238. * Write the whole buffer, because the input side won't
  239. * discover the size increase until after it has read it.
  240. */
  241. if (state->bit_offset > 0) {
  242. while (state->bit_offset < state->code_len * 8) {
  243. ret = output_byte(a, state->bit_buf);
  244. if (ret != ARCHIVE_OK)
  245. return ret;
  246. state->bit_offset += 8;
  247. state->bit_buf = 0;
  248. }
  249. }
  250. state->bit_buf = 0;
  251. state->bit_offset = 0;
  252. if (clear_flg) {
  253. state->code_len = 9;
  254. state->cur_maxcode = MAXCODE(state->code_len);
  255. } else {
  256. state->code_len++;
  257. if (state->code_len == 16)
  258. state->cur_maxcode = state->max_maxcode;
  259. else
  260. state->cur_maxcode = MAXCODE(state->code_len);
  261. }
  262. }
  263. return (ARCHIVE_OK);
  264. }
  265. static int
  266. output_flush(struct archive_write *a)
  267. {
  268. struct private_data *state = a->compressor.data;
  269. int ret;
  270. /* At EOF, write the rest of the buffer. */
  271. if (state->bit_offset % 8) {
  272. state->code_len = (state->bit_offset % 8 + 7) / 8;
  273. ret = output_byte(a, state->bit_buf);
  274. if (ret != ARCHIVE_OK)
  275. return ret;
  276. }
  277. return (ARCHIVE_OK);
  278. }
  279. /*
  280. * Write data to the compressed stream.
  281. */
  282. static int
  283. archive_compressor_compress_write(struct archive_write *a, const void *buff,
  284. size_t length)
  285. {
  286. struct private_data *state;
  287. int i;
  288. int ratio;
  289. int c, disp, ret;
  290. const unsigned char *bp;
  291. state = (struct private_data *)a->compressor.data;
  292. if (a->client_writer == NULL) {
  293. archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
  294. "No write callback is registered? "
  295. "This is probably an internal programming error.");
  296. return (ARCHIVE_FATAL);
  297. }
  298. if (length == 0)
  299. return ARCHIVE_OK;
  300. bp = buff;
  301. if (state->in_count == 0) {
  302. state->cur_code = *bp++;
  303. ++state->in_count;
  304. --length;
  305. }
  306. while (length--) {
  307. c = *bp++;
  308. state->in_count++;
  309. state->cur_fcode = (c << 16) + state->cur_code;
  310. i = ((c << HSHIFT) ^ state->cur_code); /* Xor hashing. */
  311. if (state->hashtab[i] == state->cur_fcode) {
  312. state->cur_code = state->codetab[i];
  313. continue;
  314. }
  315. if (state->hashtab[i] < 0) /* Empty slot. */
  316. goto nomatch;
  317. /* Secondary hash (after G. Knott). */
  318. if (i == 0)
  319. disp = 1;
  320. else
  321. disp = HSIZE - i;
  322. probe:
  323. if ((i -= disp) < 0)
  324. i += HSIZE;
  325. if (state->hashtab[i] == state->cur_fcode) {
  326. state->cur_code = state->codetab[i];
  327. continue;
  328. }
  329. if (state->hashtab[i] >= 0)
  330. goto probe;
  331. nomatch:
  332. ret = output_code(a, state->cur_code);
  333. if (ret != ARCHIVE_OK)
  334. return ret;
  335. state->cur_code = c;
  336. if (state->first_free < state->max_maxcode) {
  337. state->codetab[i] = state->first_free++; /* code -> hashtable */
  338. state->hashtab[i] = state->cur_fcode;
  339. continue;
  340. }
  341. if (state->in_count < state->checkpoint)
  342. continue;
  343. state->checkpoint = state->in_count + CHECK_GAP;
  344. if (state->in_count <= 0x007fffff)
  345. ratio = state->in_count * 256 / state->out_count;
  346. else if ((ratio = state->out_count / 256) == 0)
  347. ratio = 0x7fffffff;
  348. else
  349. ratio = state->in_count / ratio;
  350. if (ratio > state->compress_ratio)
  351. state->compress_ratio = ratio;
  352. else {
  353. state->compress_ratio = 0;
  354. memset(state->hashtab, 0xff, sizeof(state->hashtab));
  355. state->first_free = FIRST;
  356. ret = output_code(a, CLEAR);
  357. if (ret != ARCHIVE_OK)
  358. return ret;
  359. }
  360. }
  361. return (ARCHIVE_OK);
  362. }
  363. /*
  364. * Finish the compression...
  365. */
  366. static int
  367. archive_compressor_compress_finish(struct archive_write *a)
  368. {
  369. ssize_t block_length, target_block_length, bytes_written;
  370. int ret;
  371. struct private_data *state;
  372. unsigned tocopy;
  373. state = (struct private_data *)a->compressor.data;
  374. ret = 0;
  375. if (a->client_writer == NULL) {
  376. archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
  377. "No write callback is registered? "
  378. "This is probably an internal programming error.");
  379. ret = ARCHIVE_FATAL;
  380. goto cleanup;
  381. }
  382. /* By default, always pad the uncompressed data. */
  383. if (a->pad_uncompressed) {
  384. while (state->in_count % a->bytes_per_block != 0) {
  385. tocopy = a->bytes_per_block -
  386. (state->in_count % a->bytes_per_block);
  387. if (tocopy > a->null_length)
  388. tocopy = a->null_length;
  389. ret = archive_compressor_compress_write(a, a->nulls,
  390. tocopy);
  391. if (ret != ARCHIVE_OK)
  392. goto cleanup;
  393. }
  394. }
  395. ret = output_code(a, state->cur_code);
  396. if (ret != ARCHIVE_OK)
  397. goto cleanup;
  398. ret = output_flush(a);
  399. if (ret != ARCHIVE_OK)
  400. goto cleanup;
  401. /* Optionally, pad the final compressed block. */
  402. block_length = state->compressed_offset;
  403. /* Tricky calculation to determine size of last block. */
  404. if (a->bytes_in_last_block <= 0)
  405. /* Default or Zero: pad to full block */
  406. target_block_length = a->bytes_per_block;
  407. else
  408. /* Round length to next multiple of bytes_in_last_block. */
  409. target_block_length = a->bytes_in_last_block *
  410. ( (block_length + a->bytes_in_last_block - 1) /
  411. a->bytes_in_last_block);
  412. if (target_block_length > a->bytes_per_block)
  413. target_block_length = a->bytes_per_block;
  414. if (block_length < target_block_length) {
  415. memset(state->compressed + state->compressed_offset, 0,
  416. target_block_length - block_length);
  417. block_length = target_block_length;
  418. }
  419. /* Write the last block */
  420. bytes_written = (a->client_writer)(&a->archive, a->client_data,
  421. state->compressed, block_length);
  422. if (bytes_written <= 0)
  423. ret = ARCHIVE_FATAL;
  424. else
  425. a->archive.raw_position += bytes_written;
  426. cleanup:
  427. free(state->compressed);
  428. free(state);
  429. return (ret);
  430. }