archive_read_open_filename.c 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582
  1. /*-
  2. * Copyright (c) 2003-2010 Tim Kientzle
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
  15. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  16. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  17. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
  18. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  19. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  20. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  21. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  22. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  23. * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  24. */
  25. #include "archive_platform.h"
  26. __FBSDID("$FreeBSD: head/lib/libarchive/archive_read_open_filename.c 201093 2009-12-28 02:28:44Z kientzle $");
  27. #ifdef HAVE_SYS_IOCTL_H
  28. #include <sys/ioctl.h>
  29. #endif
  30. #ifdef HAVE_SYS_STAT_H
  31. #include <sys/stat.h>
  32. #endif
  33. #ifdef HAVE_ERRNO_H
  34. #include <errno.h>
  35. #endif
  36. #ifdef HAVE_FCNTL_H
  37. #include <fcntl.h>
  38. #endif
  39. #ifdef HAVE_IO_H
  40. #include <io.h>
  41. #endif
  42. #ifdef HAVE_STDLIB_H
  43. #include <stdlib.h>
  44. #endif
  45. #ifdef HAVE_STRING_H
  46. #include <string.h>
  47. #endif
  48. #ifdef HAVE_UNISTD_H
  49. #include <unistd.h>
  50. #endif
  51. #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
  52. #include <sys/disk.h>
  53. #elif defined(__NetBSD__) || defined(__OpenBSD__)
  54. #include <sys/disklabel.h>
  55. #include <sys/dkio.h>
  56. #elif defined(__DragonFly__)
  57. #include <sys/diskslice.h>
  58. #endif
  59. #include "archive.h"
  60. #include "archive_private.h"
  61. #include "archive_string.h"
  62. #ifndef O_BINARY
  63. #define O_BINARY 0
  64. #endif
  65. #ifndef O_CLOEXEC
  66. #define O_CLOEXEC 0
  67. #endif
  68. struct read_file_data {
  69. int fd;
  70. size_t block_size;
  71. void *buffer;
  72. mode_t st_mode; /* Mode bits for opened file. */
  73. char use_lseek;
  74. enum fnt_e { FNT_STDIN, FNT_MBS, FNT_WCS } filename_type;
  75. union {
  76. char m[1];/* MBS filename. */
  77. wchar_t w[1];/* WCS filename. */
  78. } filename; /* Must be last! */
  79. };
  80. static int file_open(struct archive *, void *);
  81. static int file_close(struct archive *, void *);
  82. static int file_close2(struct archive *, void *);
  83. static int file_switch(struct archive *, void *, void *);
  84. static ssize_t file_read(struct archive *, void *, const void **buff);
  85. static int64_t file_seek(struct archive *, void *, int64_t request, int);
  86. static int64_t file_skip(struct archive *, void *, int64_t request);
  87. static int64_t file_skip_lseek(struct archive *, void *, int64_t request);
  88. int
  89. archive_read_open_file(struct archive *a, const char *filename,
  90. size_t block_size)
  91. {
  92. return (archive_read_open_filename(a, filename, block_size));
  93. }
  94. int
  95. archive_read_open_filename(struct archive *a, const char *filename,
  96. size_t block_size)
  97. {
  98. const char *filenames[2];
  99. filenames[0] = filename;
  100. filenames[1] = NULL;
  101. return archive_read_open_filenames(a, filenames, block_size);
  102. }
  103. int
  104. archive_read_open_filenames(struct archive *a, const char **filenames,
  105. size_t block_size)
  106. {
  107. struct read_file_data *mine;
  108. const char *filename = NULL;
  109. if (filenames)
  110. filename = *(filenames++);
  111. archive_clear_error(a);
  112. do
  113. {
  114. if (filename == NULL)
  115. filename = "";
  116. mine = (struct read_file_data *)calloc(1,
  117. sizeof(*mine) + strlen(filename));
  118. if (mine == NULL)
  119. goto no_memory;
  120. strcpy(mine->filename.m, filename);
  121. mine->block_size = block_size;
  122. mine->fd = -1;
  123. mine->buffer = NULL;
  124. mine->st_mode = mine->use_lseek = 0;
  125. if (filename == NULL || filename[0] == '\0') {
  126. mine->filename_type = FNT_STDIN;
  127. } else
  128. mine->filename_type = FNT_MBS;
  129. if (archive_read_append_callback_data(a, mine) != (ARCHIVE_OK))
  130. return (ARCHIVE_FATAL);
  131. if (filenames == NULL)
  132. break;
  133. filename = *(filenames++);
  134. } while (filename != NULL && filename[0] != '\0');
  135. archive_read_set_open_callback(a, file_open);
  136. archive_read_set_read_callback(a, file_read);
  137. archive_read_set_skip_callback(a, file_skip);
  138. archive_read_set_close_callback(a, file_close);
  139. archive_read_set_switch_callback(a, file_switch);
  140. archive_read_set_seek_callback(a, file_seek);
  141. return (archive_read_open1(a));
  142. no_memory:
  143. archive_set_error(a, ENOMEM, "No memory");
  144. return (ARCHIVE_FATAL);
  145. }
  146. int
  147. archive_read_open_filename_w(struct archive *a, const wchar_t *wfilename,
  148. size_t block_size)
  149. {
  150. struct read_file_data *mine = (struct read_file_data *)calloc(1,
  151. sizeof(*mine) + wcslen(wfilename) * sizeof(wchar_t));
  152. if (!mine)
  153. {
  154. archive_set_error(a, ENOMEM, "No memory");
  155. return (ARCHIVE_FATAL);
  156. }
  157. mine->fd = -1;
  158. mine->block_size = block_size;
  159. if (wfilename == NULL || wfilename[0] == L'\0') {
  160. mine->filename_type = FNT_STDIN;
  161. } else {
  162. #if defined(_WIN32) && !defined(__CYGWIN__)
  163. mine->filename_type = FNT_WCS;
  164. wcscpy(mine->filename.w, wfilename);
  165. #else
  166. /*
  167. * POSIX system does not support a wchar_t interface for
  168. * open() system call, so we have to translate a wchar_t
  169. * filename to multi-byte one and use it.
  170. */
  171. struct archive_string fn;
  172. archive_string_init(&fn);
  173. if (archive_string_append_from_wcs(&fn, wfilename,
  174. wcslen(wfilename)) != 0) {
  175. if (errno == ENOMEM)
  176. archive_set_error(a, errno,
  177. "Can't allocate memory");
  178. else
  179. archive_set_error(a, EINVAL,
  180. "Failed to convert a wide-character"
  181. " filename to a multi-byte filename");
  182. archive_string_free(&fn);
  183. free(mine);
  184. return (ARCHIVE_FATAL);
  185. }
  186. mine->filename_type = FNT_MBS;
  187. strcpy(mine->filename.m, fn.s);
  188. archive_string_free(&fn);
  189. #endif
  190. }
  191. if (archive_read_append_callback_data(a, mine) != (ARCHIVE_OK))
  192. return (ARCHIVE_FATAL);
  193. archive_read_set_open_callback(a, file_open);
  194. archive_read_set_read_callback(a, file_read);
  195. archive_read_set_skip_callback(a, file_skip);
  196. archive_read_set_close_callback(a, file_close);
  197. archive_read_set_switch_callback(a, file_switch);
  198. archive_read_set_seek_callback(a, file_seek);
  199. return (archive_read_open1(a));
  200. }
  201. static int
  202. file_open(struct archive *a, void *client_data)
  203. {
  204. struct stat st;
  205. struct read_file_data *mine = (struct read_file_data *)client_data;
  206. void *buffer;
  207. const char *filename = NULL;
  208. const wchar_t *wfilename = NULL;
  209. int fd = -1;
  210. int is_disk_like = 0;
  211. #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
  212. off_t mediasize = 0; /* FreeBSD-specific, so off_t okay here. */
  213. #elif defined(__NetBSD__) || defined(__OpenBSD__)
  214. struct disklabel dl;
  215. #elif defined(__DragonFly__)
  216. struct partinfo pi;
  217. #endif
  218. archive_clear_error(a);
  219. if (mine->filename_type == FNT_STDIN) {
  220. /* We used to delegate stdin support by
  221. * directly calling archive_read_open_fd(a,0,block_size)
  222. * here, but that doesn't (and shouldn't) handle the
  223. * end-of-file flush when reading stdout from a pipe.
  224. * Basically, read_open_fd() is intended for folks who
  225. * are willing to handle such details themselves. This
  226. * API is intended to be a little smarter for folks who
  227. * want easy handling of the common case.
  228. */
  229. fd = 0;
  230. #if defined(__CYGWIN__) || defined(_WIN32)
  231. setmode(0, O_BINARY);
  232. #endif
  233. filename = "";
  234. } else if (mine->filename_type == FNT_MBS) {
  235. filename = mine->filename.m;
  236. fd = open(filename, O_RDONLY | O_BINARY | O_CLOEXEC);
  237. __archive_ensure_cloexec_flag(fd);
  238. if (fd < 0) {
  239. archive_set_error(a, errno,
  240. "Failed to open '%s'", filename);
  241. return (ARCHIVE_FATAL);
  242. }
  243. } else {
  244. #if defined(_WIN32) && !defined(__CYGWIN__)
  245. wfilename = mine->filename.w;
  246. fd = _wopen(wfilename, O_RDONLY | O_BINARY);
  247. if (fd < 0 && errno == ENOENT) {
  248. wchar_t *fullpath;
  249. fullpath = __la_win_permissive_name_w(wfilename);
  250. if (fullpath != NULL) {
  251. fd = _wopen(fullpath, O_RDONLY | O_BINARY);
  252. free(fullpath);
  253. }
  254. }
  255. if (fd < 0) {
  256. archive_set_error(a, errno,
  257. "Failed to open '%S'", wfilename);
  258. return (ARCHIVE_FATAL);
  259. }
  260. #else
  261. archive_set_error(a, ARCHIVE_ERRNO_MISC,
  262. "Unexpedted operation in archive_read_open_filename");
  263. goto fail;
  264. #endif
  265. }
  266. if (fstat(fd, &st) != 0) {
  267. if (mine->filename_type == FNT_WCS)
  268. archive_set_error(a, errno, "Can't stat '%S'",
  269. wfilename);
  270. else
  271. archive_set_error(a, errno, "Can't stat '%s'",
  272. filename);
  273. goto fail;
  274. }
  275. /*
  276. * Determine whether the input looks like a disk device or a
  277. * tape device. The results are used below to select an I/O
  278. * strategy:
  279. * = "disk-like" devices support arbitrary lseek() and will
  280. * support I/O requests of any size. So we get easy skipping
  281. * and can cheat on block sizes to get better performance.
  282. * = "tape-like" devices require strict blocking and use
  283. * specialized ioctls for seeking.
  284. * = "socket-like" devices cannot seek at all but can improve
  285. * performance by using nonblocking I/O to read "whatever is
  286. * available right now".
  287. *
  288. * Right now, we only specially recognize disk-like devices,
  289. * but it should be straightforward to add probes and strategy
  290. * here for tape-like and socket-like devices.
  291. */
  292. if (S_ISREG(st.st_mode)) {
  293. /* Safety: Tell the extractor not to overwrite the input. */
  294. archive_read_extract_set_skip_file(a, st.st_dev, st.st_ino);
  295. /* Regular files act like disks. */
  296. is_disk_like = 1;
  297. }
  298. #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
  299. /* FreeBSD: if it supports DIOCGMEDIASIZE ioctl, it's disk-like. */
  300. else if (S_ISCHR(st.st_mode) &&
  301. ioctl(fd, DIOCGMEDIASIZE, &mediasize) == 0 &&
  302. mediasize > 0) {
  303. is_disk_like = 1;
  304. }
  305. #elif defined(__NetBSD__) || defined(__OpenBSD__)
  306. /* Net/OpenBSD: if it supports DIOCGDINFO ioctl, it's disk-like. */
  307. else if ((S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) &&
  308. ioctl(fd, DIOCGDINFO, &dl) == 0 &&
  309. dl.d_partitions[DISKPART(st.st_rdev)].p_size > 0) {
  310. is_disk_like = 1;
  311. }
  312. #elif defined(__DragonFly__)
  313. /* DragonFly BSD: if it supports DIOCGPART ioctl, it's disk-like. */
  314. else if (S_ISCHR(st.st_mode) &&
  315. ioctl(fd, DIOCGPART, &pi) == 0 &&
  316. pi.media_size > 0) {
  317. is_disk_like = 1;
  318. }
  319. #elif defined(__linux__)
  320. /* Linux: All block devices are disk-like. */
  321. else if (S_ISBLK(st.st_mode) &&
  322. lseek(fd, 0, SEEK_CUR) == 0 &&
  323. lseek(fd, 0, SEEK_SET) == 0 &&
  324. lseek(fd, 0, SEEK_END) > 0 &&
  325. lseek(fd, 0, SEEK_SET) == 0) {
  326. is_disk_like = 1;
  327. }
  328. #endif
  329. /* TODO: Add an "is_tape_like" variable and appropriate tests. */
  330. /* Disk-like devices prefer power-of-two block sizes. */
  331. /* Use provided block_size as a guide so users have some control. */
  332. if (is_disk_like) {
  333. size_t new_block_size = 64 * 1024;
  334. while (new_block_size < mine->block_size
  335. && new_block_size < 64 * 1024 * 1024)
  336. new_block_size *= 2;
  337. mine->block_size = new_block_size;
  338. }
  339. buffer = malloc(mine->block_size);
  340. if (buffer == NULL) {
  341. archive_set_error(a, ENOMEM, "No memory");
  342. goto fail;
  343. }
  344. mine->buffer = buffer;
  345. mine->fd = fd;
  346. /* Remember mode so close can decide whether to flush. */
  347. mine->st_mode = st.st_mode;
  348. /* Disk-like inputs can use lseek(). */
  349. if (is_disk_like)
  350. mine->use_lseek = 1;
  351. return (ARCHIVE_OK);
  352. fail:
  353. /*
  354. * Don't close file descriptors not opened or ones pointing referring
  355. * to `FNT_STDIN`.
  356. */
  357. if (fd != -1 && fd != 0)
  358. close(fd);
  359. return (ARCHIVE_FATAL);
  360. }
  361. static ssize_t
  362. file_read(struct archive *a, void *client_data, const void **buff)
  363. {
  364. struct read_file_data *mine = (struct read_file_data *)client_data;
  365. ssize_t bytes_read;
  366. /* TODO: If a recent lseek() operation has left us
  367. * mis-aligned, read and return a short block to try to get
  368. * us back in alignment. */
  369. /* TODO: Someday, try mmap() here; if that succeeds, give
  370. * the entire file to libarchive as a single block. That
  371. * could be a lot faster than block-by-block manual I/O. */
  372. /* TODO: We might be able to improve performance on pipes and
  373. * sockets by setting non-blocking I/O and just accepting
  374. * whatever we get here instead of waiting for a full block
  375. * worth of data. */
  376. *buff = mine->buffer;
  377. for (;;) {
  378. bytes_read = read(mine->fd, mine->buffer, mine->block_size);
  379. if (bytes_read < 0) {
  380. if (errno == EINTR)
  381. continue;
  382. else if (mine->filename_type == FNT_STDIN)
  383. archive_set_error(a, errno,
  384. "Error reading stdin");
  385. else if (mine->filename_type == FNT_MBS)
  386. archive_set_error(a, errno,
  387. "Error reading '%s'", mine->filename.m);
  388. else
  389. archive_set_error(a, errno,
  390. "Error reading '%S'", mine->filename.w);
  391. }
  392. return (bytes_read);
  393. }
  394. }
  395. /*
  396. * Regular files and disk-like block devices can use simple lseek
  397. * without needing to round the request to the block size.
  398. *
  399. * TODO: This can leave future reads mis-aligned. Since we know the
  400. * offset here, we should store it and use it in file_read() above
  401. * to determine whether we should perform a short read to get back
  402. * into alignment. Long series of mis-aligned reads can negatively
  403. * impact disk throughput. (Of course, the performance impact should
  404. * be carefully tested; extra code complexity is only worthwhile if
  405. * it does provide measurable improvement.)
  406. *
  407. * TODO: Be lazy about the actual seek. There are a few pathological
  408. * cases where libarchive makes a bunch of seek requests in a row
  409. * without any intervening reads. This isn't a huge performance
  410. * problem, since the kernel handles seeks lazily already, but
  411. * it would be very slightly faster if we simply remembered the
  412. * seek request here and then actually performed the seek at the
  413. * top of the read callback above.
  414. */
  415. static int64_t
  416. file_skip_lseek(struct archive *a, void *client_data, int64_t request)
  417. {
  418. struct read_file_data *mine = (struct read_file_data *)client_data;
  419. #if defined(_WIN32) && !defined(__CYGWIN__)
  420. /* We use _lseeki64() on Windows. */
  421. int64_t old_offset, new_offset;
  422. #else
  423. off_t old_offset, new_offset;
  424. #endif
  425. /* We use off_t here because lseek() is declared that way. */
  426. /* TODO: Deal with case where off_t isn't 64 bits.
  427. * This shouldn't be a problem on Linux or other POSIX
  428. * systems, since the configuration logic for libarchive
  429. * tries to obtain a 64-bit off_t.
  430. */
  431. if ((old_offset = lseek(mine->fd, 0, SEEK_CUR)) >= 0 &&
  432. (new_offset = lseek(mine->fd, request, SEEK_CUR)) >= 0)
  433. return (new_offset - old_offset);
  434. /* If lseek() fails, don't bother trying again. */
  435. mine->use_lseek = 0;
  436. /* Let libarchive recover with read+discard */
  437. if (errno == ESPIPE)
  438. return (0);
  439. /* If the input is corrupted or truncated, fail. */
  440. if (mine->filename_type == FNT_STDIN)
  441. archive_set_error(a, errno, "Error seeking in stdin");
  442. else if (mine->filename_type == FNT_MBS)
  443. archive_set_error(a, errno, "Error seeking in '%s'",
  444. mine->filename.m);
  445. else
  446. archive_set_error(a, errno, "Error seeking in '%S'",
  447. mine->filename.w);
  448. return (-1);
  449. }
  450. /*
  451. * TODO: Implement another file_skip_XXXX that uses MTIO ioctls to
  452. * accelerate operation on tape drives.
  453. */
  454. static int64_t
  455. file_skip(struct archive *a, void *client_data, int64_t request)
  456. {
  457. struct read_file_data *mine = (struct read_file_data *)client_data;
  458. /* Delegate skip requests. */
  459. if (mine->use_lseek)
  460. return (file_skip_lseek(a, client_data, request));
  461. /* If we can't skip, return 0; libarchive will read+discard instead. */
  462. return (0);
  463. }
  464. /*
  465. * TODO: Store the offset and use it in the read callback.
  466. */
  467. static int64_t
  468. file_seek(struct archive *a, void *client_data, int64_t request, int whence)
  469. {
  470. struct read_file_data *mine = (struct read_file_data *)client_data;
  471. int64_t r;
  472. /* We use off_t here because lseek() is declared that way. */
  473. /* See above for notes about when off_t is less than 64 bits. */
  474. r = lseek(mine->fd, request, whence);
  475. if (r >= 0)
  476. return r;
  477. /* If the input is corrupted or truncated, fail. */
  478. if (mine->filename_type == FNT_STDIN)
  479. archive_set_error(a, errno, "Error seeking in stdin");
  480. else if (mine->filename_type == FNT_MBS)
  481. archive_set_error(a, errno, "Error seeking in '%s'",
  482. mine->filename.m);
  483. else
  484. archive_set_error(a, errno, "Error seeking in '%S'",
  485. mine->filename.w);
  486. return (ARCHIVE_FATAL);
  487. }
  488. static int
  489. file_close2(struct archive *a, void *client_data)
  490. {
  491. struct read_file_data *mine = (struct read_file_data *)client_data;
  492. (void)a; /* UNUSED */
  493. /* Only flush and close if open succeeded. */
  494. if (mine->fd >= 0) {
  495. /*
  496. * Sometimes, we should flush the input before closing.
  497. * Regular files: faster to just close without flush.
  498. * Disk-like devices: Ditto.
  499. * Tapes: must not flush (user might need to
  500. * read the "next" item on a non-rewind device).
  501. * Pipes and sockets: must flush (otherwise, the
  502. * program feeding the pipe or socket may complain).
  503. * Here, I flush everything except for regular files and
  504. * device nodes.
  505. */
  506. if (!S_ISREG(mine->st_mode)
  507. && !S_ISCHR(mine->st_mode)
  508. && !S_ISBLK(mine->st_mode)) {
  509. ssize_t bytesRead;
  510. do {
  511. bytesRead = read(mine->fd, mine->buffer,
  512. mine->block_size);
  513. } while (bytesRead > 0);
  514. }
  515. /* If a named file was opened, then it needs to be closed. */
  516. if (mine->filename_type != FNT_STDIN)
  517. close(mine->fd);
  518. }
  519. free(mine->buffer);
  520. mine->buffer = NULL;
  521. mine->fd = -1;
  522. return (ARCHIVE_OK);
  523. }
  524. static int
  525. file_close(struct archive *a, void *client_data)
  526. {
  527. struct read_file_data *mine = (struct read_file_data *)client_data;
  528. file_close2(a, client_data);
  529. free(mine);
  530. return (ARCHIVE_OK);
  531. }
  532. static int
  533. file_switch(struct archive *a, void *client_data1, void *client_data2)
  534. {
  535. file_close2(a, client_data1);
  536. return file_open(a, client_data2);
  537. }