archive_read_support_format_tar.c 82 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883
  1. /*-
  2. * Copyright (c) 2003-2007 Tim Kientzle
  3. * Copyright (c) 2011-2012 Michihiro NAKAJIMA
  4. * Copyright (c) 2016 Martin Matuska
  5. * All rights reserved.
  6. *
  7. * Redistribution and use in source and binary forms, with or without
  8. * modification, are permitted provided that the following conditions
  9. * are met:
  10. * 1. Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * 2. Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in the
  14. * documentation and/or other materials provided with the distribution.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
  17. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  18. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  19. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
  20. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  21. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  22. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  23. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  25. * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. #include "archive_platform.h"
  28. __FBSDID("$FreeBSD: head/lib/libarchive/archive_read_support_format_tar.c 201161 2009-12-29 05:44:39Z kientzle $");
  29. #ifdef HAVE_ERRNO_H
  30. #include <errno.h>
  31. #endif
  32. #include <stddef.h>
  33. #ifdef HAVE_STDLIB_H
  34. #include <stdlib.h>
  35. #endif
  36. #ifdef HAVE_STRING_H
  37. #include <string.h>
  38. #endif
  39. #include "archive.h"
  40. #include "archive_acl_private.h" /* For ACL parsing routines. */
  41. #include "archive_entry.h"
  42. #include "archive_entry_locale.h"
  43. #include "archive_private.h"
  44. #include "archive_read_private.h"
  45. #define tar_min(a,b) ((a) < (b) ? (a) : (b))
  46. /*
  47. * Layout of POSIX 'ustar' tar header.
  48. */
  49. struct archive_entry_header_ustar {
  50. char name[100];
  51. char mode[8];
  52. char uid[8];
  53. char gid[8];
  54. char size[12];
  55. char mtime[12];
  56. char checksum[8];
  57. char typeflag[1];
  58. char linkname[100]; /* "old format" header ends here */
  59. char magic[6]; /* For POSIX: "ustar\0" */
  60. char version[2]; /* For POSIX: "00" */
  61. char uname[32];
  62. char gname[32];
  63. char rdevmajor[8];
  64. char rdevminor[8];
  65. char prefix[155];
  66. };
  67. /*
  68. * Structure of GNU tar header
  69. */
  70. struct gnu_sparse {
  71. char offset[12];
  72. char numbytes[12];
  73. };
  74. struct archive_entry_header_gnutar {
  75. char name[100];
  76. char mode[8];
  77. char uid[8];
  78. char gid[8];
  79. char size[12];
  80. char mtime[12];
  81. char checksum[8];
  82. char typeflag[1];
  83. char linkname[100];
  84. char magic[8]; /* "ustar \0" (note blank/blank/null at end) */
  85. char uname[32];
  86. char gname[32];
  87. char rdevmajor[8];
  88. char rdevminor[8];
  89. char atime[12];
  90. char ctime[12];
  91. char offset[12];
  92. char longnames[4];
  93. char unused[1];
  94. struct gnu_sparse sparse[4];
  95. char isextended[1];
  96. char realsize[12];
  97. /*
  98. * Old GNU format doesn't use POSIX 'prefix' field; they use
  99. * the 'L' (longname) entry instead.
  100. */
  101. };
  102. /*
  103. * Data specific to this format.
  104. */
  105. struct sparse_block {
  106. struct sparse_block *next;
  107. int64_t offset;
  108. int64_t remaining;
  109. int hole;
  110. };
  111. struct tar {
  112. struct archive_string acl_text;
  113. struct archive_string entry_pathname;
  114. /* For "GNU.sparse.name" and other similar path extensions. */
  115. struct archive_string entry_pathname_override;
  116. struct archive_string entry_linkpath;
  117. struct archive_string entry_uname;
  118. struct archive_string entry_gname;
  119. struct archive_string longlink;
  120. struct archive_string longname;
  121. struct archive_string pax_header;
  122. struct archive_string pax_global;
  123. struct archive_string line;
  124. int pax_hdrcharset_binary;
  125. int header_recursion_depth;
  126. int64_t entry_bytes_remaining;
  127. int64_t entry_offset;
  128. int64_t entry_padding;
  129. int64_t entry_bytes_unconsumed;
  130. int64_t realsize;
  131. int sparse_allowed;
  132. struct sparse_block *sparse_list;
  133. struct sparse_block *sparse_last;
  134. int64_t sparse_offset;
  135. int64_t sparse_numbytes;
  136. int sparse_gnu_major;
  137. int sparse_gnu_minor;
  138. char sparse_gnu_pending;
  139. struct archive_string localname;
  140. struct archive_string_conv *opt_sconv;
  141. struct archive_string_conv *sconv;
  142. struct archive_string_conv *sconv_acl;
  143. struct archive_string_conv *sconv_default;
  144. int init_default_conversion;
  145. int compat_2x;
  146. int process_mac_extensions;
  147. int read_concatenated_archives;
  148. int realsize_override;
  149. };
  150. static int archive_block_is_null(const char *p);
  151. static char *base64_decode(const char *, size_t, size_t *);
  152. static int gnu_add_sparse_entry(struct archive_read *, struct tar *,
  153. int64_t offset, int64_t remaining);
  154. static void gnu_clear_sparse_list(struct tar *);
  155. static int gnu_sparse_old_read(struct archive_read *, struct tar *,
  156. const struct archive_entry_header_gnutar *header, size_t *);
  157. static int gnu_sparse_old_parse(struct archive_read *, struct tar *,
  158. const struct gnu_sparse *sparse, int length);
  159. static int gnu_sparse_01_parse(struct archive_read *, struct tar *,
  160. const char *);
  161. static ssize_t gnu_sparse_10_read(struct archive_read *, struct tar *,
  162. size_t *);
  163. static int header_Solaris_ACL(struct archive_read *, struct tar *,
  164. struct archive_entry *, const void *, size_t *);
  165. static int header_common(struct archive_read *, struct tar *,
  166. struct archive_entry *, const void *);
  167. static int header_old_tar(struct archive_read *, struct tar *,
  168. struct archive_entry *, const void *);
  169. static int header_pax_extensions(struct archive_read *, struct tar *,
  170. struct archive_entry *, const void *, size_t *);
  171. static int header_pax_global(struct archive_read *, struct tar *,
  172. struct archive_entry *, const void *h, size_t *);
  173. static int header_longlink(struct archive_read *, struct tar *,
  174. struct archive_entry *, const void *h, size_t *);
  175. static int header_longname(struct archive_read *, struct tar *,
  176. struct archive_entry *, const void *h, size_t *);
  177. static int read_mac_metadata_blob(struct archive_read *, struct tar *,
  178. struct archive_entry *, const void *h, size_t *);
  179. static int header_volume(struct archive_read *, struct tar *,
  180. struct archive_entry *, const void *h, size_t *);
  181. static int header_ustar(struct archive_read *, struct tar *,
  182. struct archive_entry *, const void *h);
  183. static int header_gnutar(struct archive_read *, struct tar *,
  184. struct archive_entry *, const void *h, size_t *);
  185. static int archive_read_format_tar_bid(struct archive_read *, int);
  186. static int archive_read_format_tar_options(struct archive_read *,
  187. const char *, const char *);
  188. static int archive_read_format_tar_cleanup(struct archive_read *);
  189. static int archive_read_format_tar_read_data(struct archive_read *a,
  190. const void **buff, size_t *size, int64_t *offset);
  191. static int archive_read_format_tar_skip(struct archive_read *a);
  192. static int archive_read_format_tar_read_header(struct archive_read *,
  193. struct archive_entry *);
  194. static int checksum(struct archive_read *, const void *);
  195. static int pax_attribute(struct archive_read *, struct tar *,
  196. struct archive_entry *, const char *key, const char *value,
  197. size_t value_length);
  198. static int pax_attribute_acl(struct archive_read *, struct tar *,
  199. struct archive_entry *, const char *, int);
  200. static int pax_attribute_xattr(struct archive_entry *, const char *,
  201. const char *);
  202. static int pax_header(struct archive_read *, struct tar *,
  203. struct archive_entry *, struct archive_string *);
  204. static void pax_time(const char *, int64_t *sec, long *nanos);
  205. static ssize_t readline(struct archive_read *, struct tar *, const char **,
  206. ssize_t limit, size_t *);
  207. static int read_body_to_string(struct archive_read *, struct tar *,
  208. struct archive_string *, const void *h, size_t *);
  209. static int solaris_sparse_parse(struct archive_read *, struct tar *,
  210. struct archive_entry *, const char *);
  211. static int64_t tar_atol(const char *, size_t);
  212. static int64_t tar_atol10(const char *, size_t);
  213. static int64_t tar_atol256(const char *, size_t);
  214. static int64_t tar_atol8(const char *, size_t);
  215. static int tar_read_header(struct archive_read *, struct tar *,
  216. struct archive_entry *, size_t *);
  217. static int tohex(int c);
  218. static char *url_decode(const char *);
  219. static void tar_flush_unconsumed(struct archive_read *, size_t *);
  220. int
  221. archive_read_support_format_gnutar(struct archive *a)
  222. {
  223. archive_check_magic(a, ARCHIVE_READ_MAGIC,
  224. ARCHIVE_STATE_NEW, "archive_read_support_format_gnutar");
  225. return (archive_read_support_format_tar(a));
  226. }
  227. int
  228. archive_read_support_format_tar(struct archive *_a)
  229. {
  230. struct archive_read *a = (struct archive_read *)_a;
  231. struct tar *tar;
  232. int r;
  233. archive_check_magic(_a, ARCHIVE_READ_MAGIC,
  234. ARCHIVE_STATE_NEW, "archive_read_support_format_tar");
  235. tar = (struct tar *)calloc(1, sizeof(*tar));
  236. #ifdef HAVE_COPYFILE_H
  237. /* Set this by default on Mac OS. */
  238. tar->process_mac_extensions = 1;
  239. #endif
  240. if (tar == NULL) {
  241. archive_set_error(&a->archive, ENOMEM,
  242. "Can't allocate tar data");
  243. return (ARCHIVE_FATAL);
  244. }
  245. r = __archive_read_register_format(a, tar, "tar",
  246. archive_read_format_tar_bid,
  247. archive_read_format_tar_options,
  248. archive_read_format_tar_read_header,
  249. archive_read_format_tar_read_data,
  250. archive_read_format_tar_skip,
  251. NULL,
  252. archive_read_format_tar_cleanup,
  253. NULL,
  254. NULL);
  255. if (r != ARCHIVE_OK)
  256. free(tar);
  257. return (ARCHIVE_OK);
  258. }
  259. static int
  260. archive_read_format_tar_cleanup(struct archive_read *a)
  261. {
  262. struct tar *tar;
  263. tar = (struct tar *)(a->format->data);
  264. gnu_clear_sparse_list(tar);
  265. archive_string_free(&tar->acl_text);
  266. archive_string_free(&tar->entry_pathname);
  267. archive_string_free(&tar->entry_pathname_override);
  268. archive_string_free(&tar->entry_linkpath);
  269. archive_string_free(&tar->entry_uname);
  270. archive_string_free(&tar->entry_gname);
  271. archive_string_free(&tar->line);
  272. archive_string_free(&tar->pax_global);
  273. archive_string_free(&tar->pax_header);
  274. archive_string_free(&tar->longname);
  275. archive_string_free(&tar->longlink);
  276. archive_string_free(&tar->localname);
  277. free(tar);
  278. (a->format->data) = NULL;
  279. return (ARCHIVE_OK);
  280. }
  281. /*
  282. * Validate number field
  283. *
  284. * This has to be pretty lenient in order to accommodate the enormous
  285. * variety of tar writers in the world:
  286. * = POSIX (IEEE Std 1003.1-1988) ustar requires octal values with leading
  287. * zeros and allows fields to be terminated with space or null characters
  288. * = Many writers use different termination (in particular, libarchive
  289. * omits terminator bytes to squeeze one or two more digits)
  290. * = Many writers pad with space and omit leading zeros
  291. * = GNU tar and star write base-256 values if numbers are too
  292. * big to be represented in octal
  293. *
  294. * Examples of specific tar headers that we should support:
  295. * = Perl Archive::Tar terminates uid, gid, devminor and devmajor with two
  296. * null bytes, pads size with spaces and other numeric fields with zeroes
  297. * = plexus-archiver prior to 2.6.3 (before switching to commons-compress)
  298. * may have uid and gid fields filled with spaces without any octal digits
  299. * at all and pads all numeric fields with spaces
  300. *
  301. * This should tolerate all variants in use. It will reject a field
  302. * where the writer just left garbage after a trailing NUL.
  303. */
  304. static int
  305. validate_number_field(const char* p_field, size_t i_size)
  306. {
  307. unsigned char marker = (unsigned char)p_field[0];
  308. if (marker == 128 || marker == 255 || marker == 0) {
  309. /* Base-256 marker, there's nothing we can check. */
  310. return 1;
  311. } else {
  312. /* Must be octal */
  313. size_t i = 0;
  314. /* Skip any leading spaces */
  315. while (i < i_size && p_field[i] == ' ') {
  316. ++i;
  317. }
  318. /* Skip octal digits. */
  319. while (i < i_size && p_field[i] >= '0' && p_field[i] <= '7') {
  320. ++i;
  321. }
  322. /* Any remaining characters must be space or NUL padding. */
  323. while (i < i_size) {
  324. if (p_field[i] != ' ' && p_field[i] != 0) {
  325. return 0;
  326. }
  327. ++i;
  328. }
  329. return 1;
  330. }
  331. }
  332. static int
  333. archive_read_format_tar_bid(struct archive_read *a, int best_bid)
  334. {
  335. int bid;
  336. const char *h;
  337. const struct archive_entry_header_ustar *header;
  338. (void)best_bid; /* UNUSED */
  339. bid = 0;
  340. /* Now let's look at the actual header and see if it matches. */
  341. h = __archive_read_ahead(a, 512, NULL);
  342. if (h == NULL)
  343. return (-1);
  344. /* If it's an end-of-archive mark, we can handle it. */
  345. if (h[0] == 0 && archive_block_is_null(h)) {
  346. /*
  347. * Usually, I bid the number of bits verified, but
  348. * in this case, 4096 seems excessive so I picked 10 as
  349. * an arbitrary but reasonable-seeming value.
  350. */
  351. return (10);
  352. }
  353. /* If it's not an end-of-archive mark, it must have a valid checksum.*/
  354. if (!checksum(a, h))
  355. return (0);
  356. bid += 48; /* Checksum is usually 6 octal digits. */
  357. header = (const struct archive_entry_header_ustar *)h;
  358. /* Recognize POSIX formats. */
  359. if ((memcmp(header->magic, "ustar\0", 6) == 0)
  360. && (memcmp(header->version, "00", 2) == 0))
  361. bid += 56;
  362. /* Recognize GNU tar format. */
  363. if ((memcmp(header->magic, "ustar ", 6) == 0)
  364. && (memcmp(header->version, " \0", 2) == 0))
  365. bid += 56;
  366. /* Type flag must be null, digit or A-Z, a-z. */
  367. if (header->typeflag[0] != 0 &&
  368. !( header->typeflag[0] >= '0' && header->typeflag[0] <= '9') &&
  369. !( header->typeflag[0] >= 'A' && header->typeflag[0] <= 'Z') &&
  370. !( header->typeflag[0] >= 'a' && header->typeflag[0] <= 'z') )
  371. return (0);
  372. bid += 2; /* 6 bits of variation in an 8-bit field leaves 2 bits. */
  373. /*
  374. * Check format of mode/uid/gid/mtime/size/rdevmajor/rdevminor fields.
  375. */
  376. if (bid > 0 && (
  377. validate_number_field(header->mode, sizeof(header->mode)) == 0
  378. || validate_number_field(header->uid, sizeof(header->uid)) == 0
  379. || validate_number_field(header->gid, sizeof(header->gid)) == 0
  380. || validate_number_field(header->mtime, sizeof(header->mtime)) == 0
  381. || validate_number_field(header->size, sizeof(header->size)) == 0
  382. || validate_number_field(header->rdevmajor, sizeof(header->rdevmajor)) == 0
  383. || validate_number_field(header->rdevminor, sizeof(header->rdevminor)) == 0)) {
  384. bid = 0;
  385. }
  386. return (bid);
  387. }
  388. static int
  389. archive_read_format_tar_options(struct archive_read *a,
  390. const char *key, const char *val)
  391. {
  392. struct tar *tar;
  393. int ret = ARCHIVE_FAILED;
  394. tar = (struct tar *)(a->format->data);
  395. if (strcmp(key, "compat-2x") == 0) {
  396. /* Handle UTF-8 filenames as libarchive 2.x */
  397. tar->compat_2x = (val != NULL && val[0] != 0);
  398. tar->init_default_conversion = tar->compat_2x;
  399. return (ARCHIVE_OK);
  400. } else if (strcmp(key, "hdrcharset") == 0) {
  401. if (val == NULL || val[0] == 0)
  402. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
  403. "tar: hdrcharset option needs a character-set name");
  404. else {
  405. tar->opt_sconv =
  406. archive_string_conversion_from_charset(
  407. &a->archive, val, 0);
  408. if (tar->opt_sconv != NULL)
  409. ret = ARCHIVE_OK;
  410. else
  411. ret = ARCHIVE_FATAL;
  412. }
  413. return (ret);
  414. } else if (strcmp(key, "mac-ext") == 0) {
  415. tar->process_mac_extensions = (val != NULL && val[0] != 0);
  416. return (ARCHIVE_OK);
  417. } else if (strcmp(key, "read_concatenated_archives") == 0) {
  418. tar->read_concatenated_archives = (val != NULL && val[0] != 0);
  419. return (ARCHIVE_OK);
  420. }
  421. /* Note: The "warn" return is just to inform the options
  422. * supervisor that we didn't handle it. It will generate
  423. * a suitable error if no one used this option. */
  424. return (ARCHIVE_WARN);
  425. }
  426. /* utility function- this exists to centralize the logic of tracking
  427. * how much unconsumed data we have floating around, and to consume
  428. * anything outstanding since we're going to do read_aheads
  429. */
  430. static void
  431. tar_flush_unconsumed(struct archive_read *a, size_t *unconsumed)
  432. {
  433. if (*unconsumed) {
  434. /*
  435. void *data = (void *)__archive_read_ahead(a, *unconsumed, NULL);
  436. * this block of code is to poison claimed unconsumed space, ensuring
  437. * things break if it is in use still.
  438. * currently it WILL break things, so enable it only for debugging this issue
  439. if (data) {
  440. memset(data, 0xff, *unconsumed);
  441. }
  442. */
  443. __archive_read_consume(a, *unconsumed);
  444. *unconsumed = 0;
  445. }
  446. }
  447. /*
  448. * The function invoked by archive_read_next_header(). This
  449. * just sets up a few things and then calls the internal
  450. * tar_read_header() function below.
  451. */
  452. static int
  453. archive_read_format_tar_read_header(struct archive_read *a,
  454. struct archive_entry *entry)
  455. {
  456. /*
  457. * When converting tar archives to cpio archives, it is
  458. * essential that each distinct file have a distinct inode
  459. * number. To simplify this, we keep a static count here to
  460. * assign fake dev/inode numbers to each tar entry. Note that
  461. * pax format archives may overwrite this with something more
  462. * useful.
  463. *
  464. * Ideally, we would track every file read from the archive so
  465. * that we could assign the same dev/ino pair to hardlinks,
  466. * but the memory required to store a complete lookup table is
  467. * probably not worthwhile just to support the relatively
  468. * obscure tar->cpio conversion case.
  469. */
  470. static int default_inode;
  471. static int default_dev;
  472. struct tar *tar;
  473. const char *p;
  474. const wchar_t *wp;
  475. int r;
  476. size_t l, unconsumed = 0;
  477. /* Assign default device/inode values. */
  478. archive_entry_set_dev(entry, 1 + default_dev); /* Don't use zero. */
  479. archive_entry_set_ino(entry, ++default_inode); /* Don't use zero. */
  480. /* Limit generated st_ino number to 16 bits. */
  481. if (default_inode >= 0xffff) {
  482. ++default_dev;
  483. default_inode = 0;
  484. }
  485. tar = (struct tar *)(a->format->data);
  486. tar->entry_offset = 0;
  487. gnu_clear_sparse_list(tar);
  488. tar->realsize = -1; /* Mark this as "unset" */
  489. tar->realsize_override = 0;
  490. /* Setup default string conversion. */
  491. tar->sconv = tar->opt_sconv;
  492. if (tar->sconv == NULL) {
  493. if (!tar->init_default_conversion) {
  494. tar->sconv_default =
  495. archive_string_default_conversion_for_read(&(a->archive));
  496. tar->init_default_conversion = 1;
  497. }
  498. tar->sconv = tar->sconv_default;
  499. }
  500. r = tar_read_header(a, tar, entry, &unconsumed);
  501. tar_flush_unconsumed(a, &unconsumed);
  502. /*
  503. * "non-sparse" files are really just sparse files with
  504. * a single block.
  505. */
  506. if (tar->sparse_list == NULL) {
  507. if (gnu_add_sparse_entry(a, tar, 0, tar->entry_bytes_remaining)
  508. != ARCHIVE_OK)
  509. return (ARCHIVE_FATAL);
  510. } else {
  511. struct sparse_block *sb;
  512. for (sb = tar->sparse_list; sb != NULL; sb = sb->next) {
  513. if (!sb->hole)
  514. archive_entry_sparse_add_entry(entry,
  515. sb->offset, sb->remaining);
  516. }
  517. }
  518. if (r == ARCHIVE_OK && archive_entry_filetype(entry) == AE_IFREG) {
  519. /*
  520. * "Regular" entry with trailing '/' is really
  521. * directory: This is needed for certain old tar
  522. * variants and even for some broken newer ones.
  523. */
  524. if ((wp = archive_entry_pathname_w(entry)) != NULL) {
  525. l = wcslen(wp);
  526. if (l > 0 && wp[l - 1] == L'/') {
  527. archive_entry_set_filetype(entry, AE_IFDIR);
  528. }
  529. } else if ((p = archive_entry_pathname(entry)) != NULL) {
  530. l = strlen(p);
  531. if (l > 0 && p[l - 1] == '/') {
  532. archive_entry_set_filetype(entry, AE_IFDIR);
  533. }
  534. }
  535. }
  536. return (r);
  537. }
  538. static int
  539. archive_read_format_tar_read_data(struct archive_read *a,
  540. const void **buff, size_t *size, int64_t *offset)
  541. {
  542. ssize_t bytes_read;
  543. struct tar *tar;
  544. struct sparse_block *p;
  545. tar = (struct tar *)(a->format->data);
  546. for (;;) {
  547. /* Remove exhausted entries from sparse list. */
  548. while (tar->sparse_list != NULL &&
  549. tar->sparse_list->remaining == 0) {
  550. p = tar->sparse_list;
  551. tar->sparse_list = p->next;
  552. free(p);
  553. }
  554. if (tar->entry_bytes_unconsumed) {
  555. __archive_read_consume(a, tar->entry_bytes_unconsumed);
  556. tar->entry_bytes_unconsumed = 0;
  557. }
  558. /* If we're at end of file, return EOF. */
  559. if (tar->sparse_list == NULL ||
  560. tar->entry_bytes_remaining == 0) {
  561. if (__archive_read_consume(a, tar->entry_padding) < 0)
  562. return (ARCHIVE_FATAL);
  563. tar->entry_padding = 0;
  564. *buff = NULL;
  565. *size = 0;
  566. *offset = tar->realsize;
  567. return (ARCHIVE_EOF);
  568. }
  569. *buff = __archive_read_ahead(a, 1, &bytes_read);
  570. if (bytes_read < 0)
  571. return (ARCHIVE_FATAL);
  572. if (*buff == NULL) {
  573. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
  574. "Truncated tar archive");
  575. return (ARCHIVE_FATAL);
  576. }
  577. if (bytes_read > tar->entry_bytes_remaining)
  578. bytes_read = (ssize_t)tar->entry_bytes_remaining;
  579. /* Don't read more than is available in the
  580. * current sparse block. */
  581. if (tar->sparse_list->remaining < bytes_read)
  582. bytes_read = (ssize_t)tar->sparse_list->remaining;
  583. *size = bytes_read;
  584. *offset = tar->sparse_list->offset;
  585. tar->sparse_list->remaining -= bytes_read;
  586. tar->sparse_list->offset += bytes_read;
  587. tar->entry_bytes_remaining -= bytes_read;
  588. tar->entry_bytes_unconsumed = bytes_read;
  589. if (!tar->sparse_list->hole)
  590. return (ARCHIVE_OK);
  591. /* Current is hole data and skip this. */
  592. }
  593. }
  594. static int
  595. archive_read_format_tar_skip(struct archive_read *a)
  596. {
  597. int64_t bytes_skipped;
  598. int64_t request;
  599. struct sparse_block *p;
  600. struct tar* tar;
  601. tar = (struct tar *)(a->format->data);
  602. /* Do not consume the hole of a sparse file. */
  603. request = 0;
  604. for (p = tar->sparse_list; p != NULL; p = p->next) {
  605. if (!p->hole) {
  606. if (p->remaining >= INT64_MAX - request) {
  607. return ARCHIVE_FATAL;
  608. }
  609. request += p->remaining;
  610. }
  611. }
  612. if (request > tar->entry_bytes_remaining)
  613. request = tar->entry_bytes_remaining;
  614. request += tar->entry_padding + tar->entry_bytes_unconsumed;
  615. bytes_skipped = __archive_read_consume(a, request);
  616. if (bytes_skipped < 0)
  617. return (ARCHIVE_FATAL);
  618. tar->entry_bytes_remaining = 0;
  619. tar->entry_bytes_unconsumed = 0;
  620. tar->entry_padding = 0;
  621. /* Free the sparse list. */
  622. gnu_clear_sparse_list(tar);
  623. return (ARCHIVE_OK);
  624. }
  625. /*
  626. * This function recursively interprets all of the headers associated
  627. * with a single entry.
  628. */
  629. static int
  630. tar_read_header(struct archive_read *a, struct tar *tar,
  631. struct archive_entry *entry, size_t *unconsumed)
  632. {
  633. ssize_t bytes;
  634. int err;
  635. const char *h;
  636. const struct archive_entry_header_ustar *header;
  637. const struct archive_entry_header_gnutar *gnuheader;
  638. /* Loop until we find a workable header record. */
  639. for (;;) {
  640. tar_flush_unconsumed(a, unconsumed);
  641. /* Read 512-byte header record */
  642. h = __archive_read_ahead(a, 512, &bytes);
  643. if (bytes < 0)
  644. return ((int)bytes);
  645. if (bytes == 0) { /* EOF at a block boundary. */
  646. /* Some writers do omit the block of nulls. <sigh> */
  647. return (ARCHIVE_EOF);
  648. }
  649. if (bytes < 512) { /* Short block at EOF; this is bad. */
  650. archive_set_error(&a->archive,
  651. ARCHIVE_ERRNO_FILE_FORMAT,
  652. "Truncated tar archive");
  653. return (ARCHIVE_FATAL);
  654. }
  655. *unconsumed = 512;
  656. /* Header is workable if it's not an end-of-archive mark. */
  657. if (h[0] != 0 || !archive_block_is_null(h))
  658. break;
  659. /* Ensure format is set for archives with only null blocks. */
  660. if (a->archive.archive_format_name == NULL) {
  661. a->archive.archive_format = ARCHIVE_FORMAT_TAR;
  662. a->archive.archive_format_name = "tar";
  663. }
  664. if (!tar->read_concatenated_archives) {
  665. /* Try to consume a second all-null record, as well. */
  666. tar_flush_unconsumed(a, unconsumed);
  667. h = __archive_read_ahead(a, 512, NULL);
  668. if (h != NULL && h[0] == 0 && archive_block_is_null(h))
  669. __archive_read_consume(a, 512);
  670. archive_clear_error(&a->archive);
  671. return (ARCHIVE_EOF);
  672. }
  673. /*
  674. * We're reading concatenated archives, ignore this block and
  675. * loop to get the next.
  676. */
  677. }
  678. /*
  679. * Note: If the checksum fails and we return ARCHIVE_RETRY,
  680. * then the client is likely to just retry. This is a very
  681. * crude way to search for the next valid header!
  682. *
  683. * TODO: Improve this by implementing a real header scan.
  684. */
  685. if (!checksum(a, h)) {
  686. tar_flush_unconsumed(a, unconsumed);
  687. archive_set_error(&a->archive, EINVAL, "Damaged tar archive");
  688. return (ARCHIVE_RETRY); /* Retryable: Invalid header */
  689. }
  690. if (++tar->header_recursion_depth > 32) {
  691. tar_flush_unconsumed(a, unconsumed);
  692. archive_set_error(&a->archive, EINVAL, "Too many special headers");
  693. return (ARCHIVE_WARN);
  694. }
  695. /* Determine the format variant. */
  696. header = (const struct archive_entry_header_ustar *)h;
  697. switch(header->typeflag[0]) {
  698. case 'A': /* Solaris tar ACL */
  699. a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
  700. a->archive.archive_format_name = "Solaris tar";
  701. err = header_Solaris_ACL(a, tar, entry, h, unconsumed);
  702. break;
  703. case 'g': /* POSIX-standard 'g' header. */
  704. a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
  705. a->archive.archive_format_name = "POSIX pax interchange format";
  706. err = header_pax_global(a, tar, entry, h, unconsumed);
  707. if (err == ARCHIVE_EOF)
  708. return (err);
  709. break;
  710. case 'K': /* Long link name (GNU tar, others) */
  711. err = header_longlink(a, tar, entry, h, unconsumed);
  712. break;
  713. case 'L': /* Long filename (GNU tar, others) */
  714. err = header_longname(a, tar, entry, h, unconsumed);
  715. break;
  716. case 'V': /* GNU volume header */
  717. err = header_volume(a, tar, entry, h, unconsumed);
  718. break;
  719. case 'X': /* Used by SUN tar; same as 'x'. */
  720. a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
  721. a->archive.archive_format_name =
  722. "POSIX pax interchange format (Sun variant)";
  723. err = header_pax_extensions(a, tar, entry, h, unconsumed);
  724. break;
  725. case 'x': /* POSIX-standard 'x' header. */
  726. a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
  727. a->archive.archive_format_name = "POSIX pax interchange format";
  728. err = header_pax_extensions(a, tar, entry, h, unconsumed);
  729. break;
  730. default:
  731. gnuheader = (const struct archive_entry_header_gnutar *)h;
  732. if (memcmp(gnuheader->magic, "ustar \0", 8) == 0) {
  733. a->archive.archive_format = ARCHIVE_FORMAT_TAR_GNUTAR;
  734. a->archive.archive_format_name = "GNU tar format";
  735. err = header_gnutar(a, tar, entry, h, unconsumed);
  736. } else if (memcmp(header->magic, "ustar", 5) == 0) {
  737. if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) {
  738. a->archive.archive_format = ARCHIVE_FORMAT_TAR_USTAR;
  739. a->archive.archive_format_name = "POSIX ustar format";
  740. }
  741. err = header_ustar(a, tar, entry, h);
  742. } else {
  743. a->archive.archive_format = ARCHIVE_FORMAT_TAR;
  744. a->archive.archive_format_name = "tar (non-POSIX)";
  745. err = header_old_tar(a, tar, entry, h);
  746. }
  747. }
  748. if (err == ARCHIVE_FATAL)
  749. return (err);
  750. tar_flush_unconsumed(a, unconsumed);
  751. h = NULL;
  752. header = NULL;
  753. --tar->header_recursion_depth;
  754. /* Yuck. Apple's design here ends up storing long pathname
  755. * extensions for both the AppleDouble extension entry and the
  756. * regular entry.
  757. */
  758. if ((err == ARCHIVE_WARN || err == ARCHIVE_OK) &&
  759. tar->header_recursion_depth == 0 &&
  760. tar->process_mac_extensions) {
  761. int err2 = read_mac_metadata_blob(a, tar, entry, h, unconsumed);
  762. if (err2 < err)
  763. err = err2;
  764. }
  765. /* We return warnings or success as-is. Anything else is fatal. */
  766. if (err == ARCHIVE_WARN || err == ARCHIVE_OK) {
  767. if (tar->sparse_gnu_pending) {
  768. if (tar->sparse_gnu_major == 1 &&
  769. tar->sparse_gnu_minor == 0) {
  770. ssize_t bytes_read;
  771. tar->sparse_gnu_pending = 0;
  772. /* Read initial sparse map. */
  773. bytes_read = gnu_sparse_10_read(a, tar, unconsumed);
  774. if (bytes_read < 0)
  775. return ((int)bytes_read);
  776. tar->entry_bytes_remaining -= bytes_read;
  777. } else {
  778. archive_set_error(&a->archive,
  779. ARCHIVE_ERRNO_MISC,
  780. "Unrecognized GNU sparse file format");
  781. return (ARCHIVE_WARN);
  782. }
  783. tar->sparse_gnu_pending = 0;
  784. }
  785. return (err);
  786. }
  787. if (err == ARCHIVE_EOF)
  788. /* EOF when recursively reading a header is bad. */
  789. archive_set_error(&a->archive, EINVAL, "Damaged tar archive");
  790. return (ARCHIVE_FATAL);
  791. }
  792. /*
  793. * Return true if block checksum is correct.
  794. */
  795. static int
  796. checksum(struct archive_read *a, const void *h)
  797. {
  798. const unsigned char *bytes;
  799. const struct archive_entry_header_ustar *header;
  800. int check, sum;
  801. size_t i;
  802. (void)a; /* UNUSED */
  803. bytes = (const unsigned char *)h;
  804. header = (const struct archive_entry_header_ustar *)h;
  805. /* Checksum field must hold an octal number */
  806. for (i = 0; i < sizeof(header->checksum); ++i) {
  807. char c = header->checksum[i];
  808. if (c != ' ' && c != '\0' && (c < '0' || c > '7'))
  809. return 0;
  810. }
  811. /*
  812. * Test the checksum. Note that POSIX specifies _unsigned_
  813. * bytes for this calculation.
  814. */
  815. sum = (int)tar_atol(header->checksum, sizeof(header->checksum));
  816. check = 0;
  817. for (i = 0; i < 148; i++)
  818. check += (unsigned char)bytes[i];
  819. for (; i < 156; i++)
  820. check += 32;
  821. for (; i < 512; i++)
  822. check += (unsigned char)bytes[i];
  823. if (sum == check)
  824. return (1);
  825. /*
  826. * Repeat test with _signed_ bytes, just in case this archive
  827. * was created by an old BSD, Solaris, or HP-UX tar with a
  828. * broken checksum calculation.
  829. */
  830. check = 0;
  831. for (i = 0; i < 148; i++)
  832. check += (signed char)bytes[i];
  833. for (; i < 156; i++)
  834. check += 32;
  835. for (; i < 512; i++)
  836. check += (signed char)bytes[i];
  837. if (sum == check)
  838. return (1);
  839. return (0);
  840. }
  /*
   * Return true if this block contains only nulls.
   *
   * Exactly 512 bytes starting at 'p' are examined; returns 1 when all
   * of them are zero and 0 as soon as a non-zero byte is seen.
   */
  static int
  archive_block_is_null(const char *p)
  {
  	const char *const end = p + 512;

  	while (p < end) {
  		if (*p != '\0')
  			return (0);
  		p++;
  	}
  	return (1);
  }
  853. /*
  854. * Interpret 'A' Solaris ACL header
  855. */
  856. static int
  857. header_Solaris_ACL(struct archive_read *a, struct tar *tar,
  858. struct archive_entry *entry, const void *h, size_t *unconsumed)
  859. {
  860. const struct archive_entry_header_ustar *header;
  861. size_t size;
  862. int err, acl_type;
  863. int64_t type;
  864. char *acl, *p;
  865. /*
  866. * read_body_to_string adds a NUL terminator, but we need a little
  867. * more to make sure that we don't overrun acl_text later.
  868. */
  869. header = (const struct archive_entry_header_ustar *)h;
  870. size = (size_t)tar_atol(header->size, sizeof(header->size));
  871. err = read_body_to_string(a, tar, &(tar->acl_text), h, unconsumed);
  872. if (err != ARCHIVE_OK)
  873. return (err);
  874. /* Recursively read next header */
  875. err = tar_read_header(a, tar, entry, unconsumed);
  876. if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
  877. return (err);
  878. /* TODO: Examine the first characters to see if this
  879. * is an AIX ACL descriptor. We'll likely never support
  880. * them, but it would be polite to recognize and warn when
  881. * we do see them. */
  882. /* Leading octal number indicates ACL type and number of entries. */
  883. p = acl = tar->acl_text.s;
  884. type = 0;
  885. while (*p != '\0' && p < acl + size) {
  886. if (*p < '0' || *p > '7') {
  887. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
  888. "Malformed Solaris ACL attribute (invalid digit)");
  889. return(ARCHIVE_WARN);
  890. }
  891. type <<= 3;
  892. type += *p - '0';
  893. if (type > 077777777) {
  894. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
  895. "Malformed Solaris ACL attribute (count too large)");
  896. return (ARCHIVE_WARN);
  897. }
  898. p++;
  899. }
  900. switch ((int)type & ~0777777) {
  901. case 01000000:
  902. /* POSIX.1e ACL */
  903. acl_type = ARCHIVE_ENTRY_ACL_TYPE_ACCESS;
  904. break;
  905. case 03000000:
  906. /* NFSv4 ACL */
  907. acl_type = ARCHIVE_ENTRY_ACL_TYPE_NFS4;
  908. break;
  909. default:
  910. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
  911. "Malformed Solaris ACL attribute (unsupported type %o)",
  912. (int)type);
  913. return (ARCHIVE_WARN);
  914. }
  915. p++;
  916. if (p >= acl + size) {
  917. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
  918. "Malformed Solaris ACL attribute (body overflow)");
  919. return(ARCHIVE_WARN);
  920. }
  921. /* ACL text is null-terminated; find the end. */
  922. size -= (p - acl);
  923. acl = p;
  924. while (*p != '\0' && p < acl + size)
  925. p++;
  926. if (tar->sconv_acl == NULL) {
  927. tar->sconv_acl = archive_string_conversion_from_charset(
  928. &(a->archive), "UTF-8", 1);
  929. if (tar->sconv_acl == NULL)
  930. return (ARCHIVE_FATAL);
  931. }
  932. archive_strncpy(&(tar->localname), acl, p - acl);
  933. err = archive_acl_from_text_l(archive_entry_acl(entry),
  934. tar->localname.s, acl_type, tar->sconv_acl);
  935. if (err != ARCHIVE_OK) {
  936. if (errno == ENOMEM) {
  937. archive_set_error(&a->archive, ENOMEM,
  938. "Can't allocate memory for ACL");
  939. } else
  940. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
  941. "Malformed Solaris ACL attribute (unparsable)");
  942. }
  943. return (err);
  944. }
  945. /*
  946. * Interpret 'K' long linkname header.
  947. */
  948. static int
  949. header_longlink(struct archive_read *a, struct tar *tar,
  950. struct archive_entry *entry, const void *h, size_t *unconsumed)
  951. {
  952. int err;
  953. err = read_body_to_string(a, tar, &(tar->longlink), h, unconsumed);
  954. if (err != ARCHIVE_OK)
  955. return (err);
  956. err = tar_read_header(a, tar, entry, unconsumed);
  957. if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
  958. return (err);
  959. /* Set symlink if symlink already set, else hardlink. */
  960. archive_entry_copy_link(entry, tar->longlink.s);
  961. return (ARCHIVE_OK);
  962. }
  963. static int
  964. set_conversion_failed_error(struct archive_read *a,
  965. struct archive_string_conv *sconv, const char *name)
  966. {
  967. if (errno == ENOMEM) {
  968. archive_set_error(&a->archive, ENOMEM,
  969. "Can't allocate memory for %s", name);
  970. return (ARCHIVE_FATAL);
  971. }
  972. archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
  973. "%s can't be converted from %s to current locale.",
  974. name, archive_string_conversion_charset_name(sconv));
  975. return (ARCHIVE_WARN);
  976. }
  977. /*
  978. * Interpret 'L' long filename header.
  979. */
  980. static int
  981. header_longname(struct archive_read *a, struct tar *tar,
  982. struct archive_entry *entry, const void *h, size_t *unconsumed)
  983. {
  984. int err;
  985. err = read_body_to_string(a, tar, &(tar->longname), h, unconsumed);
  986. if (err != ARCHIVE_OK)
  987. return (err);
  988. /* Read and parse "real" header, then override name. */
  989. err = tar_read_header(a, tar, entry, unconsumed);
  990. if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
  991. return (err);
  992. if (archive_entry_copy_pathname_l(entry, tar->longname.s,
  993. archive_strlen(&(tar->longname)), tar->sconv) != 0)
  994. err = set_conversion_failed_error(a, tar->sconv, "Pathname");
  995. return (err);
  996. }
  /*
   * Interpret 'V' GNU tar volume header.
   *
   * Nothing from the volume header itself is used; the result is simply
   * that of parsing the header which follows it.
   */
  static int
  header_volume(struct archive_read *a, struct tar *tar,
      struct archive_entry *entry, const void *h, size_t *unconsumed)
  {
  	int result;

  	(void)h; /* UNUSED */

  	/* Just skip this and read the next header. */
  	result = tar_read_header(a, tar, entry, unconsumed);
  	return (result);
  }
  1008. /*
  1009. * Read body of an archive entry into an archive_string object.
  1010. */
  1011. static int
  1012. read_body_to_string(struct archive_read *a, struct tar *tar,
  1013. struct archive_string *as, const void *h, size_t *unconsumed)
  1014. {
  1015. int64_t size;
  1016. const struct archive_entry_header_ustar *header;
  1017. const void *src;
  1018. (void)tar; /* UNUSED */
  1019. header = (const struct archive_entry_header_ustar *)h;
  1020. size = tar_atol(header->size, sizeof(header->size));
  1021. if ((size > 1048576) || (size < 0)) {
  1022. archive_set_error(&a->archive, EINVAL,
  1023. "Special header too large");
  1024. return (ARCHIVE_FATAL);
  1025. }
  1026. /* Fail if we can't make our buffer big enough. */
  1027. if (archive_string_ensure(as, (size_t)size+1) == NULL) {
  1028. archive_set_error(&a->archive, ENOMEM,
  1029. "No memory");
  1030. return (ARCHIVE_FATAL);
  1031. }
  1032. tar_flush_unconsumed(a, unconsumed);
  1033. /* Read the body into the string. */
  1034. *unconsumed = (size_t)((size + 511) & ~ 511);
  1035. src = __archive_read_ahead(a, *unconsumed, NULL);
  1036. if (src == NULL) {
  1037. *unconsumed = 0;
  1038. return (ARCHIVE_FATAL);
  1039. }
  1040. memcpy(as->s, src, (size_t)size);
  1041. as->s[size] = '\0';
  1042. as->length = (size_t)size;
  1043. return (ARCHIVE_OK);
  1044. }
  1045. /*
  1046. * Parse out common header elements.
  1047. *
  1048. * This would be the same as header_old_tar, except that the
  1049. * filename is handled slightly differently for old and POSIX
  1050. * entries (POSIX entries support a 'prefix'). This factoring
  1051. * allows header_old_tar and header_ustar
  1052. * to handle filenames differently, while still putting most of the
  1053. * common parsing into one place.
  1054. */
  1055. static int
  1056. header_common(struct archive_read *a, struct tar *tar,
  1057. struct archive_entry *entry, const void *h)
  1058. {
  1059. const struct archive_entry_header_ustar *header;
  1060. char tartype;
  1061. int err = ARCHIVE_OK;
  1062. header = (const struct archive_entry_header_ustar *)h;
  1063. if (header->linkname[0])
  1064. archive_strncpy(&(tar->entry_linkpath),
  1065. header->linkname, sizeof(header->linkname));
  1066. else
  1067. archive_string_empty(&(tar->entry_linkpath));
  1068. /* Parse out the numeric fields (all are octal) */
  1069. archive_entry_set_mode(entry,
  1070. (mode_t)tar_atol(header->mode, sizeof(header->mode)));
  1071. archive_entry_set_uid(entry, tar_atol(header->uid, sizeof(header->uid)));
  1072. archive_entry_set_gid(entry, tar_atol(header->gid, sizeof(header->gid)));
  1073. tar->entry_bytes_remaining = tar_atol(header->size, sizeof(header->size));
  1074. if (tar->entry_bytes_remaining < 0) {
  1075. tar->entry_bytes_remaining = 0;
  1076. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
  1077. "Tar entry has negative size");
  1078. return (ARCHIVE_FATAL);
  1079. }
  1080. if (tar->entry_bytes_remaining == INT64_MAX) {
  1081. /* Note: tar_atol returns INT64_MAX on overflow */
  1082. tar->entry_bytes_remaining = 0;
  1083. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
  1084. "Tar entry size overflow");
  1085. return (ARCHIVE_FATAL);
  1086. }
  1087. tar->realsize = tar->entry_bytes_remaining;
  1088. archive_entry_set_size(entry, tar->entry_bytes_remaining);
  1089. archive_entry_set_mtime(entry, tar_atol(header->mtime, sizeof(header->mtime)), 0);
  1090. /* Handle the tar type flag appropriately. */
  1091. tartype = header->typeflag[0];
  1092. switch (tartype) {
  1093. case '1': /* Hard link */
  1094. if (archive_entry_copy_hardlink_l(entry, tar->entry_linkpath.s,
  1095. archive_strlen(&(tar->entry_linkpath)), tar->sconv) != 0) {
  1096. err = set_conversion_failed_error(a, tar->sconv,
  1097. "Linkname");
  1098. if (err == ARCHIVE_FATAL)
  1099. return (err);
  1100. }
  1101. /*
  1102. * The following may seem odd, but: Technically, tar
  1103. * does not store the file type for a "hard link"
  1104. * entry, only the fact that it is a hard link. So, I
  1105. * leave the type zero normally. But, pax interchange
  1106. * format allows hard links to have data, which
  1107. * implies that the underlying entry is a regular
  1108. * file.
  1109. */
  1110. if (archive_entry_size(entry) > 0)
  1111. archive_entry_set_filetype(entry, AE_IFREG);
  1112. /*
  1113. * A tricky point: Traditionally, tar readers have
  1114. * ignored the size field when reading hardlink
  1115. * entries, and some writers put non-zero sizes even
  1116. * though the body is empty. POSIX blessed this
  1117. * convention in the 1988 standard, but broke with
  1118. * this tradition in 2001 by permitting hardlink
  1119. * entries to store valid bodies in pax interchange
  1120. * format, but not in ustar format. Since there is no
  1121. * hard and fast way to distinguish pax interchange
  1122. * from earlier archives (the 'x' and 'g' entries are
  1123. * optional, after all), we need a heuristic.
  1124. */
  1125. if (archive_entry_size(entry) == 0) {
  1126. /* If the size is already zero, we're done. */
  1127. } else if (a->archive.archive_format
  1128. == ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) {
  1129. /* Definitely pax extended; must obey hardlink size. */
  1130. } else if (a->archive.archive_format == ARCHIVE_FORMAT_TAR
  1131. || a->archive.archive_format == ARCHIVE_FORMAT_TAR_GNUTAR)
  1132. {
  1133. /* Old-style or GNU tar: we must ignore the size. */
  1134. archive_entry_set_size(entry, 0);
  1135. tar->entry_bytes_remaining = 0;
  1136. } else if (archive_read_format_tar_bid(a, 50) > 50) {
  1137. /*
  1138. * We don't know if it's pax: If the bid
  1139. * function sees a valid ustar header
  1140. * immediately following, then let's ignore
  1141. * the hardlink size.
  1142. */
  1143. archive_entry_set_size(entry, 0);
  1144. tar->entry_bytes_remaining = 0;
  1145. }
  1146. /*
  1147. * TODO: There are still two cases I'd like to handle:
  1148. * = a ustar non-pax archive with a hardlink entry at
  1149. * end-of-archive. (Look for block of nulls following?)
  1150. * = a pax archive that has not seen any pax headers
  1151. * and has an entry which is a hardlink entry storing
  1152. * a body containing an uncompressed tar archive.
  1153. * The first is worth addressing; I don't see any reliable
  1154. * way to deal with the second possibility.
  1155. */
  1156. break;
  1157. case '2': /* Symlink */
  1158. archive_entry_set_filetype(entry, AE_IFLNK);
  1159. archive_entry_set_size(entry, 0);
  1160. tar->entry_bytes_remaining = 0;
  1161. if (archive_entry_copy_symlink_l(entry, tar->entry_linkpath.s,
  1162. archive_strlen(&(tar->entry_linkpath)), tar->sconv) != 0) {
  1163. err = set_conversion_failed_error(a, tar->sconv,
  1164. "Linkname");
  1165. if (err == ARCHIVE_FATAL)
  1166. return (err);
  1167. }
  1168. break;
  1169. case '3': /* Character device */
  1170. archive_entry_set_filetype(entry, AE_IFCHR);
  1171. archive_entry_set_size(entry, 0);
  1172. tar->entry_bytes_remaining = 0;
  1173. break;
  1174. case '4': /* Block device */
  1175. archive_entry_set_filetype(entry, AE_IFBLK);
  1176. archive_entry_set_size(entry, 0);
  1177. tar->entry_bytes_remaining = 0;
  1178. break;
  1179. case '5': /* Dir */
  1180. archive_entry_set_filetype(entry, AE_IFDIR);
  1181. archive_entry_set_size(entry, 0);
  1182. tar->entry_bytes_remaining = 0;
  1183. break;
  1184. case '6': /* FIFO device */
  1185. archive_entry_set_filetype(entry, AE_IFIFO);
  1186. archive_entry_set_size(entry, 0);
  1187. tar->entry_bytes_remaining = 0;
  1188. break;
  1189. case 'D': /* GNU incremental directory type */
  1190. /*
  1191. * No special handling is actually required here.
  1192. * It might be nice someday to preprocess the file list and
  1193. * provide it to the client, though.
  1194. */
  1195. archive_entry_set_filetype(entry, AE_IFDIR);
  1196. break;
  1197. case 'M': /* GNU "Multi-volume" (remainder of file from last archive)*/
  1198. /*
  1199. * As far as I can tell, this is just like a regular file
  1200. * entry, except that the contents should be _appended_ to
  1201. * the indicated file at the indicated offset. This may
  1202. * require some API work to fully support.
  1203. */
  1204. break;
  1205. case 'N': /* Old GNU "long filename" entry. */
  1206. /* The body of this entry is a script for renaming
  1207. * previously-extracted entries. Ugh. It will never
  1208. * be supported by libarchive. */
  1209. archive_entry_set_filetype(entry, AE_IFREG);
  1210. break;
  1211. case 'S': /* GNU sparse files */
  1212. /*
  1213. * Sparse files are really just regular files with
  1214. * sparse information in the extended area.
  1215. */
  1216. /* FALLTHROUGH */
  1217. case '0':
  1218. /*
  1219. * Enable sparse file "read" support only for regular
  1220. * files and explicit GNU sparse files. However, we
  1221. * don't allow non-standard file types to be sparse.
  1222. */
  1223. tar->sparse_allowed = 1;
  1224. /* FALLTHROUGH */
  1225. default: /* Regular file and non-standard types */
  1226. /*
  1227. * Per POSIX: non-recognized types should always be
  1228. * treated as regular files.
  1229. */
  1230. archive_entry_set_filetype(entry, AE_IFREG);
  1231. break;
  1232. }
  1233. return (err);
  1234. }
  1235. /*
  1236. * Parse out header elements for "old-style" tar archives.
  1237. */
  1238. static int
  1239. header_old_tar(struct archive_read *a, struct tar *tar,
  1240. struct archive_entry *entry, const void *h)
  1241. {
  1242. const struct archive_entry_header_ustar *header;
  1243. int err = ARCHIVE_OK, err2;
  1244. /* Copy filename over (to ensure null termination). */
  1245. header = (const struct archive_entry_header_ustar *)h;
  1246. if (archive_entry_copy_pathname_l(entry,
  1247. header->name, sizeof(header->name), tar->sconv) != 0) {
  1248. err = set_conversion_failed_error(a, tar->sconv, "Pathname");
  1249. if (err == ARCHIVE_FATAL)
  1250. return (err);
  1251. }
  1252. /* Grab rest of common fields */
  1253. err2 = header_common(a, tar, entry, h);
  1254. if (err > err2)
  1255. err = err2;
  1256. tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
  1257. return (err);
  1258. }
  1259. /*
  1260. * Read a Mac AppleDouble-encoded blob of file metadata,
  1261. * if there is one.
  1262. */
  1263. static int
  1264. read_mac_metadata_blob(struct archive_read *a, struct tar *tar,
  1265. struct archive_entry *entry, const void *h, size_t *unconsumed)
  1266. {
  1267. int64_t size;
  1268. const void *data;
  1269. const char *p, *name;
  1270. const wchar_t *wp, *wname;
  1271. (void)h; /* UNUSED */
  1272. wname = wp = archive_entry_pathname_w(entry);
  1273. if (wp != NULL) {
  1274. /* Find the last path element. */
  1275. for (; *wp != L'\0'; ++wp) {
  1276. if (wp[0] == '/' && wp[1] != L'\0')
  1277. wname = wp + 1;
  1278. }
  1279. /*
  1280. * If last path element starts with "._", then
  1281. * this is a Mac extension.
  1282. */
  1283. if (wname[0] != L'.' || wname[1] != L'_' || wname[2] == L'\0')
  1284. return ARCHIVE_OK;
  1285. } else {
  1286. /* Find the last path element. */
  1287. name = p = archive_entry_pathname(entry);
  1288. if (p == NULL)
  1289. return (ARCHIVE_FAILED);
  1290. for (; *p != '\0'; ++p) {
  1291. if (p[0] == '/' && p[1] != '\0')
  1292. name = p + 1;
  1293. }
  1294. /*
  1295. * If last path element starts with "._", then
  1296. * this is a Mac extension.
  1297. */
  1298. if (name[0] != '.' || name[1] != '_' || name[2] == '\0')
  1299. return ARCHIVE_OK;
  1300. }
  1301. /* Read the body as a Mac OS metadata blob. */
  1302. size = archive_entry_size(entry);
  1303. /*
  1304. * TODO: Look beyond the body here to peek at the next header.
  1305. * If it's a regular header (not an extension header)
  1306. * that has the wrong name, just return the current
  1307. * entry as-is, without consuming the body here.
  1308. * That would reduce the risk of us mis-identifying
  1309. * an ordinary file that just happened to have
  1310. * a name starting with "._".
  1311. *
  1312. * Q: Is the above idea really possible? Even
  1313. * when there are GNU or pax extension entries?
  1314. */
  1315. data = __archive_read_ahead(a, (size_t)size, NULL);
  1316. if (data == NULL) {
  1317. *unconsumed = 0;
  1318. return (ARCHIVE_FATAL);
  1319. }
  1320. archive_entry_copy_mac_metadata(entry, data, (size_t)size);
  1321. *unconsumed = (size_t)((size + 511) & ~ 511);
  1322. tar_flush_unconsumed(a, unconsumed);
  1323. return (tar_read_header(a, tar, entry, unconsumed));
  1324. }
  1325. /*
  1326. * Parse a file header for a pax extended archive entry.
  1327. */
  1328. static int
  1329. header_pax_global(struct archive_read *a, struct tar *tar,
  1330. struct archive_entry *entry, const void *h, size_t *unconsumed)
  1331. {
  1332. int err;
  1333. err = read_body_to_string(a, tar, &(tar->pax_global), h, unconsumed);
  1334. if (err != ARCHIVE_OK)
  1335. return (err);
  1336. err = tar_read_header(a, tar, entry, unconsumed);
  1337. return (err);
  1338. }
  1339. static int
  1340. header_pax_extensions(struct archive_read *a, struct tar *tar,
  1341. struct archive_entry *entry, const void *h, size_t *unconsumed)
  1342. {
  1343. int err, err2;
  1344. err = read_body_to_string(a, tar, &(tar->pax_header), h, unconsumed);
  1345. if (err != ARCHIVE_OK)
  1346. return (err);
  1347. /* Parse the next header. */
  1348. err = tar_read_header(a, tar, entry, unconsumed);
  1349. if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
  1350. return (err);
  1351. /*
  1352. * TODO: Parse global/default options into 'entry' struct here
  1353. * before handling file-specific options.
  1354. *
  1355. * This design (parse standard header, then overwrite with pax
  1356. * extended attribute data) usually works well, but isn't ideal;
  1357. * it would be better to parse the pax extended attributes first
  1358. * and then skip any fields in the standard header that were
  1359. * defined in the pax header.
  1360. */
  1361. err2 = pax_header(a, tar, entry, &tar->pax_header);
  1362. err = err_combine(err, err2);
  1363. tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
  1364. return (err);
  1365. }
  1366. /*
  1367. * Parse a file header for a Posix "ustar" archive entry. This also
  1368. * handles "pax" or "extended ustar" entries.
  1369. */
  1370. static int
  1371. header_ustar(struct archive_read *a, struct tar *tar,
  1372. struct archive_entry *entry, const void *h)
  1373. {
  1374. const struct archive_entry_header_ustar *header;
  1375. struct archive_string *as;
  1376. int err = ARCHIVE_OK, r;
  1377. header = (const struct archive_entry_header_ustar *)h;
  1378. /* Copy name into an internal buffer to ensure null-termination. */
  1379. as = &(tar->entry_pathname);
  1380. if (header->prefix[0]) {
  1381. archive_strncpy(as, header->prefix, sizeof(header->prefix));
  1382. if (as->s[archive_strlen(as) - 1] != '/')
  1383. archive_strappend_char(as, '/');
  1384. archive_strncat(as, header->name, sizeof(header->name));
  1385. } else {
  1386. archive_strncpy(as, header->name, sizeof(header->name));
  1387. }
  1388. if (archive_entry_copy_pathname_l(entry, as->s, archive_strlen(as),
  1389. tar->sconv) != 0) {
  1390. err = set_conversion_failed_error(a, tar->sconv, "Pathname");
  1391. if (err == ARCHIVE_FATAL)
  1392. return (err);
  1393. }
  1394. /* Handle rest of common fields. */
  1395. r = header_common(a, tar, entry, h);
  1396. if (r == ARCHIVE_FATAL)
  1397. return (r);
  1398. if (r < err)
  1399. err = r;
  1400. /* Handle POSIX ustar fields. */
  1401. if (archive_entry_copy_uname_l(entry,
  1402. header->uname, sizeof(header->uname), tar->sconv) != 0) {
  1403. err = set_conversion_failed_error(a, tar->sconv, "Uname");
  1404. if (err == ARCHIVE_FATAL)
  1405. return (err);
  1406. }
  1407. if (archive_entry_copy_gname_l(entry,
  1408. header->gname, sizeof(header->gname), tar->sconv) != 0) {
  1409. err = set_conversion_failed_error(a, tar->sconv, "Gname");
  1410. if (err == ARCHIVE_FATAL)
  1411. return (err);
  1412. }
  1413. /* Parse out device numbers only for char and block specials. */
  1414. if (header->typeflag[0] == '3' || header->typeflag[0] == '4') {
  1415. archive_entry_set_rdevmajor(entry, (dev_t)
  1416. tar_atol(header->rdevmajor, sizeof(header->rdevmajor)));
  1417. archive_entry_set_rdevminor(entry, (dev_t)
  1418. tar_atol(header->rdevminor, sizeof(header->rdevminor)));
  1419. }
  1420. tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
  1421. return (err);
  1422. }
  1423. /*
  1424. * Parse the pax extended attributes record.
  1425. *
  1426. * Returns non-zero if there's an error in the data.
  1427. */
  1428. static int
  1429. pax_header(struct archive_read *a, struct tar *tar,
  1430. struct archive_entry *entry, struct archive_string *in_as)
  1431. {
  1432. size_t attr_length, l, line_length, value_length;
  1433. char *p;
  1434. char *key, *value;
  1435. struct archive_string *as;
  1436. struct archive_string_conv *sconv;
  1437. int err, err2;
  1438. char *attr = in_as->s;
  1439. attr_length = in_as->length;
  1440. tar->pax_hdrcharset_binary = 0;
  1441. archive_string_empty(&(tar->entry_gname));
  1442. archive_string_empty(&(tar->entry_linkpath));
  1443. archive_string_empty(&(tar->entry_pathname));
  1444. archive_string_empty(&(tar->entry_pathname_override));
  1445. archive_string_empty(&(tar->entry_uname));
  1446. err = ARCHIVE_OK;
  1447. while (attr_length > 0) {
  1448. /* Parse decimal length field at start of line. */
  1449. line_length = 0;
  1450. l = attr_length;
  1451. p = attr; /* Record start of line. */
  1452. while (l>0) {
  1453. if (*p == ' ') {
  1454. p++;
  1455. l--;
  1456. break;
  1457. }
  1458. if (*p < '0' || *p > '9') {
  1459. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
  1460. "Ignoring malformed pax extended attributes");
  1461. return (ARCHIVE_WARN);
  1462. }
  1463. line_length *= 10;
  1464. line_length += *p - '0';
  1465. if (line_length > 999999) {
  1466. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
  1467. "Rejecting pax extended attribute > 1MB");
  1468. return (ARCHIVE_WARN);
  1469. }
  1470. p++;
  1471. l--;
  1472. }
  1473. /*
  1474. * Parsed length must be no bigger than available data,
  1475. * at least 1, and the last character of the line must
  1476. * be '\n'.
  1477. */
  1478. if (line_length > attr_length
  1479. || line_length < 1
  1480. || attr[line_length - 1] != '\n')
  1481. {
  1482. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
  1483. "Ignoring malformed pax extended attribute");
  1484. return (ARCHIVE_WARN);
  1485. }
  1486. /* Null-terminate the line. */
  1487. attr[line_length - 1] = '\0';
  1488. /* Find end of key and null terminate it. */
  1489. key = p;
  1490. if (key[0] == '=')
  1491. return (-1);
  1492. while (*p && *p != '=')
  1493. ++p;
  1494. if (*p == '\0') {
  1495. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
  1496. "Invalid pax extended attributes");
  1497. return (ARCHIVE_WARN);
  1498. }
  1499. *p = '\0';
  1500. value = p + 1;
  1501. /* Some values may be binary data */
  1502. value_length = attr + line_length - 1 - value;
  1503. /* Identify this attribute and set it in the entry. */
  1504. err2 = pax_attribute(a, tar, entry, key, value, value_length);
  1505. if (err2 == ARCHIVE_FATAL)
  1506. return (err2);
  1507. err = err_combine(err, err2);
  1508. /* Skip to next line */
  1509. attr += line_length;
  1510. attr_length -= line_length;
  1511. }
  1512. /*
  1513. * PAX format uses UTF-8 as default charset for its metadata
  1514. * unless hdrcharset=BINARY is present in its header.
  1515. * We apply the charset specified by the hdrcharset option only
  1516. * when the hdrcharset attribute(in PAX header) is BINARY because
  1517. * we respect the charset described in PAX header and BINARY also
  1518. * means that metadata(filename,uname and gname) character-set
  1519. * is unknown.
  1520. */
  1521. if (tar->pax_hdrcharset_binary)
  1522. sconv = tar->opt_sconv;
  1523. else {
  1524. sconv = archive_string_conversion_from_charset(
  1525. &(a->archive), "UTF-8", 1);
  1526. if (sconv == NULL)
  1527. return (ARCHIVE_FATAL);
  1528. if (tar->compat_2x)
  1529. archive_string_conversion_set_opt(sconv,
  1530. SCONV_SET_OPT_UTF8_LIBARCHIVE2X);
  1531. }
  1532. if (archive_strlen(&(tar->entry_gname)) > 0) {
  1533. if (archive_entry_copy_gname_l(entry, tar->entry_gname.s,
  1534. archive_strlen(&(tar->entry_gname)), sconv) != 0) {
  1535. err = set_conversion_failed_error(a, sconv, "Gname");
  1536. if (err == ARCHIVE_FATAL)
  1537. return (err);
  1538. /* Use a converted an original name. */
  1539. archive_entry_copy_gname(entry, tar->entry_gname.s);
  1540. }
  1541. }
  1542. if (archive_strlen(&(tar->entry_linkpath)) > 0) {
  1543. if (archive_entry_copy_link_l(entry, tar->entry_linkpath.s,
  1544. archive_strlen(&(tar->entry_linkpath)), sconv) != 0) {
  1545. err = set_conversion_failed_error(a, sconv, "Linkname");
  1546. if (err == ARCHIVE_FATAL)
  1547. return (err);
  1548. /* Use a converted an original name. */
  1549. archive_entry_copy_link(entry, tar->entry_linkpath.s);
  1550. }
  1551. }
  1552. /*
  1553. * Some extensions (such as the GNU sparse file extensions)
  1554. * deliberately store a synthetic name under the regular 'path'
  1555. * attribute and the real file name under a different attribute.
  1556. * Since we're supposed to not care about the order, we
  1557. * have no choice but to store all of the various filenames
  1558. * we find and figure it all out afterwards. This is the
  1559. * figuring out part.
  1560. */
  1561. as = NULL;
  1562. if (archive_strlen(&(tar->entry_pathname_override)) > 0)
  1563. as = &(tar->entry_pathname_override);
  1564. else if (archive_strlen(&(tar->entry_pathname)) > 0)
  1565. as = &(tar->entry_pathname);
  1566. if (as != NULL) {
  1567. if (archive_entry_copy_pathname_l(entry, as->s,
  1568. archive_strlen(as), sconv) != 0) {
  1569. err = set_conversion_failed_error(a, sconv, "Pathname");
  1570. if (err == ARCHIVE_FATAL)
  1571. return (err);
  1572. /* Use a converted an original name. */
  1573. archive_entry_copy_pathname(entry, as->s);
  1574. }
  1575. }
  1576. if (archive_strlen(&(tar->entry_uname)) > 0) {
  1577. if (archive_entry_copy_uname_l(entry, tar->entry_uname.s,
  1578. archive_strlen(&(tar->entry_uname)), sconv) != 0) {
  1579. err = set_conversion_failed_error(a, sconv, "Uname");
  1580. if (err == ARCHIVE_FATAL)
  1581. return (err);
  1582. /* Use a converted an original name. */
  1583. archive_entry_copy_uname(entry, tar->entry_uname.s);
  1584. }
  1585. }
  1586. return (err);
  1587. }
  /*
   * Store a "LIBARCHIVE.xattr.<name>" attribute as an extended attribute
   * of 'entry'.  The part of 'name' after the prefix is URL-decoded and
   * 'value' is base64-decoded before being added.
   *
   * Returns 0 on success, 3 if 'name' lacks the prefix or has nothing
   * after it, 2 if the name cannot be decoded and 1 if the value cannot
   * be decoded.
   */
  static int
  pax_attribute_xattr(struct archive_entry *entry,
      const char *name, const char *value)
  {
      static const char prefix[] = "LIBARCHIVE.xattr.";
      const size_t prefix_len = sizeof(prefix) - 1;
      char *attr_name;
      void *payload;
      size_t payload_len;
      int status = 0;

      /* Require the prefix plus at least one more character. */
      if (strlen(name) <= prefix_len ||
          memcmp(name, prefix, prefix_len) != 0)
          return 3;

      /* URL-decode the attribute name. */
      attr_name = url_decode(name + prefix_len);
      if (attr_name == NULL)
          return 2;

      /* Base-64 decode the attribute value. */
      payload = base64_decode(value, strlen(value), &payload_len);
      if (payload == NULL)
          status = 1;
      else
          archive_entry_xattr_add_entry(entry, attr_name,
              payload, payload_len);

      free(attr_name);
      free(payload);
      return status;
  }
  /*
   * Store a "SCHILY.xattr.<name>" attribute as an extended attribute of
   * 'entry'.  Name and value are passed on undecoded; 'value_length' is
   * the number of value bytes.
   *
   * Returns 0 on success, 1 if 'name' lacks the prefix or has nothing
   * after it.
   */
  static int
  pax_attribute_schily_xattr(struct archive_entry *entry,
      const char *name, const char *value, size_t value_length)
  {
      static const char prefix[] = "SCHILY.xattr.";
      const size_t prefix_len = sizeof(prefix) - 1;

      /* Require the prefix plus at least one more character. */
      if (strlen(name) <= prefix_len ||
          memcmp(name, prefix, prefix_len) != 0)
          return 1;

      archive_entry_xattr_add_entry(entry, name + prefix_len,
          value, value_length);
      return 0;
  }
  1624. static int
  1625. pax_attribute_acl(struct archive_read *a, struct tar *tar,
  1626. struct archive_entry *entry, const char *value, int type)
  1627. {
  1628. int r;
  1629. const char* errstr;
  1630. switch (type) {
  1631. case ARCHIVE_ENTRY_ACL_TYPE_ACCESS:
  1632. errstr = "SCHILY.acl.access";
  1633. break;
  1634. case ARCHIVE_ENTRY_ACL_TYPE_DEFAULT:
  1635. errstr = "SCHILY.acl.default";
  1636. break;
  1637. case ARCHIVE_ENTRY_ACL_TYPE_NFS4:
  1638. errstr = "SCHILY.acl.ace";
  1639. break;
  1640. default:
  1641. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
  1642. "Unknown ACL type: %d", type);
  1643. return(ARCHIVE_FATAL);
  1644. }
  1645. if (tar->sconv_acl == NULL) {
  1646. tar->sconv_acl =
  1647. archive_string_conversion_from_charset(
  1648. &(a->archive), "UTF-8", 1);
  1649. if (tar->sconv_acl == NULL)
  1650. return (ARCHIVE_FATAL);
  1651. }
  1652. r = archive_acl_from_text_l(archive_entry_acl(entry), value, type,
  1653. tar->sconv_acl);
  1654. if (r != ARCHIVE_OK) {
  1655. if (r == ARCHIVE_FATAL) {
  1656. archive_set_error(&a->archive, ENOMEM,
  1657. "%s %s", "Can't allocate memory for ",
  1658. errstr);
  1659. return (r);
  1660. }
  1661. archive_set_error(&a->archive,
  1662. ARCHIVE_ERRNO_MISC, "%s %s", "Parse error: ", errstr);
  1663. }
  1664. return (r);
  1665. }
  1666. /*
  1667. * Parse a single key=value attribute. key/value pointers are
  1668. * assumed to point into reasonably long-lived storage.
  1669. *
  1670. * Note that POSIX reserves all-lowercase keywords. Vendor-specific
  1671. * extensions should always have keywords of the form "VENDOR.attribute"
  1672. * In particular, it's quite feasible to support many different
  1673. * vendor extensions here. I'm using "LIBARCHIVE" for extensions
  1674. * unique to this library.
  1675. *
  1676. * Investigate other vendor-specific extensions and see if
  1677. * any of them look useful.
  1678. */
  1679. static int
  1680. pax_attribute(struct archive_read *a, struct tar *tar,
  1681. struct archive_entry *entry, const char *key, const char *value, size_t value_length)
  1682. {
  1683. int64_t s;
  1684. long n;
  1685. int err = ARCHIVE_OK, r;
  1686. if (value == NULL)
  1687. value = ""; /* Disable compiler warning; do not pass
  1688. * NULL pointer to strlen(). */
  1689. switch (key[0]) {
  1690. case 'G':
  1691. /* Reject GNU.sparse.* headers on non-regular files. */
  1692. if (strncmp(key, "GNU.sparse", 10) == 0 &&
  1693. !tar->sparse_allowed) {
  1694. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
  1695. "Non-regular file cannot be sparse");
  1696. return (ARCHIVE_FATAL);
  1697. }
  1698. /* GNU "0.0" sparse pax format. */
  1699. if (strcmp(key, "GNU.sparse.numblocks") == 0) {
  1700. tar->sparse_offset = -1;
  1701. tar->sparse_numbytes = -1;
  1702. tar->sparse_gnu_major = 0;
  1703. tar->sparse_gnu_minor = 0;
  1704. }
  1705. if (strcmp(key, "GNU.sparse.offset") == 0) {
  1706. tar->sparse_offset = tar_atol10(value, strlen(value));
  1707. if (tar->sparse_numbytes != -1) {
  1708. if (gnu_add_sparse_entry(a, tar,
  1709. tar->sparse_offset, tar->sparse_numbytes)
  1710. != ARCHIVE_OK)
  1711. return (ARCHIVE_FATAL);
  1712. tar->sparse_offset = -1;
  1713. tar->sparse_numbytes = -1;
  1714. }
  1715. }
  1716. if (strcmp(key, "GNU.sparse.numbytes") == 0) {
  1717. tar->sparse_numbytes = tar_atol10(value, strlen(value));
  1718. if (tar->sparse_numbytes != -1) {
  1719. if (gnu_add_sparse_entry(a, tar,
  1720. tar->sparse_offset, tar->sparse_numbytes)
  1721. != ARCHIVE_OK)
  1722. return (ARCHIVE_FATAL);
  1723. tar->sparse_offset = -1;
  1724. tar->sparse_numbytes = -1;
  1725. }
  1726. }
  1727. if (strcmp(key, "GNU.sparse.size") == 0) {
  1728. tar->realsize = tar_atol10(value, strlen(value));
  1729. archive_entry_set_size(entry, tar->realsize);
  1730. tar->realsize_override = 1;
  1731. }
  1732. /* GNU "0.1" sparse pax format. */
  1733. if (strcmp(key, "GNU.sparse.map") == 0) {
  1734. tar->sparse_gnu_major = 0;
  1735. tar->sparse_gnu_minor = 1;
  1736. if (gnu_sparse_01_parse(a, tar, value) != ARCHIVE_OK)
  1737. return (ARCHIVE_WARN);
  1738. }
  1739. /* GNU "1.0" sparse pax format */
  1740. if (strcmp(key, "GNU.sparse.major") == 0) {
  1741. tar->sparse_gnu_major = (int)tar_atol10(value, strlen(value));
  1742. tar->sparse_gnu_pending = 1;
  1743. }
  1744. if (strcmp(key, "GNU.sparse.minor") == 0) {
  1745. tar->sparse_gnu_minor = (int)tar_atol10(value, strlen(value));
  1746. tar->sparse_gnu_pending = 1;
  1747. }
  1748. if (strcmp(key, "GNU.sparse.name") == 0) {
  1749. /*
  1750. * The real filename; when storing sparse
  1751. * files, GNU tar puts a synthesized name into
  1752. * the regular 'path' attribute in an attempt
  1753. * to limit confusion. ;-)
  1754. */
  1755. archive_strcpy(&(tar->entry_pathname_override), value);
  1756. }
  1757. if (strcmp(key, "GNU.sparse.realsize") == 0) {
  1758. tar->realsize = tar_atol10(value, strlen(value));
  1759. archive_entry_set_size(entry, tar->realsize);
  1760. tar->realsize_override = 1;
  1761. }
  1762. break;
  1763. case 'L':
  1764. /* Our extensions */
  1765. /* TODO: Handle arbitrary extended attributes... */
  1766. /*
  1767. if (strcmp(key, "LIBARCHIVE.xxxxxxx") == 0)
  1768. archive_entry_set_xxxxxx(entry, value);
  1769. */
  1770. if (strcmp(key, "LIBARCHIVE.creationtime") == 0) {
  1771. pax_time(value, &s, &n);
  1772. archive_entry_set_birthtime(entry, s, n);
  1773. }
  1774. if (memcmp(key, "LIBARCHIVE.xattr.", 17) == 0)
  1775. pax_attribute_xattr(entry, key, value);
  1776. break;
  1777. case 'S':
  1778. /* We support some keys used by the "star" archiver */
  1779. if (strcmp(key, "SCHILY.acl.access") == 0) {
  1780. r = pax_attribute_acl(a, tar, entry, value,
  1781. ARCHIVE_ENTRY_ACL_TYPE_ACCESS);
  1782. if (r == ARCHIVE_FATAL)
  1783. return (r);
  1784. } else if (strcmp(key, "SCHILY.acl.default") == 0) {
  1785. r = pax_attribute_acl(a, tar, entry, value,
  1786. ARCHIVE_ENTRY_ACL_TYPE_DEFAULT);
  1787. if (r == ARCHIVE_FATAL)
  1788. return (r);
  1789. } else if (strcmp(key, "SCHILY.acl.ace") == 0) {
  1790. r = pax_attribute_acl(a, tar, entry, value,
  1791. ARCHIVE_ENTRY_ACL_TYPE_NFS4);
  1792. if (r == ARCHIVE_FATAL)
  1793. return (r);
  1794. } else if (strcmp(key, "SCHILY.devmajor") == 0) {
  1795. archive_entry_set_rdevmajor(entry,
  1796. (dev_t)tar_atol10(value, strlen(value)));
  1797. } else if (strcmp(key, "SCHILY.devminor") == 0) {
  1798. archive_entry_set_rdevminor(entry,
  1799. (dev_t)tar_atol10(value, strlen(value)));
  1800. } else if (strcmp(key, "SCHILY.fflags") == 0) {
  1801. archive_entry_copy_fflags_text(entry, value);
  1802. } else if (strcmp(key, "SCHILY.dev") == 0) {
  1803. archive_entry_set_dev(entry,
  1804. (dev_t)tar_atol10(value, strlen(value)));
  1805. } else if (strcmp(key, "SCHILY.ino") == 0) {
  1806. archive_entry_set_ino(entry,
  1807. tar_atol10(value, strlen(value)));
  1808. } else if (strcmp(key, "SCHILY.nlink") == 0) {
  1809. archive_entry_set_nlink(entry, (unsigned)
  1810. tar_atol10(value, strlen(value)));
  1811. } else if (strcmp(key, "SCHILY.realsize") == 0) {
  1812. tar->realsize = tar_atol10(value, strlen(value));
  1813. tar->realsize_override = 1;
  1814. archive_entry_set_size(entry, tar->realsize);
  1815. } else if (strncmp(key, "SCHILY.xattr.", 13) == 0) {
  1816. pax_attribute_schily_xattr(entry, key, value,
  1817. value_length);
  1818. } else if (strcmp(key, "SUN.holesdata") == 0) {
  1819. /* A Solaris extension for sparse. */
  1820. r = solaris_sparse_parse(a, tar, entry, value);
  1821. if (r < err) {
  1822. if (r == ARCHIVE_FATAL)
  1823. return (r);
  1824. err = r;
  1825. archive_set_error(&a->archive,
  1826. ARCHIVE_ERRNO_MISC,
  1827. "Parse error: SUN.holesdata");
  1828. }
  1829. }
  1830. break;
  1831. case 'a':
  1832. if (strcmp(key, "atime") == 0) {
  1833. pax_time(value, &s, &n);
  1834. archive_entry_set_atime(entry, s, n);
  1835. }
  1836. break;
  1837. case 'c':
  1838. if (strcmp(key, "ctime") == 0) {
  1839. pax_time(value, &s, &n);
  1840. archive_entry_set_ctime(entry, s, n);
  1841. } else if (strcmp(key, "charset") == 0) {
  1842. /* TODO: Publish charset information in entry. */
  1843. } else if (strcmp(key, "comment") == 0) {
  1844. /* TODO: Publish comment in entry. */
  1845. }
  1846. break;
  1847. case 'g':
  1848. if (strcmp(key, "gid") == 0) {
  1849. archive_entry_set_gid(entry,
  1850. tar_atol10(value, strlen(value)));
  1851. } else if (strcmp(key, "gname") == 0) {
  1852. archive_strcpy(&(tar->entry_gname), value);
  1853. }
  1854. break;
  1855. case 'h':
  1856. if (strcmp(key, "hdrcharset") == 0) {
  1857. if (strcmp(value, "BINARY") == 0)
  1858. /* Binary mode. */
  1859. tar->pax_hdrcharset_binary = 1;
  1860. else if (strcmp(value, "ISO-IR 10646 2000 UTF-8") == 0)
  1861. tar->pax_hdrcharset_binary = 0;
  1862. }
  1863. break;
  1864. case 'l':
  1865. /* pax interchange doesn't distinguish hardlink vs. symlink. */
  1866. if (strcmp(key, "linkpath") == 0) {
  1867. archive_strcpy(&(tar->entry_linkpath), value);
  1868. }
  1869. break;
  1870. case 'm':
  1871. if (strcmp(key, "mtime") == 0) {
  1872. pax_time(value, &s, &n);
  1873. archive_entry_set_mtime(entry, s, n);
  1874. }
  1875. break;
  1876. case 'p':
  1877. if (strcmp(key, "path") == 0) {
  1878. archive_strcpy(&(tar->entry_pathname), value);
  1879. }
  1880. break;
  1881. case 'r':
  1882. /* POSIX has reserved 'realtime.*' */
  1883. break;
  1884. case 's':
  1885. /* POSIX has reserved 'security.*' */
  1886. /* Someday: if (strcmp(key, "security.acl") == 0) { ... } */
  1887. if (strcmp(key, "size") == 0) {
  1888. /* "size" is the size of the data in the entry. */
  1889. tar->entry_bytes_remaining
  1890. = tar_atol10(value, strlen(value));
  1891. /*
  1892. * The "size" pax header keyword always overrides the
  1893. * "size" field in the tar header.
  1894. * GNU.sparse.realsize, GNU.sparse.size and
  1895. * SCHILY.realsize override this value.
  1896. */
  1897. if (!tar->realsize_override) {
  1898. archive_entry_set_size(entry,
  1899. tar->entry_bytes_remaining);
  1900. tar->realsize
  1901. = tar->entry_bytes_remaining;
  1902. }
  1903. }
  1904. break;
  1905. case 'u':
  1906. if (strcmp(key, "uid") == 0) {
  1907. archive_entry_set_uid(entry,
  1908. tar_atol10(value, strlen(value)));
  1909. } else if (strcmp(key, "uname") == 0) {
  1910. archive_strcpy(&(tar->entry_uname), value);
  1911. }
  1912. break;
  1913. }
  1914. return (err);
  1915. }
  /*
   * Parse a decimal time value, which may include a fractional portion.
   *
   * 'p' is an optional '-', a run of decimal digits and, optionally, a
   * '.' followed by fractional digits.  The integer part is stored in
   * '*ps' (clamped to INT64_MAX in magnitude on overflow) and the
   * fraction, scaled to nanoseconds, in '*pn'.  At most nine fractional
   * digits are used.  If the integer part overflows, parsing stops there
   * and '*pn' is 0.
   */
  static void
  pax_time(const char *p, int64_t *ps, long *pn)
  {
      const int64_t max_prefix = INT64_MAX / 10;
      const int64_t max_last_digit = INT64_MAX % 10;
      const char *cur = p;
      const int negative = (*cur == '-');
      int64_t secs = 0;
      long nanos = 0;
      unsigned long scale;

      if (negative)
          cur++;

      /* Integer seconds, with an overflow check before each step. */
      for (; *cur >= '0' && *cur <= '9'; cur++) {
          const int d = *cur - '0';

          if (secs > max_prefix ||
              (secs == max_prefix && d > max_last_digit)) {
              secs = INT64_MAX;
              break;
          }
          secs = secs * 10 + d;
      }
      *ps = negative ? -secs : secs;

      /* Fraction: the first digit is worth 100000000 ns, the next a tenth. */
      if (*cur == '.') {
          for (scale = 100000000UL; scale != 0; scale /= 10) {
              ++cur;
              if (*cur < '0' || *cur > '9')
                  break;
              nanos += (*cur - '0') * scale;
          }
      }
      *pn = nanos;
  }
  1959. /*
  1960. * Parse GNU tar header
  1961. */
  1962. static int
  1963. header_gnutar(struct archive_read *a, struct tar *tar,
  1964. struct archive_entry *entry, const void *h, size_t *unconsumed)
  1965. {
  1966. const struct archive_entry_header_gnutar *header;
  1967. int64_t t;
  1968. int err = ARCHIVE_OK;
  1969. /*
  1970. * GNU header is like POSIX ustar, except 'prefix' is
  1971. * replaced with some other fields. This also means the
  1972. * filename is stored as in old-style archives.
  1973. */
  1974. /* Grab fields common to all tar variants. */
  1975. err = header_common(a, tar, entry, h);
  1976. if (err == ARCHIVE_FATAL)
  1977. return (err);
  1978. /* Copy filename over (to ensure null termination). */
  1979. header = (const struct archive_entry_header_gnutar *)h;
  1980. if (archive_entry_copy_pathname_l(entry,
  1981. header->name, sizeof(header->name), tar->sconv) != 0) {
  1982. err = set_conversion_failed_error(a, tar->sconv, "Pathname");
  1983. if (err == ARCHIVE_FATAL)
  1984. return (err);
  1985. }
  1986. /* Fields common to ustar and GNU */
  1987. /* XXX Can the following be factored out since it's common
  1988. * to ustar and gnu tar? Is it okay to move it down into
  1989. * header_common, perhaps? */
  1990. if (archive_entry_copy_uname_l(entry,
  1991. header->uname, sizeof(header->uname), tar->sconv) != 0) {
  1992. err = set_conversion_failed_error(a, tar->sconv, "Uname");
  1993. if (err == ARCHIVE_FATAL)
  1994. return (err);
  1995. }
  1996. if (archive_entry_copy_gname_l(entry,
  1997. header->gname, sizeof(header->gname), tar->sconv) != 0) {
  1998. err = set_conversion_failed_error(a, tar->sconv, "Gname");
  1999. if (err == ARCHIVE_FATAL)
  2000. return (err);
  2001. }
  2002. /* Parse out device numbers only for char and block specials */
  2003. if (header->typeflag[0] == '3' || header->typeflag[0] == '4') {
  2004. archive_entry_set_rdevmajor(entry, (dev_t)
  2005. tar_atol(header->rdevmajor, sizeof(header->rdevmajor)));
  2006. archive_entry_set_rdevminor(entry, (dev_t)
  2007. tar_atol(header->rdevminor, sizeof(header->rdevminor)));
  2008. } else
  2009. archive_entry_set_rdev(entry, 0);
  2010. tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
  2011. /* Grab GNU-specific fields. */
  2012. t = tar_atol(header->atime, sizeof(header->atime));
  2013. if (t > 0)
  2014. archive_entry_set_atime(entry, t, 0);
  2015. t = tar_atol(header->ctime, sizeof(header->ctime));
  2016. if (t > 0)
  2017. archive_entry_set_ctime(entry, t, 0);
  2018. if (header->realsize[0] != 0) {
  2019. tar->realsize
  2020. = tar_atol(header->realsize, sizeof(header->realsize));
  2021. archive_entry_set_size(entry, tar->realsize);
  2022. tar->realsize_override = 1;
  2023. }
  2024. if (header->sparse[0].offset[0] != 0) {
  2025. if (gnu_sparse_old_read(a, tar, header, unconsumed)
  2026. != ARCHIVE_OK)
  2027. return (ARCHIVE_FATAL);
  2028. } else {
  2029. if (header->isextended[0] != 0) {
  2030. /* XXX WTF? XXX */
  2031. }
  2032. }
  2033. return (err);
  2034. }
  2035. static int
  2036. gnu_add_sparse_entry(struct archive_read *a, struct tar *tar,
  2037. int64_t offset, int64_t remaining)
  2038. {
  2039. struct sparse_block *p;
  2040. p = (struct sparse_block *)calloc(1, sizeof(*p));
  2041. if (p == NULL) {
  2042. archive_set_error(&a->archive, ENOMEM, "Out of memory");
  2043. return (ARCHIVE_FATAL);
  2044. }
  2045. if (tar->sparse_last != NULL)
  2046. tar->sparse_last->next = p;
  2047. else
  2048. tar->sparse_list = p;
  2049. tar->sparse_last = p;
  2050. if (remaining < 0 || offset < 0) {
  2051. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Malformed sparse map data");
  2052. return (ARCHIVE_FATAL);
  2053. }
  2054. p->offset = offset;
  2055. p->remaining = remaining;
  2056. return (ARCHIVE_OK);
  2057. }
  2058. static void
  2059. gnu_clear_sparse_list(struct tar *tar)
  2060. {
  2061. struct sparse_block *p;
  2062. while (tar->sparse_list != NULL) {
  2063. p = tar->sparse_list;
  2064. tar->sparse_list = p->next;
  2065. free(p);
  2066. }
  2067. tar->sparse_last = NULL;
  2068. }
  2069. /*
  2070. * GNU tar old-format sparse data.
  2071. *
  2072. * GNU old-format sparse data is stored in a fixed-field
  2073. * format. Offset/size values are 11-byte octal fields (same
  2074. * format as 'size' field in ustart header). These are
  2075. * stored in the header, allocating subsequent header blocks
  2076. * as needed. Extending the header in this way is a pretty
  2077. * severe POSIX violation; this design has earned GNU tar a
  2078. * lot of criticism.
  2079. */
  2080. static int
  2081. gnu_sparse_old_read(struct archive_read *a, struct tar *tar,
  2082. const struct archive_entry_header_gnutar *header, size_t *unconsumed)
  2083. {
  2084. ssize_t bytes_read;
  2085. const void *data;
  2086. struct extended {
  2087. struct gnu_sparse sparse[21];
  2088. char isextended[1];
  2089. char padding[7];
  2090. };
  2091. const struct extended *ext;
  2092. if (gnu_sparse_old_parse(a, tar, header->sparse, 4) != ARCHIVE_OK)
  2093. return (ARCHIVE_FATAL);
  2094. if (header->isextended[0] == 0)
  2095. return (ARCHIVE_OK);
  2096. do {
  2097. tar_flush_unconsumed(a, unconsumed);
  2098. data = __archive_read_ahead(a, 512, &bytes_read);
  2099. if (bytes_read < 0)
  2100. return (ARCHIVE_FATAL);
  2101. if (bytes_read < 512) {
  2102. archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
  2103. "Truncated tar archive "
  2104. "detected while reading sparse file data");
  2105. return (ARCHIVE_FATAL);
  2106. }
  2107. *unconsumed = 512;
  2108. ext = (const struct extended *)data;
  2109. if (gnu_sparse_old_parse(a, tar, ext->sparse, 21) != ARCHIVE_OK)
  2110. return (ARCHIVE_FATAL);
  2111. } while (ext->isextended[0] != 0);
  2112. if (tar->sparse_list != NULL)
  2113. tar->entry_offset = tar->sparse_list->offset;
  2114. return (ARCHIVE_OK);
  2115. }
  2116. static int
  2117. gnu_sparse_old_parse(struct archive_read *a, struct tar *tar,
  2118. const struct gnu_sparse *sparse, int length)
  2119. {
  2120. while (length > 0 && sparse->offset[0] != 0) {
  2121. if (gnu_add_sparse_entry(a, tar,
  2122. tar_atol(sparse->offset, sizeof(sparse->offset)),
  2123. tar_atol(sparse->numbytes, sizeof(sparse->numbytes)))
  2124. != ARCHIVE_OK)
  2125. return (ARCHIVE_FATAL);
  2126. sparse++;
  2127. length--;
  2128. }
  2129. return (ARCHIVE_OK);
  2130. }
  2131. /*
  2132. * GNU tar sparse format 0.0
  2133. *
  2134. * Beginning with GNU tar 1.15, sparse files are stored using
  2135. * information in the pax extended header. The GNU tar maintainers
  2136. * have gone through a number of variations in the process of working
  2137. * out this scheme; fortunately, they're all numbered.
  2138. *
  2139. * Sparse format 0.0 uses attribute GNU.sparse.numblocks to store the
  2140. * number of blocks, and GNU.sparse.offset/GNU.sparse.numbytes to
  2141. * store offset/size for each block. The repeated instances of these
  2142. * latter fields violate the pax specification (which frowns on
  2143. * duplicate keys), so this format was quickly replaced.
  2144. */
  2145. /*
  2146. * GNU tar sparse format 0.1
  2147. *
  2148. * This version replaced the offset/numbytes attributes with
  2149. * a single "map" attribute that stored a list of integers. This
  2150. * format had two problems: First, the "map" attribute could be very
  2151. * long, which caused problems for some implementations. More
  2152. * importantly, the sparse data was lost when extracted by archivers
  2153. * that didn't recognize this extension.
  2154. */
  2155. static int
  2156. gnu_sparse_01_parse(struct archive_read *a, struct tar *tar, const char *p)
  2157. {
  2158. const char *e;
  2159. int64_t offset = -1, size = -1;
  2160. for (;;) {
  2161. e = p;
  2162. while (*e != '\0' && *e != ',') {
  2163. if (*e < '0' || *e > '9')
  2164. return (ARCHIVE_WARN);
  2165. e++;
  2166. }
  2167. if (offset < 0) {
  2168. offset = tar_atol10(p, e - p);
  2169. if (offset < 0)
  2170. return (ARCHIVE_WARN);
  2171. } else {
  2172. size = tar_atol10(p, e - p);
  2173. if (size < 0)
  2174. return (ARCHIVE_WARN);
  2175. if (gnu_add_sparse_entry(a, tar, offset, size)
  2176. != ARCHIVE_OK)
  2177. return (ARCHIVE_FATAL);
  2178. offset = -1;
  2179. }
  2180. if (*e == '\0')
  2181. return (ARCHIVE_OK);
  2182. p = e + 1;
  2183. }
  2184. }
  2185. /*
  2186. * GNU tar sparse format 1.0
  2187. *
  2188. * The idea: The offset/size data is stored as a series of base-10
  2189. * ASCII numbers prepended to the file data, so that dearchivers that
  2190. * don't support this format will extract the block map along with the
  2191. * data and a separate post-process can restore the sparseness.
  2192. *
  2193. * Unfortunately, GNU tar 1.16 had a bug that added unnecessary
  2194. * padding to the body of the file when using this format. GNU tar
  2195. * 1.17 corrected this bug without bumping the version number, so
  2196. * it's not possible to support both variants. This code supports
  2197. * the later variant at the expense of not supporting the former.
  2198. *
  2199. * This variant also replaced GNU.sparse.size with GNU.sparse.realsize
  2200. * and introduced the GNU.sparse.major/GNU.sparse.minor attributes.
  2201. */
  2202. /*
  2203. * Read the next line from the input, and parse it as a decimal
  2204. * integer followed by '\n'. Returns positive integer value or
  2205. * negative on error.
  2206. */
  2207. static int64_t
  2208. gnu_sparse_10_atol(struct archive_read *a, struct tar *tar,
  2209. int64_t *remaining, size_t *unconsumed)
  2210. {
  2211. int64_t l, limit, last_digit_limit;
  2212. const char *p;
  2213. ssize_t bytes_read;
  2214. int base, digit;
  2215. base = 10;
  2216. limit = INT64_MAX / base;
  2217. last_digit_limit = INT64_MAX % base;
  2218. /*
  2219. * Skip any lines starting with '#'; GNU tar specs
  2220. * don't require this, but they should.
  2221. */
  2222. do {
  2223. bytes_read = readline(a, tar, &p,
  2224. (ssize_t)tar_min(*remaining, 100), unconsumed);
  2225. if (bytes_read <= 0)
  2226. return (ARCHIVE_FATAL);
  2227. *remaining -= bytes_read;
  2228. } while (p[0] == '#');
  2229. l = 0;
  2230. while (bytes_read > 0) {
  2231. if (*p == '\n')
  2232. return (l);
  2233. if (*p < '0' || *p >= '0' + base)
  2234. return (ARCHIVE_WARN);
  2235. digit = *p - '0';
  2236. if (l > limit || (l == limit && digit > last_digit_limit))
  2237. l = INT64_MAX; /* Truncate on overflow. */
  2238. else
  2239. l = (l * base) + digit;
  2240. p++;
  2241. bytes_read--;
  2242. }
  2243. /* TODO: Error message. */
  2244. return (ARCHIVE_WARN);
  2245. }
  2246. /*
  2247. * Returns length (in bytes) of the sparse data description
  2248. * that was read.
  2249. */
  2250. static ssize_t
  2251. gnu_sparse_10_read(struct archive_read *a, struct tar *tar, size_t *unconsumed)
  2252. {
  2253. ssize_t bytes_read;
  2254. int entries;
  2255. int64_t offset, size, to_skip, remaining;
  2256. /* Clear out the existing sparse list. */
  2257. gnu_clear_sparse_list(tar);
  2258. remaining = tar->entry_bytes_remaining;
  2259. /* Parse entries. */
  2260. entries = (int)gnu_sparse_10_atol(a, tar, &remaining, unconsumed);
  2261. if (entries < 0)
  2262. return (ARCHIVE_FATAL);
  2263. /* Parse the individual entries. */
  2264. while (entries-- > 0) {
  2265. /* Parse offset/size */
  2266. offset = gnu_sparse_10_atol(a, tar, &remaining, unconsumed);
  2267. if (offset < 0)
  2268. return (ARCHIVE_FATAL);
  2269. size = gnu_sparse_10_atol(a, tar, &remaining, unconsumed);
  2270. if (size < 0)
  2271. return (ARCHIVE_FATAL);
  2272. /* Add a new sparse entry. */
  2273. if (gnu_add_sparse_entry(a, tar, offset, size) != ARCHIVE_OK)
  2274. return (ARCHIVE_FATAL);
  2275. }
  2276. /* Skip rest of block... */
  2277. tar_flush_unconsumed(a, unconsumed);
  2278. bytes_read = (ssize_t)(tar->entry_bytes_remaining - remaining);
  2279. to_skip = 0x1ff & -bytes_read;
  2280. /* Fail if tar->entry_bytes_remaing would get negative */
  2281. if (to_skip > remaining)
  2282. return (ARCHIVE_FATAL);
  2283. if (to_skip != __archive_read_consume(a, to_skip))
  2284. return (ARCHIVE_FATAL);
  2285. return ((ssize_t)(bytes_read + to_skip));
  2286. }
  2287. /*
  2288. * Solaris pax extension for a sparse file. This is recorded with the
  2289. * data and hole pairs. The way recording sparse information by Solaris'
  2290. * pax simply indicates where data and sparse are, so the stored contents
  2291. * consist of both data and hole.
  2292. */
  2293. static int
  2294. solaris_sparse_parse(struct archive_read *a, struct tar *tar,
  2295. struct archive_entry *entry, const char *p)
  2296. {
  2297. const char *e;
  2298. int64_t start, end;
  2299. int hole = 1;
  2300. (void)entry; /* UNUSED */
  2301. end = 0;
  2302. if (*p == ' ')
  2303. p++;
  2304. else
  2305. return (ARCHIVE_WARN);
  2306. for (;;) {
  2307. e = p;
  2308. while (*e != '\0' && *e != ' ') {
  2309. if (*e < '0' || *e > '9')
  2310. return (ARCHIVE_WARN);
  2311. e++;
  2312. }
  2313. start = end;
  2314. end = tar_atol10(p, e - p);
  2315. if (end < 0)
  2316. return (ARCHIVE_WARN);
  2317. if (start < end) {
  2318. if (gnu_add_sparse_entry(a, tar, start,
  2319. end - start) != ARCHIVE_OK)
  2320. return (ARCHIVE_FATAL);
  2321. tar->sparse_last->hole = hole;
  2322. }
  2323. if (*e == '\0')
  2324. return (ARCHIVE_OK);
  2325. p = e + 1;
  2326. hole = hole == 0;
  2327. }
  2328. }
  /*-
   * Convert a tar header numeric field to an integer.
   *
   * Traditional tar formats (including POSIX) specify base-8 for
   * all of the standard numeric fields.  This is a significant limitation
   * in practice:
   *   = file size is limited to 8GB
   *   = rdevmajor and rdevminor are limited to 21 bits
   *   = uid/gid are limited to 21 bits
   *
   * There are two workarounds for this:
   *   = pax extended headers, which use variable-length string fields
   *   = GNU tar and STAR both allow either base-8 or base-256 in
   *      most fields.  The high bit is set to indicate base-256.
   *
   * On read, this implementation supports both extensions: the field is
   * handed to tar_atol256() when the high bit of its first byte is set
   * and to tar_atol8() otherwise.
   */
  static int64_t
  tar_atol(const char *p, size_t char_cnt)
  {
      /*
       * Technically, GNU tar considers a field to be in base-256
       * only if the first byte is 0xff or 0x80.
       */
      const int is_base256 = (*p & 0x80) != 0;

      return (is_base256 ? tar_atol256(p, char_cnt)
          : tar_atol8(p, char_cnt));
  }
  /*
   * Parse at most 'char_cnt' bytes at 'p' as an integer in the given
   * base (2..10): optional leading spaces/tabs, an optional '-', then
   * digits.  Parsing stops at the first byte that is not a digit of the
   * base or when the field is exhausted; an empty digit run yields 0.
   *
   * Values that do not fit are clamped to INT64_MAX (or INT64_MIN for
   * negative input).
   *
   * Note that this implementation does not (and should not!) obey
   * locale settings; you cannot simply substitute strtol here, since
   * it does obey locale.
   */
  static int64_t
  tar_atol_base_n(const char *p, size_t char_cnt, int base)
  {
      uint64_t magnitude, max_magnitude, limit, last_digit_limit;
      int digit, negative;

      /* The pointer is never dereferenced once char_cnt reaches zero. */
      while (char_cnt != 0 && (*p == ' ' || *p == '\t')) {
          p++;
          char_cnt--;
      }

      negative = 0;
      if (char_cnt != 0 && *p == '-') {
          negative = 1;
          p++;
          char_cnt--;
      }

      /*
       * Accumulate the magnitude as an unsigned value so the full
       * negative range (down to INT64_MIN, whose magnitude is
       * INT64_MAX + 1) is representable without signed overflow.
       */
      max_magnitude = (uint64_t)INT64_MAX + (negative ? 1u : 0u);
      limit = max_magnitude / (uint64_t)base;
      last_digit_limit = max_magnitude % (uint64_t)base;

      magnitude = 0;
      /* Check the remaining length BEFORE looking at the next byte. */
      while (char_cnt != 0) {
          digit = *p - '0';
          if (digit < 0 || digit >= base)
              break;
          if (magnitude > limit || (magnitude == limit &&
              (uint64_t)digit > last_digit_limit)) {
              /* Truncate on overflow. */
              return (negative ? INT64_MIN : INT64_MAX);
          }
          magnitude = (magnitude * (uint64_t)base) + (uint64_t)digit;
          p++;
          char_cnt--;
      }

      if (!negative)
          return ((int64_t)magnitude);
      /* Only INT64_MIN has a magnitude above INT64_MAX. */
      if (magnitude > (uint64_t)INT64_MAX)
          return (INT64_MIN);
      return (-(int64_t)magnitude);
  }
  /*
   * Parse up to 'char_cnt' bytes at 'p' as an octal integer; a thin
   * wrapper that forwards to tar_atol_base_n() with base 8.
   */
  static int64_t
  tar_atol8(const char *p, size_t char_cnt)
  {
      const int octal_base = 8;

      return (tar_atol_base_n(p, char_cnt, octal_base));
  }
  /*
   * Parse up to 'char_cnt' bytes at 'p' as a decimal integer; a thin
   * wrapper that forwards to tar_atol_base_n() with base 10.
   */
  static int64_t
  tar_atol10(const char *p, size_t char_cnt)
  {
      const int decimal_base = 10;

      return (tar_atol_base_n(p, char_cnt, decimal_base));
  }
  /*
   * Parse a base-256 integer.  This is just a variable-length
   * twos-complement signed binary value in big-endian order, except
   * that the high-order bit is ignored.  The values here can be up to
   * 12 bytes, so we need to be careful about overflowing 64-bit
   * (8-byte) integers.
   *
   * '_p' points at the field and 'char_cnt' is its length in bytes.
   * Values that do not fit in an int64_t are clamped to INT64_MAX or
   * INT64_MIN according to their sign.  A zero-length field yields 0.
   *
   * This code unashamedly assumes that the local machine uses 8-bit
   * bytes and twos-complement arithmetic.
   */
  static int64_t
  tar_atol256(const char *_p, size_t char_cnt)
  {
      const unsigned char *bytes = (const unsigned char *)_p;
      uint64_t value;
      unsigned char cur, sign_fill;

      /*
       * Nothing to parse.  Without this guard the pre-decrement in the
       * accumulation loop below would wrap the unsigned counter and
       * read far beyond the field.
       */
      if (char_cnt == 0)
          return (0);

      /* Extend 7-bit 2s-comp to 8-bit 2s-comp, decide sign. */
      cur = *bytes;
      if (cur & 0x40) {
          sign_fill = 0xff;
          cur |= 0x80;
          value = ~(uint64_t)0;
      } else {
          sign_fill = 0;
          cur &= 0x7f;
          value = 0;
      }

      /* If more than 8 bytes, check that we can ignore
       * high-order bits without overflow. */
      while (char_cnt > sizeof(int64_t)) {
          --char_cnt;
          if (cur != sign_fill)
              return (sign_fill ? INT64_MIN : INT64_MAX);
          cur = *++bytes;
      }

      /* cur is first byte that fits; if sign mismatch, return overflow */
      if ((cur ^ sign_fill) & 0x80)
          return (sign_fill ? INT64_MIN : INT64_MAX);

      /* Accumulate remaining bytes. */
      while (--char_cnt > 0) {
          value = (value << 8) | cur;
          cur = *++bytes;
      }
      value = (value << 8) | cur;

      /* Return signed twos-complement value. */
      return ((int64_t)value);
  }
  2458. /*
  2459. * Returns length of line (including trailing newline)
  2460. * or negative on error. 'start' argument is updated to
  2461. * point to first character of line. This avoids copying
  2462. * when possible.
  2463. */
  2464. static ssize_t
  2465. readline(struct archive_read *a, struct tar *tar, const char **start,
  2466. ssize_t limit, size_t *unconsumed)
  2467. {
  2468. ssize_t bytes_read;
  2469. ssize_t total_size = 0;
  2470. const void *t;
  2471. const char *s;
  2472. void *p;
  2473. tar_flush_unconsumed(a, unconsumed);
  2474. t = __archive_read_ahead(a, 1, &bytes_read);
  2475. if (bytes_read <= 0)
  2476. return (ARCHIVE_FATAL);
  2477. s = t; /* Start of line? */
  2478. p = memchr(t, '\n', bytes_read);
  2479. /* If we found '\n' in the read buffer, return pointer to that. */
  2480. if (p != NULL) {
  2481. bytes_read = 1 + ((const char *)p) - s;
  2482. if (bytes_read > limit) {
  2483. archive_set_error(&a->archive,
  2484. ARCHIVE_ERRNO_FILE_FORMAT,
  2485. "Line too long");
  2486. return (ARCHIVE_FATAL);
  2487. }
  2488. *unconsumed = bytes_read;
  2489. *start = s;
  2490. return (bytes_read);
  2491. }
  2492. *unconsumed = bytes_read;
  2493. /* Otherwise, we need to accumulate in a line buffer. */
  2494. for (;;) {
  2495. if (total_size + bytes_read > limit) {
  2496. archive_set_error(&a->archive,
  2497. ARCHIVE_ERRNO_FILE_FORMAT,
  2498. "Line too long");
  2499. return (ARCHIVE_FATAL);
  2500. }
  2501. if (archive_string_ensure(&tar->line, total_size + bytes_read) == NULL) {
  2502. archive_set_error(&a->archive, ENOMEM,
  2503. "Can't allocate working buffer");
  2504. return (ARCHIVE_FATAL);
  2505. }
  2506. memcpy(tar->line.s + total_size, t, bytes_read);
  2507. tar_flush_unconsumed(a, unconsumed);
  2508. total_size += bytes_read;
  2509. /* If we found '\n', clean up and return. */
  2510. if (p != NULL) {
  2511. *start = tar->line.s;
  2512. return (total_size);
  2513. }
  2514. /* Read some more. */
  2515. t = __archive_read_ahead(a, 1, &bytes_read);
  2516. if (bytes_read <= 0)
  2517. return (ARCHIVE_FATAL);
  2518. s = t; /* Start of line? */
  2519. p = memchr(t, '\n', bytes_read);
  2520. /* If we found '\n', trim the read. */
  2521. if (p != NULL) {
  2522. bytes_read = 1 + ((const char *)p) - s;
  2523. }
  2524. *unconsumed = bytes_read;
  2525. }
  2526. }
  /*
   * base64_decode - Base64 decode
   *
   * This accepts most variations of base-64 encoding, including:
   *    * with or without line breaks
   *    * with or without the final group padded with '=' or '_' characters
   * (The most economical Base-64 variant does not pad the last group and
   * omits line breaks; RFC1341 used for MIME requires both.)
   *
   * Decodes 'len' bytes at 's'.  Characters outside the base-64 alphabet
   * are skipped, and the first '=' or '_' ends decoding.  Returns a
   * malloc()ed buffer the caller must free (it is NOT NUL-terminated)
   * and stores the number of decoded bytes in '*out_len'; on allocation
   * failure returns NULL and sets '*out_len' to 0.
   */
  static char *
  base64_decode(const char *s, size_t len, size_t *out_len)
  {
      static const unsigned char digits[64] = {
          'A','B','C','D','E','F','G','H','I','J','K','L','M','N',
          'O','P','Q','R','S','T','U','V','W','X','Y','Z','a','b',
          'c','d','e','f','g','h','i','j','k','l','m','n','o','p',
          'q','r','s','t','u','v','w','x','y','z','0','1','2','3',
          '4','5','6','7','8','9','+','/' };
      unsigned char decode_table[128];
      char *out, *d;
      const unsigned char *src = (const unsigned char *)s;
      unsigned i;

      /*
       * Build the reverse lookup table on the stack for every call.
       * A lazily initialized static table would be shared, mutable
       * state: concurrent first calls could observe it half-filled.
       */
      memset(decode_table, 0xff, sizeof(decode_table));
      for (i = 0; i < sizeof(digits); i++)
          decode_table[digits[i]] = (unsigned char)i;

      /* Allocate enough space to hold the entire output. */
      /* Note that we may not use all of this... */
      out = (char *)malloc(len - len / 4 + 1);
      if (out == NULL) {
          *out_len = 0;
          return (NULL);
      }
      d = out;

      while (len > 0) {
          /* Collect the next group of (up to) four characters. */
          int v = 0;
          int group_size = 0;
          while (group_size < 4 && len > 0) {
              /* '=' or '_' padding indicates final group. */
              if (*src == '=' || *src == '_') {
                  len = 0;
                  break;
              }
              /* Skip illegal characters (including line breaks) */
              if (*src > 127 || *src < 32
                  || decode_table[*src] == 0xff) {
                  len--;
                  src++;
                  continue;
              }
              v <<= 6;
              v |= decode_table[*src++];
              len--;
              group_size++;
          }
          /* Align a short group properly. */
          v <<= 6 * (4 - group_size);
          /* Unpack the group we just collected. */
          switch (group_size) {
          case 4: d[2] = v & 0xff;
              /* FALLTHROUGH */
          case 3: d[1] = (v >> 8) & 0xff;
              /* FALLTHROUGH */
          case 2: d[0] = (v >> 16) & 0xff;
              break;
          case 1: /* this is invalid! */
              break;
          default: /* empty group: nothing to emit */
              break;
          }
          /* 4 chars -> 3 bytes, 3 -> 2, 2 -> 1, fewer -> 0. */
          d += group_size * 3 / 4;
      }

      *out_len = d - out;
      return (out);
  }
  /*
   * Return a newly malloc()ed, NUL-terminated copy of 'in' in which
   * every "%XY" sequence with two hexadecimal digits is replaced by the
   * byte it encodes.  A '%' that is not followed by two hex digits is
   * copied through unchanged.  Returns NULL if allocation fails; the
   * caller owns the returned buffer.
   */
  static char *
  url_decode(const char *in)
  {
      const char *rd = in;
      char *result, *wr;
      int hi, lo;

      /* Decoding never grows the string, so strlen(in) + 1 suffices. */
      result = (char *)malloc(strlen(in) + 1);
      if (result == NULL)
          return (NULL);
      wr = result;

      while (*rd != '\0') {
          hi = -1;
          lo = -1;
          /* Only look at the next two bytes if both exist. */
          if (rd[0] == '%' && rd[1] != '\0' && rd[2] != '\0') {
              hi = tohex(rd[1]);
              lo = tohex(rd[2]);
          }
          if (hi >= 0 && lo >= 0) {
              /* Valid escape: emit one byte, consume three. */
              *wr++ = ((hi << 4) | lo);
              rd += 3;
          } else {
              /* Ordinary character, or a malformed escape. */
              *wr++ = *rd++;
          }
      }
      *wr = '\0';
      return (result);
  }
  /*
   * Map a hexadecimal digit character ('0'-'9', 'A'-'F' or 'a'-'f') to
   * its numeric value 0..15.  Returns -1 for any other character.
   */
  static int
  tohex(int c)
  {
      if ('0' <= c && c <= '9')
          return (c - '0');
      if ('A' <= c && c <= 'F')
          return (10 + (c - 'A'));
      if ('a' <= c && c <= 'f')
          return (10 + (c - 'a'));
      return (-1);
  }