archive_read_support_format_tar.c 83 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904
  1. /*-
  2. * Copyright (c) 2003-2007 Tim Kientzle
  3. * Copyright (c) 2011-2012 Michihiro NAKAJIMA
  4. * Copyright (c) 2016 Martin Matuska
  5. * All rights reserved.
  6. *
  7. * Redistribution and use in source and binary forms, with or without
  8. * modification, are permitted provided that the following conditions
  9. * are met:
  10. * 1. Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * 2. Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in the
  14. * documentation and/or other materials provided with the distribution.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
  17. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  18. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  19. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
  20. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  21. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  22. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  23. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  25. * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. #include "archive_platform.h"
  28. __FBSDID("$FreeBSD: head/lib/libarchive/archive_read_support_format_tar.c 201161 2009-12-29 05:44:39Z kientzle $");
  29. #ifdef HAVE_ERRNO_H
  30. #include <errno.h>
  31. #endif
  32. #include <stddef.h>
  33. #ifdef HAVE_STDLIB_H
  34. #include <stdlib.h>
  35. #endif
  36. #ifdef HAVE_STRING_H
  37. #include <string.h>
  38. #endif
  39. #include "archive.h"
  40. #include "archive_acl_private.h" /* For ACL parsing routines. */
  41. #include "archive_entry.h"
  42. #include "archive_entry_locale.h"
  43. #include "archive_private.h"
  44. #include "archive_read_private.h"
  45. #define tar_min(a,b) ((a) < (b) ? (a) : (b))
  /*
   * Field layout of a POSIX "ustar" tar header.
   *
   * Every member is a fixed-width character array; together the members
   * occupy 500 bytes.
   */
  struct archive_entry_header_ustar {
      char name[100];
      char mode[8];
      char uid[8];
      char gid[8];
      char size[12];
      char mtime[12];
      char checksum[8];
      char typeflag[1];
      char linkname[100];   /* the "old format" header stops after this field */
      char magic[6];        /* POSIX writers store "ustar\0" here */
      char version[2];      /* POSIX writers store "00" here */
      char uname[32];
      char gname[32];
      char rdevmajor[8];
      char rdevminor[8];
      char prefix[155];
  };
  67. /*
  68. * Structure of GNU tar header
  69. */
  /*
   * One sparse-map slot as stored inside a GNU tar header: two 12-byte
   * character fields, an offset followed by a byte count.
   */
  struct gnu_sparse {
      char offset[12];
      char numbytes[12];
  };
  74. struct archive_entry_header_gnutar {
  75. char name[100];
  76. char mode[8];
  77. char uid[8];
  78. char gid[8];
  79. char size[12];
  80. char mtime[12];
  81. char checksum[8];
  82. char typeflag[1];
  83. char linkname[100];
  84. char magic[8]; /* "ustar \0" (note blank/blank/null at end) */
  85. char uname[32];
  86. char gname[32];
  87. char rdevmajor[8];
  88. char rdevminor[8];
  89. char atime[12];
  90. char ctime[12];
  91. char offset[12];
  92. char longnames[4];
  93. char unused[1];
  94. struct gnu_sparse sparse[4];
  95. char isextended[1];
  96. char realsize[12];
  97. /*
  98. * Old GNU format doesn't use POSIX 'prefix' field; they use
  99. * the 'L' (longname) entry instead.
  100. */
  101. };
  102. /*
  103. * Data specific to this format.
  104. */
  /*
   * Node of the singly linked sparse map kept for the current entry.
   */
  struct sparse_block {
      struct sparse_block *next;  /* following node, NULL at the tail */
      int64_t offset;             /* reported as the data offset of this block */
      int64_t remaining;          /* bytes left in this block; 0 means exhausted */
      int hole;                   /* non-zero: not reported as a data region */
  };
  111. struct tar {
  112. struct archive_string acl_text;
  113. struct archive_string entry_pathname;
  114. /* For "GNU.sparse.name" and other similar path extensions. */
  115. struct archive_string entry_pathname_override;
  116. struct archive_string entry_linkpath;
  117. struct archive_string entry_uname;
  118. struct archive_string entry_gname;
  119. struct archive_string longlink;
  120. struct archive_string longname;
  121. struct archive_string pax_header;
  122. struct archive_string pax_global;
  123. struct archive_string line;
  124. int pax_hdrcharset_binary;
  125. int header_recursion_depth;
  126. int64_t entry_bytes_remaining;
  127. int64_t entry_offset;
  128. int64_t entry_padding;
  129. int64_t entry_bytes_unconsumed;
  130. int64_t realsize;
  131. int sparse_allowed;
  132. struct sparse_block *sparse_list;
  133. struct sparse_block *sparse_last;
  134. int64_t sparse_offset;
  135. int64_t sparse_numbytes;
  136. int sparse_gnu_major;
  137. int sparse_gnu_minor;
  138. char sparse_gnu_pending;
  139. struct archive_string localname;
  140. struct archive_string_conv *opt_sconv;
  141. struct archive_string_conv *sconv;
  142. struct archive_string_conv *sconv_acl;
  143. struct archive_string_conv *sconv_default;
  144. int init_default_conversion;
  145. int compat_2x;
  146. int process_mac_extensions;
  147. int read_concatenated_archives;
  148. int realsize_override;
  149. };
  /*
   * Forward declarations of the file-local helpers.
   */

  /* Callbacks handed to the format registry. */
  static int archive_read_format_tar_bid(struct archive_read *, int);
  static int archive_read_format_tar_options(struct archive_read *,
          const char *, const char *);
  static int archive_read_format_tar_cleanup(struct archive_read *);
  static int archive_read_format_tar_read_data(struct archive_read *a,
          const void **buff, size_t *size, int64_t *offset);
  static int archive_read_format_tar_skip(struct archive_read *a);
  static int archive_read_format_tar_read_header(struct archive_read *,
          struct archive_entry *);

  /* Header block handling. */
  static int archive_block_is_null(const char *p);
  static int checksum(struct archive_read *, const void *);
  static int tar_read_header(struct archive_read *, struct tar *,
          struct archive_entry *, size_t *);
  static int header_Solaris_ACL(struct archive_read *, struct tar *,
          struct archive_entry *, const void *, size_t *);
  static int header_common(struct archive_read *, struct tar *,
          struct archive_entry *, const void *);
  static int header_old_tar(struct archive_read *, struct tar *,
          struct archive_entry *, const void *);
  static int header_pax_extensions(struct archive_read *, struct tar *,
          struct archive_entry *, const void *, size_t *);
  static int header_pax_global(struct archive_read *, struct tar *,
          struct archive_entry *, const void *h, size_t *);
  static int header_longlink(struct archive_read *, struct tar *,
          struct archive_entry *, const void *h, size_t *);
  static int header_longname(struct archive_read *, struct tar *,
          struct archive_entry *, const void *h, size_t *);
  static int read_mac_metadata_blob(struct archive_read *, struct tar *,
          struct archive_entry *, const void *h, size_t *);
  static int header_volume(struct archive_read *, struct tar *,
          struct archive_entry *, const void *h, size_t *);
  static int header_ustar(struct archive_read *, struct tar *,
          struct archive_entry *, const void *h);
  static int header_gnutar(struct archive_read *, struct tar *,
          struct archive_entry *, const void *h, size_t *);

  /* Sparse maps. */
  static int gnu_add_sparse_entry(struct archive_read *, struct tar *,
          int64_t offset, int64_t remaining);
  static void gnu_clear_sparse_list(struct tar *);
  static int gnu_sparse_old_read(struct archive_read *, struct tar *,
          const struct archive_entry_header_gnutar *header, size_t *);
  static int gnu_sparse_old_parse(struct archive_read *, struct tar *,
          const struct gnu_sparse *sparse, int length);
  static int gnu_sparse_01_parse(struct archive_read *, struct tar *,
          const char *);
  static ssize_t gnu_sparse_10_read(struct archive_read *, struct tar *,
          size_t *);
  static int solaris_sparse_parse(struct archive_read *, struct tar *,
          struct archive_entry *, const char *);

  /* pax extended attributes. */
  static int pax_attribute(struct archive_read *, struct tar *,
          struct archive_entry *, const char *key, const char *value,
          size_t value_length);
  static int pax_attribute_acl(struct archive_read *, struct tar *,
          struct archive_entry *, const char *, int);
  static int pax_attribute_xattr(struct archive_entry *, const char *,
          const char *);
  static int pax_header(struct archive_read *, struct tar *,
          struct archive_entry *, struct archive_string *);
  static void pax_time(const char *, int64_t *sec, long *nanos);

  /* Input helpers. */
  static ssize_t readline(struct archive_read *, struct tar *, const char **,
          ssize_t limit, size_t *);
  static int read_body_to_string(struct archive_read *, struct tar *,
          struct archive_string *, const void *h, size_t *);
  static void tar_flush_unconsumed(struct archive_read *, size_t *);

  /* Number and text decoding. */
  static int64_t tar_atol(const char *, size_t);
  static int64_t tar_atol10(const char *, size_t);
  static int64_t tar_atol256(const char *, size_t);
  static int64_t tar_atol8(const char *, size_t);
  static int tohex(int c);
  static char *base64_decode(const char *, size_t, size_t *);
  static char *url_decode(const char *);
  220. int
  221. archive_read_support_format_gnutar(struct archive *a)
  222. {
  223. archive_check_magic(a, ARCHIVE_READ_MAGIC,
  224. ARCHIVE_STATE_NEW, "archive_read_support_format_gnutar");
  225. return (archive_read_support_format_tar(a));
  226. }
  227. int
  228. archive_read_support_format_tar(struct archive *_a)
  229. {
  230. struct archive_read *a = (struct archive_read *)_a;
  231. struct tar *tar;
  232. int r;
  233. archive_check_magic(_a, ARCHIVE_READ_MAGIC,
  234. ARCHIVE_STATE_NEW, "archive_read_support_format_tar");
  235. tar = (struct tar *)calloc(1, sizeof(*tar));
  236. if (tar == NULL) {
  237. archive_set_error(&a->archive, ENOMEM,
  238. "Can't allocate tar data");
  239. return (ARCHIVE_FATAL);
  240. }
  241. #ifdef HAVE_COPYFILE_H
  242. /* Set this by default on Mac OS. */
  243. tar->process_mac_extensions = 1;
  244. #endif
  245. r = __archive_read_register_format(a, tar, "tar",
  246. archive_read_format_tar_bid,
  247. archive_read_format_tar_options,
  248. archive_read_format_tar_read_header,
  249. archive_read_format_tar_read_data,
  250. archive_read_format_tar_skip,
  251. NULL,
  252. archive_read_format_tar_cleanup,
  253. NULL,
  254. NULL);
  255. if (r != ARCHIVE_OK)
  256. free(tar);
  257. return (ARCHIVE_OK);
  258. }
  259. static int
  260. archive_read_format_tar_cleanup(struct archive_read *a)
  261. {
  262. struct tar *tar;
  263. tar = (struct tar *)(a->format->data);
  264. gnu_clear_sparse_list(tar);
  265. archive_string_free(&tar->acl_text);
  266. archive_string_free(&tar->entry_pathname);
  267. archive_string_free(&tar->entry_pathname_override);
  268. archive_string_free(&tar->entry_linkpath);
  269. archive_string_free(&tar->entry_uname);
  270. archive_string_free(&tar->entry_gname);
  271. archive_string_free(&tar->line);
  272. archive_string_free(&tar->pax_global);
  273. archive_string_free(&tar->pax_header);
  274. archive_string_free(&tar->longname);
  275. archive_string_free(&tar->longlink);
  276. archive_string_free(&tar->localname);
  277. free(tar);
  278. (a->format->data) = NULL;
  279. return (ARCHIVE_OK);
  280. }
  /*
   * Check whether a numeric header field looks plausible.
   *
   * The check is deliberately forgiving because tar writers disagree
   * about how numbers are stored:
   *  = POSIX (IEEE Std 1003.1-1988) ustar asks for octal digits with
   *    leading zeros, terminated by a space or a NUL
   *  = many writers terminate differently (libarchive itself drops the
   *    terminator to gain one or two digits)
   *  = many writers pad with spaces and leave out leading zeros
   *  = GNU tar and star switch to base-256 when a value does not fit
   *    in octal
   *
   * Concrete headers this is meant to accept:
   *  = Perl Archive::Tar: uid, gid, devminor and devmajor end in two
   *    NUL bytes, size is space-padded, other fields are zero-padded
   *  = plexus-archiver before 2.6.3: uid and gid may consist of spaces
   *    only, and all numeric fields are space-padded
   *
   * Accepted shape: optional leading spaces, then optional octal digits,
   * then only spaces and NULs up to the end of the field.  A field whose
   * first byte is 0x80, 0xff or NUL is accepted without looking further.
   * Anything else, such as garbage following the number, is rejected.
   *
   * p_field  start of the field (not necessarily NUL-terminated)
   * i_size   width of the field in bytes
   *
   * Returns 1 when the field is acceptable, 0 otherwise.
   */
  static int
  validate_number_field(const char* p_field, size_t i_size)
  {
      const unsigned char lead = (unsigned char)p_field[0];
      size_t pos = 0;

      /* Base-256 marker (or a leading NUL): there's nothing we can check. */
      if (lead == 0x80 || lead == 0xff || lead == 0x00)
          return 1;

      /* Leading blanks. */
      for (; pos < i_size && p_field[pos] == ' '; ++pos)
          ;

      /* The octal digits themselves. */
      for (; pos < i_size && p_field[pos] >= '0' && p_field[pos] <= '7'; ++pos)
          ;

      /* Whatever follows has to be padding. */
      for (; pos < i_size; ++pos) {
          if (p_field[pos] != ' ' && p_field[pos] != '\0')
              return 0;
      }

      return 1;
  }
  332. static int
  333. archive_read_format_tar_bid(struct archive_read *a, int best_bid)
  334. {
  335. int bid;
  336. const char *h;
  337. const struct archive_entry_header_ustar *header;
  338. (void)best_bid; /* UNUSED */
  339. bid = 0;
  340. /* Now let's look at the actual header and see if it matches. */
  341. h = __archive_read_ahead(a, 512, NULL);
  342. if (h == NULL)
  343. return (-1);
  344. /* If it's an end-of-archive mark, we can handle it. */
  345. if (h[0] == 0 && archive_block_is_null(h)) {
  346. /*
  347. * Usually, I bid the number of bits verified, but
  348. * in this case, 4096 seems excessive so I picked 10 as
  349. * an arbitrary but reasonable-seeming value.
  350. */
  351. return (10);
  352. }
  353. /* If it's not an end-of-archive mark, it must have a valid checksum.*/
  354. if (!checksum(a, h))
  355. return (0);
  356. bid += 48; /* Checksum is usually 6 octal digits. */
  357. header = (const struct archive_entry_header_ustar *)h;
  358. /* Recognize POSIX formats. */
  359. if ((memcmp(header->magic, "ustar\0", 6) == 0)
  360. && (memcmp(header->version, "00", 2) == 0))
  361. bid += 56;
  362. /* Recognize GNU tar format. */
  363. if ((memcmp(header->magic, "ustar ", 6) == 0)
  364. && (memcmp(header->version, " \0", 2) == 0))
  365. bid += 56;
  366. /* Type flag must be null, digit or A-Z, a-z. */
  367. if (header->typeflag[0] != 0 &&
  368. !( header->typeflag[0] >= '0' && header->typeflag[0] <= '9') &&
  369. !( header->typeflag[0] >= 'A' && header->typeflag[0] <= 'Z') &&
  370. !( header->typeflag[0] >= 'a' && header->typeflag[0] <= 'z') )
  371. return (0);
  372. bid += 2; /* 6 bits of variation in an 8-bit field leaves 2 bits. */
  373. /*
  374. * Check format of mode/uid/gid/mtime/size/rdevmajor/rdevminor fields.
  375. */
  376. if (bid > 0 && (
  377. validate_number_field(header->mode, sizeof(header->mode)) == 0
  378. || validate_number_field(header->uid, sizeof(header->uid)) == 0
  379. || validate_number_field(header->gid, sizeof(header->gid)) == 0
  380. || validate_number_field(header->mtime, sizeof(header->mtime)) == 0
  381. || validate_number_field(header->size, sizeof(header->size)) == 0
  382. || validate_number_field(header->rdevmajor, sizeof(header->rdevmajor)) == 0
  383. || validate_number_field(header->rdevminor, sizeof(header->rdevminor)) == 0)) {
  384. bid = 0;
  385. }
  386. return (bid);
  387. }
  388. static int
  389. archive_read_format_tar_options(struct archive_read *a,
  390. const char *key, const char *val)
  391. {
  392. struct tar *tar;
  393. int ret = ARCHIVE_FAILED;
  394. tar = (struct tar *)(a->format->data);
  395. if (strcmp(key, "compat-2x") == 0) {
  396. /* Handle UTF-8 filenames as libarchive 2.x */
  397. tar->compat_2x = (val != NULL && val[0] != 0);
  398. tar->init_default_conversion = tar->compat_2x;
  399. return (ARCHIVE_OK);
  400. } else if (strcmp(key, "hdrcharset") == 0) {
  401. if (val == NULL || val[0] == 0)
  402. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
  403. "tar: hdrcharset option needs a character-set name");
  404. else {
  405. tar->opt_sconv =
  406. archive_string_conversion_from_charset(
  407. &a->archive, val, 0);
  408. if (tar->opt_sconv != NULL)
  409. ret = ARCHIVE_OK;
  410. else
  411. ret = ARCHIVE_FATAL;
  412. }
  413. return (ret);
  414. } else if (strcmp(key, "mac-ext") == 0) {
  415. tar->process_mac_extensions = (val != NULL && val[0] != 0);
  416. return (ARCHIVE_OK);
  417. } else if (strcmp(key, "read_concatenated_archives") == 0) {
  418. tar->read_concatenated_archives = (val != NULL && val[0] != 0);
  419. return (ARCHIVE_OK);
  420. }
  421. /* Note: The "warn" return is just to inform the options
  422. * supervisor that we didn't handle it. It will generate
  423. * a suitable error if no one used this option. */
  424. return (ARCHIVE_WARN);
  425. }
  /*
   * Utility that centralizes the tracking of pending, not yet consumed
   * input: anything outstanding in *unconsumed is consumed from `a` and
   * the counter is reset, so that the caller can go on to do read-aheads.
   * Does nothing when *unconsumed is already zero.
   *
   * Debugging hint: to catch code that still uses bytes it has already
   * claimed as unconsumed, read ahead *unconsumed bytes here and memset()
   * them to 0xff before consuming.  That currently WILL break things, so
   * only do it while hunting that kind of bug.
   */
  static void
  tar_flush_unconsumed(struct archive_read *a, size_t *unconsumed)
  {
      if (*unconsumed == 0)
          return;

      __archive_read_consume(a, *unconsumed);
      *unconsumed = 0;
  }
  447. /*
  448. * The function invoked by archive_read_next_header(). This
  449. * just sets up a few things and then calls the internal
  450. * tar_read_header() function below.
  451. */
  452. static int
  453. archive_read_format_tar_read_header(struct archive_read *a,
  454. struct archive_entry *entry)
  455. {
  456. /*
  457. * When converting tar archives to cpio archives, it is
  458. * essential that each distinct file have a distinct inode
  459. * number. To simplify this, we keep a static count here to
  460. * assign fake dev/inode numbers to each tar entry. Note that
  461. * pax format archives may overwrite this with something more
  462. * useful.
  463. *
  464. * Ideally, we would track every file read from the archive so
  465. * that we could assign the same dev/ino pair to hardlinks,
  466. * but the memory required to store a complete lookup table is
  467. * probably not worthwhile just to support the relatively
  468. * obscure tar->cpio conversion case.
  469. */
  470. static int default_inode;
  471. static int default_dev;
  472. struct tar *tar;
  473. const char *p;
  474. const wchar_t *wp;
  475. int r;
  476. size_t l, unconsumed = 0;
  477. /* Assign default device/inode values. */
  478. archive_entry_set_dev(entry, 1 + default_dev); /* Don't use zero. */
  479. archive_entry_set_ino(entry, ++default_inode); /* Don't use zero. */
  480. /* Limit generated st_ino number to 16 bits. */
  481. if (default_inode >= 0xffff) {
  482. ++default_dev;
  483. default_inode = 0;
  484. }
  485. tar = (struct tar *)(a->format->data);
  486. tar->entry_offset = 0;
  487. gnu_clear_sparse_list(tar);
  488. tar->realsize = -1; /* Mark this as "unset" */
  489. tar->realsize_override = 0;
  490. /* Setup default string conversion. */
  491. tar->sconv = tar->opt_sconv;
  492. if (tar->sconv == NULL) {
  493. if (!tar->init_default_conversion) {
  494. tar->sconv_default =
  495. archive_string_default_conversion_for_read(&(a->archive));
  496. tar->init_default_conversion = 1;
  497. }
  498. tar->sconv = tar->sconv_default;
  499. }
  500. r = tar_read_header(a, tar, entry, &unconsumed);
  501. tar_flush_unconsumed(a, &unconsumed);
  502. /*
  503. * "non-sparse" files are really just sparse files with
  504. * a single block.
  505. */
  506. if (tar->sparse_list == NULL) {
  507. if (gnu_add_sparse_entry(a, tar, 0, tar->entry_bytes_remaining)
  508. != ARCHIVE_OK)
  509. return (ARCHIVE_FATAL);
  510. } else {
  511. struct sparse_block *sb;
  512. for (sb = tar->sparse_list; sb != NULL; sb = sb->next) {
  513. if (!sb->hole)
  514. archive_entry_sparse_add_entry(entry,
  515. sb->offset, sb->remaining);
  516. }
  517. }
  518. if (r == ARCHIVE_OK && archive_entry_filetype(entry) == AE_IFREG) {
  519. /*
  520. * "Regular" entry with trailing '/' is really
  521. * directory: This is needed for certain old tar
  522. * variants and even for some broken newer ones.
  523. */
  524. if ((wp = archive_entry_pathname_w(entry)) != NULL) {
  525. l = wcslen(wp);
  526. if (l > 0 && wp[l - 1] == L'/') {
  527. archive_entry_set_filetype(entry, AE_IFDIR);
  528. }
  529. } else if ((p = archive_entry_pathname(entry)) != NULL) {
  530. l = strlen(p);
  531. if (l > 0 && p[l - 1] == '/') {
  532. archive_entry_set_filetype(entry, AE_IFDIR);
  533. }
  534. }
  535. }
  536. return (r);
  537. }
  538. static int
  539. archive_read_format_tar_read_data(struct archive_read *a,
  540. const void **buff, size_t *size, int64_t *offset)
  541. {
  542. ssize_t bytes_read;
  543. struct tar *tar;
  544. struct sparse_block *p;
  545. tar = (struct tar *)(a->format->data);
  546. for (;;) {
  547. /* Remove exhausted entries from sparse list. */
  548. while (tar->sparse_list != NULL &&
  549. tar->sparse_list->remaining == 0) {
  550. p = tar->sparse_list;
  551. tar->sparse_list = p->next;
  552. free(p);
  553. }
  554. if (tar->entry_bytes_unconsumed) {
  555. __archive_read_consume(a, tar->entry_bytes_unconsumed);
  556. tar->entry_bytes_unconsumed = 0;
  557. }
  558. /* If we're at end of file, return EOF. */
  559. if (tar->sparse_list == NULL ||
  560. tar->entry_bytes_remaining == 0) {
  561. if (__archive_read_consume(a, tar->entry_padding) < 0)
  562. return (ARCHIVE_FATAL);
  563. tar->entry_padding = 0;
  564. *buff = NULL;
  565. *size = 0;
  566. *offset = tar->realsize;
  567. return (ARCHIVE_EOF);
  568. }
  569. *buff = __archive_read_ahead(a, 1, &bytes_read);
  570. if (bytes_read < 0)
  571. return (ARCHIVE_FATAL);
  572. if (*buff == NULL) {
  573. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
  574. "Truncated tar archive");
  575. return (ARCHIVE_FATAL);
  576. }
  577. if (bytes_read > tar->entry_bytes_remaining)
  578. bytes_read = (ssize_t)tar->entry_bytes_remaining;
  579. /* Don't read more than is available in the
  580. * current sparse block. */
  581. if (tar->sparse_list->remaining < bytes_read)
  582. bytes_read = (ssize_t)tar->sparse_list->remaining;
  583. *size = bytes_read;
  584. *offset = tar->sparse_list->offset;
  585. tar->sparse_list->remaining -= bytes_read;
  586. tar->sparse_list->offset += bytes_read;
  587. tar->entry_bytes_remaining -= bytes_read;
  588. tar->entry_bytes_unconsumed = bytes_read;
  589. if (!tar->sparse_list->hole)
  590. return (ARCHIVE_OK);
  591. /* Current is hole data and skip this. */
  592. }
  593. }
  594. static int
  595. archive_read_format_tar_skip(struct archive_read *a)
  596. {
  597. int64_t bytes_skipped;
  598. int64_t request;
  599. struct sparse_block *p;
  600. struct tar* tar;
  601. tar = (struct tar *)(a->format->data);
  602. /* Do not consume the hole of a sparse file. */
  603. request = 0;
  604. for (p = tar->sparse_list; p != NULL; p = p->next) {
  605. if (!p->hole) {
  606. if (p->remaining >= INT64_MAX - request) {
  607. return ARCHIVE_FATAL;
  608. }
  609. request += p->remaining;
  610. }
  611. }
  612. if (request > tar->entry_bytes_remaining)
  613. request = tar->entry_bytes_remaining;
  614. request += tar->entry_padding + tar->entry_bytes_unconsumed;
  615. bytes_skipped = __archive_read_consume(a, request);
  616. if (bytes_skipped < 0)
  617. return (ARCHIVE_FATAL);
  618. tar->entry_bytes_remaining = 0;
  619. tar->entry_bytes_unconsumed = 0;
  620. tar->entry_padding = 0;
  621. /* Free the sparse list. */
  622. gnu_clear_sparse_list(tar);
  623. return (ARCHIVE_OK);
  624. }
  625. /*
  626. * This function recursively interprets all of the headers associated
  627. * with a single entry.
  628. */
  629. static int
  630. tar_read_header(struct archive_read *a, struct tar *tar,
  631. struct archive_entry *entry, size_t *unconsumed)
  632. {
  633. ssize_t bytes;
  634. int err, eof_vol_header;
  635. const char *h;
  636. const struct archive_entry_header_ustar *header;
  637. const struct archive_entry_header_gnutar *gnuheader;
  638. eof_vol_header = 0;
  639. /* Loop until we find a workable header record. */
  640. for (;;) {
  641. tar_flush_unconsumed(a, unconsumed);
  642. /* Read 512-byte header record */
  643. h = __archive_read_ahead(a, 512, &bytes);
  644. if (bytes < 0)
  645. return ((int)bytes);
  646. if (bytes == 0) { /* EOF at a block boundary. */
  647. /* Some writers do omit the block of nulls. <sigh> */
  648. return (ARCHIVE_EOF);
  649. }
  650. if (bytes < 512) { /* Short block at EOF; this is bad. */
  651. archive_set_error(&a->archive,
  652. ARCHIVE_ERRNO_FILE_FORMAT,
  653. "Truncated tar archive");
  654. return (ARCHIVE_FATAL);
  655. }
  656. *unconsumed = 512;
  657. /* Header is workable if it's not an end-of-archive mark. */
  658. if (h[0] != 0 || !archive_block_is_null(h))
  659. break;
  660. /* Ensure format is set for archives with only null blocks. */
  661. if (a->archive.archive_format_name == NULL) {
  662. a->archive.archive_format = ARCHIVE_FORMAT_TAR;
  663. a->archive.archive_format_name = "tar";
  664. }
  665. if (!tar->read_concatenated_archives) {
  666. /* Try to consume a second all-null record, as well. */
  667. tar_flush_unconsumed(a, unconsumed);
  668. h = __archive_read_ahead(a, 512, NULL);
  669. if (h != NULL && h[0] == 0 && archive_block_is_null(h))
  670. __archive_read_consume(a, 512);
  671. archive_clear_error(&a->archive);
  672. return (ARCHIVE_EOF);
  673. }
  674. /*
  675. * We're reading concatenated archives, ignore this block and
  676. * loop to get the next.
  677. */
  678. }
  679. /*
  680. * Note: If the checksum fails and we return ARCHIVE_RETRY,
  681. * then the client is likely to just retry. This is a very
  682. * crude way to search for the next valid header!
  683. *
  684. * TODO: Improve this by implementing a real header scan.
  685. */
  686. if (!checksum(a, h)) {
  687. tar_flush_unconsumed(a, unconsumed);
  688. archive_set_error(&a->archive, EINVAL, "Damaged tar archive");
  689. return (ARCHIVE_RETRY); /* Retryable: Invalid header */
  690. }
  691. if (++tar->header_recursion_depth > 32) {
  692. tar_flush_unconsumed(a, unconsumed);
  693. archive_set_error(&a->archive, EINVAL, "Too many special headers");
  694. return (ARCHIVE_WARN);
  695. }
  696. /* Determine the format variant. */
  697. header = (const struct archive_entry_header_ustar *)h;
  698. switch(header->typeflag[0]) {
  699. case 'A': /* Solaris tar ACL */
  700. a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
  701. a->archive.archive_format_name = "Solaris tar";
  702. err = header_Solaris_ACL(a, tar, entry, h, unconsumed);
  703. break;
  704. case 'g': /* POSIX-standard 'g' header. */
  705. a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
  706. a->archive.archive_format_name = "POSIX pax interchange format";
  707. err = header_pax_global(a, tar, entry, h, unconsumed);
  708. if (err == ARCHIVE_EOF)
  709. return (err);
  710. break;
  711. case 'K': /* Long link name (GNU tar, others) */
  712. err = header_longlink(a, tar, entry, h, unconsumed);
  713. break;
  714. case 'L': /* Long filename (GNU tar, others) */
  715. err = header_longname(a, tar, entry, h, unconsumed);
  716. break;
  717. case 'V': /* GNU volume header */
  718. err = header_volume(a, tar, entry, h, unconsumed);
  719. if (err == ARCHIVE_EOF)
  720. eof_vol_header = 1;
  721. break;
  722. case 'X': /* Used by SUN tar; same as 'x'. */
  723. a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
  724. a->archive.archive_format_name =
  725. "POSIX pax interchange format (Sun variant)";
  726. err = header_pax_extensions(a, tar, entry, h, unconsumed);
  727. break;
  728. case 'x': /* POSIX-standard 'x' header. */
  729. a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
  730. a->archive.archive_format_name = "POSIX pax interchange format";
  731. err = header_pax_extensions(a, tar, entry, h, unconsumed);
  732. break;
  733. default:
  734. gnuheader = (const struct archive_entry_header_gnutar *)h;
  735. if (memcmp(gnuheader->magic, "ustar \0", 8) == 0) {
  736. a->archive.archive_format = ARCHIVE_FORMAT_TAR_GNUTAR;
  737. a->archive.archive_format_name = "GNU tar format";
  738. err = header_gnutar(a, tar, entry, h, unconsumed);
  739. } else if (memcmp(header->magic, "ustar", 5) == 0) {
  740. if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) {
  741. a->archive.archive_format = ARCHIVE_FORMAT_TAR_USTAR;
  742. a->archive.archive_format_name = "POSIX ustar format";
  743. }
  744. err = header_ustar(a, tar, entry, h);
  745. } else {
  746. a->archive.archive_format = ARCHIVE_FORMAT_TAR;
  747. a->archive.archive_format_name = "tar (non-POSIX)";
  748. err = header_old_tar(a, tar, entry, h);
  749. }
  750. }
  751. if (err == ARCHIVE_FATAL)
  752. return (err);
  753. tar_flush_unconsumed(a, unconsumed);
  754. h = NULL;
  755. header = NULL;
  756. --tar->header_recursion_depth;
  757. /* Yuck. Apple's design here ends up storing long pathname
  758. * extensions for both the AppleDouble extension entry and the
  759. * regular entry.
  760. */
  761. if ((err == ARCHIVE_WARN || err == ARCHIVE_OK) &&
  762. tar->header_recursion_depth == 0 &&
  763. tar->process_mac_extensions) {
  764. int err2 = read_mac_metadata_blob(a, tar, entry, h, unconsumed);
  765. if (err2 < err)
  766. err = err2;
  767. }
  768. /* We return warnings or success as-is. Anything else is fatal. */
  769. if (err == ARCHIVE_WARN || err == ARCHIVE_OK) {
  770. if (tar->sparse_gnu_pending) {
  771. if (tar->sparse_gnu_major == 1 &&
  772. tar->sparse_gnu_minor == 0) {
  773. ssize_t bytes_read;
  774. tar->sparse_gnu_pending = 0;
  775. /* Read initial sparse map. */
  776. bytes_read = gnu_sparse_10_read(a, tar, unconsumed);
  777. if (bytes_read < 0)
  778. return ((int)bytes_read);
  779. tar->entry_bytes_remaining -= bytes_read;
  780. } else {
  781. archive_set_error(&a->archive,
  782. ARCHIVE_ERRNO_MISC,
  783. "Unrecognized GNU sparse file format");
  784. return (ARCHIVE_WARN);
  785. }
  786. tar->sparse_gnu_pending = 0;
  787. }
  788. return (err);
  789. }
  790. if (err == ARCHIVE_EOF) {
  791. if (!eof_vol_header) {
  792. /* EOF when recursively reading a header is bad. */
  793. archive_set_error(&a->archive, EINVAL,
  794. "Damaged tar archive");
  795. } else {
  796. /* If we encounter just a GNU volume header treat
  797. * this situation as an empty archive */
  798. return (ARCHIVE_EOF);
  799. }
  800. }
  801. return (ARCHIVE_FATAL);
  802. }
  803. /*
  804. * Return true if block checksum is correct.
  805. */
  806. static int
  807. checksum(struct archive_read *a, const void *h)
  808. {
  809. const unsigned char *bytes;
  810. const struct archive_entry_header_ustar *header;
  811. int check, sum;
  812. size_t i;
  813. (void)a; /* UNUSED */
  814. bytes = (const unsigned char *)h;
  815. header = (const struct archive_entry_header_ustar *)h;
  816. /* Checksum field must hold an octal number */
  817. for (i = 0; i < sizeof(header->checksum); ++i) {
  818. char c = header->checksum[i];
  819. if (c != ' ' && c != '\0' && (c < '0' || c > '7'))
  820. return 0;
  821. }
  822. /*
  823. * Test the checksum. Note that POSIX specifies _unsigned_
  824. * bytes for this calculation.
  825. */
  826. sum = (int)tar_atol(header->checksum, sizeof(header->checksum));
  827. check = 0;
  828. for (i = 0; i < 148; i++)
  829. check += (unsigned char)bytes[i];
  830. for (; i < 156; i++)
  831. check += 32;
  832. for (; i < 512; i++)
  833. check += (unsigned char)bytes[i];
  834. if (sum == check)
  835. return (1);
  836. /*
  837. * Repeat test with _signed_ bytes, just in case this archive
  838. * was created by an old BSD, Solaris, or HP-UX tar with a
  839. * broken checksum calculation.
  840. */
  841. check = 0;
  842. for (i = 0; i < 148; i++)
  843. check += (signed char)bytes[i];
  844. for (; i < 156; i++)
  845. check += 32;
  846. for (; i < 512; i++)
  847. check += (signed char)bytes[i];
  848. if (sum == check)
  849. return (1);
  850. return (0);
  851. }
  852. /*
  853. * Return true if this block contains only nulls.
  854. */
  855. static int
  856. archive_block_is_null(const char *p)
  857. {
  858. unsigned i;
  859. for (i = 0; i < 512; i++)
  860. if (*p++)
  861. return (0);
  862. return (1);
  863. }
  864. /*
  865. * Interpret 'A' Solaris ACL header
  866. */
  867. static int
  868. header_Solaris_ACL(struct archive_read *a, struct tar *tar,
  869. struct archive_entry *entry, const void *h, size_t *unconsumed)
  870. {
  871. const struct archive_entry_header_ustar *header;
  872. size_t size;
  873. int err, acl_type;
  874. int64_t type;
  875. char *acl, *p;
  876. /*
  877. * read_body_to_string adds a NUL terminator, but we need a little
  878. * more to make sure that we don't overrun acl_text later.
  879. */
  880. header = (const struct archive_entry_header_ustar *)h;
  881. size = (size_t)tar_atol(header->size, sizeof(header->size));
  882. err = read_body_to_string(a, tar, &(tar->acl_text), h, unconsumed);
  883. if (err != ARCHIVE_OK)
  884. return (err);
  885. /* Recursively read next header */
  886. err = tar_read_header(a, tar, entry, unconsumed);
  887. if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
  888. return (err);
  889. /* TODO: Examine the first characters to see if this
  890. * is an AIX ACL descriptor. We'll likely never support
  891. * them, but it would be polite to recognize and warn when
  892. * we do see them. */
  893. /* Leading octal number indicates ACL type and number of entries. */
  894. p = acl = tar->acl_text.s;
  895. type = 0;
  896. while (*p != '\0' && p < acl + size) {
  897. if (*p < '0' || *p > '7') {
  898. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
  899. "Malformed Solaris ACL attribute (invalid digit)");
  900. return(ARCHIVE_WARN);
  901. }
  902. type <<= 3;
  903. type += *p - '0';
  904. if (type > 077777777) {
  905. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
  906. "Malformed Solaris ACL attribute (count too large)");
  907. return (ARCHIVE_WARN);
  908. }
  909. p++;
  910. }
  911. switch ((int)type & ~0777777) {
  912. case 01000000:
  913. /* POSIX.1e ACL */
  914. acl_type = ARCHIVE_ENTRY_ACL_TYPE_ACCESS;
  915. break;
  916. case 03000000:
  917. /* NFSv4 ACL */
  918. acl_type = ARCHIVE_ENTRY_ACL_TYPE_NFS4;
  919. break;
  920. default:
  921. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
  922. "Malformed Solaris ACL attribute (unsupported type %o)",
  923. (int)type);
  924. return (ARCHIVE_WARN);
  925. }
  926. p++;
  927. if (p >= acl + size) {
  928. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
  929. "Malformed Solaris ACL attribute (body overflow)");
  930. return(ARCHIVE_WARN);
  931. }
  932. /* ACL text is null-terminated; find the end. */
  933. size -= (p - acl);
  934. acl = p;
  935. while (*p != '\0' && p < acl + size)
  936. p++;
  937. if (tar->sconv_acl == NULL) {
  938. tar->sconv_acl = archive_string_conversion_from_charset(
  939. &(a->archive), "UTF-8", 1);
  940. if (tar->sconv_acl == NULL)
  941. return (ARCHIVE_FATAL);
  942. }
  943. archive_strncpy(&(tar->localname), acl, p - acl);
  944. err = archive_acl_from_text_l(archive_entry_acl(entry),
  945. tar->localname.s, acl_type, tar->sconv_acl);
  946. if (err != ARCHIVE_OK) {
  947. if (errno == ENOMEM) {
  948. archive_set_error(&a->archive, ENOMEM,
  949. "Can't allocate memory for ACL");
  950. } else
  951. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
  952. "Malformed Solaris ACL attribute (unparsable)");
  953. }
  954. return (err);
  955. }
  956. /*
  957. * Interpret 'K' long linkname header.
  958. */
  959. static int
  960. header_longlink(struct archive_read *a, struct tar *tar,
  961. struct archive_entry *entry, const void *h, size_t *unconsumed)
  962. {
  963. int err;
  964. err = read_body_to_string(a, tar, &(tar->longlink), h, unconsumed);
  965. if (err != ARCHIVE_OK)
  966. return (err);
  967. err = tar_read_header(a, tar, entry, unconsumed);
  968. if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
  969. return (err);
  970. /* Set symlink if symlink already set, else hardlink. */
  971. archive_entry_copy_link(entry, tar->longlink.s);
  972. return (ARCHIVE_OK);
  973. }
  974. static int
  975. set_conversion_failed_error(struct archive_read *a,
  976. struct archive_string_conv *sconv, const char *name)
  977. {
  978. if (errno == ENOMEM) {
  979. archive_set_error(&a->archive, ENOMEM,
  980. "Can't allocate memory for %s", name);
  981. return (ARCHIVE_FATAL);
  982. }
  983. archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
  984. "%s can't be converted from %s to current locale.",
  985. name, archive_string_conversion_charset_name(sconv));
  986. return (ARCHIVE_WARN);
  987. }
  988. /*
  989. * Interpret 'L' long filename header.
  990. */
  991. static int
  992. header_longname(struct archive_read *a, struct tar *tar,
  993. struct archive_entry *entry, const void *h, size_t *unconsumed)
  994. {
  995. int err;
  996. err = read_body_to_string(a, tar, &(tar->longname), h, unconsumed);
  997. if (err != ARCHIVE_OK)
  998. return (err);
  999. /* Read and parse "real" header, then override name. */
  1000. err = tar_read_header(a, tar, entry, unconsumed);
  1001. if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
  1002. return (err);
  1003. if (archive_entry_copy_pathname_l(entry, tar->longname.s,
  1004. archive_strlen(&(tar->longname)), tar->sconv) != 0)
  1005. err = set_conversion_failed_error(a, tar->sconv, "Pathname");
  1006. return (err);
  1007. }
  1008. /*
  1009. * Interpret 'V' GNU tar volume header.
  1010. */
  1011. static int
  1012. header_volume(struct archive_read *a, struct tar *tar,
  1013. struct archive_entry *entry, const void *h, size_t *unconsumed)
  1014. {
  1015. (void)h;
  1016. /* Just skip this and read the next header. */
  1017. return (tar_read_header(a, tar, entry, unconsumed));
  1018. }
  1019. /*
  1020. * Read body of an archive entry into an archive_string object.
  1021. */
  1022. static int
  1023. read_body_to_string(struct archive_read *a, struct tar *tar,
  1024. struct archive_string *as, const void *h, size_t *unconsumed)
  1025. {
  1026. int64_t size;
  1027. const struct archive_entry_header_ustar *header;
  1028. const void *src;
  1029. (void)tar; /* UNUSED */
  1030. header = (const struct archive_entry_header_ustar *)h;
  1031. size = tar_atol(header->size, sizeof(header->size));
  1032. if ((size > 1048576) || (size < 0)) {
  1033. archive_set_error(&a->archive, EINVAL,
  1034. "Special header too large");
  1035. return (ARCHIVE_FATAL);
  1036. }
  1037. /* Fail if we can't make our buffer big enough. */
  1038. if (archive_string_ensure(as, (size_t)size+1) == NULL) {
  1039. archive_set_error(&a->archive, ENOMEM,
  1040. "No memory");
  1041. return (ARCHIVE_FATAL);
  1042. }
  1043. tar_flush_unconsumed(a, unconsumed);
  1044. /* Read the body into the string. */
  1045. *unconsumed = (size_t)((size + 511) & ~ 511);
  1046. src = __archive_read_ahead(a, *unconsumed, NULL);
  1047. if (src == NULL) {
  1048. *unconsumed = 0;
  1049. return (ARCHIVE_FATAL);
  1050. }
  1051. memcpy(as->s, src, (size_t)size);
  1052. as->s[size] = '\0';
  1053. as->length = (size_t)size;
  1054. return (ARCHIVE_OK);
  1055. }
  1056. /*
  1057. * Parse out common header elements.
  1058. *
  1059. * This would be the same as header_old_tar, except that the
  1060. * filename is handled slightly differently for old and POSIX
  1061. * entries (POSIX entries support a 'prefix'). This factoring
  1062. * allows header_old_tar and header_ustar
  1063. * to handle filenames differently, while still putting most of the
  1064. * common parsing into one place.
  1065. */
  1066. static int
  1067. header_common(struct archive_read *a, struct tar *tar,
  1068. struct archive_entry *entry, const void *h)
  1069. {
  1070. const struct archive_entry_header_ustar *header;
  1071. char tartype;
  1072. int err = ARCHIVE_OK;
  1073. header = (const struct archive_entry_header_ustar *)h;
  1074. if (header->linkname[0])
  1075. archive_strncpy(&(tar->entry_linkpath),
  1076. header->linkname, sizeof(header->linkname));
  1077. else
  1078. archive_string_empty(&(tar->entry_linkpath));
  1079. /* Parse out the numeric fields (all are octal) */
  1080. archive_entry_set_mode(entry,
  1081. (mode_t)tar_atol(header->mode, sizeof(header->mode)));
  1082. archive_entry_set_uid(entry, tar_atol(header->uid, sizeof(header->uid)));
  1083. archive_entry_set_gid(entry, tar_atol(header->gid, sizeof(header->gid)));
  1084. tar->entry_bytes_remaining = tar_atol(header->size, sizeof(header->size));
  1085. if (tar->entry_bytes_remaining < 0) {
  1086. tar->entry_bytes_remaining = 0;
  1087. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
  1088. "Tar entry has negative size");
  1089. return (ARCHIVE_FATAL);
  1090. }
  1091. if (tar->entry_bytes_remaining == INT64_MAX) {
  1092. /* Note: tar_atol returns INT64_MAX on overflow */
  1093. tar->entry_bytes_remaining = 0;
  1094. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
  1095. "Tar entry size overflow");
  1096. return (ARCHIVE_FATAL);
  1097. }
  1098. tar->realsize = tar->entry_bytes_remaining;
  1099. archive_entry_set_size(entry, tar->entry_bytes_remaining);
  1100. archive_entry_set_mtime(entry, tar_atol(header->mtime, sizeof(header->mtime)), 0);
  1101. /* Handle the tar type flag appropriately. */
  1102. tartype = header->typeflag[0];
  1103. switch (tartype) {
  1104. case '1': /* Hard link */
  1105. if (archive_entry_copy_hardlink_l(entry, tar->entry_linkpath.s,
  1106. archive_strlen(&(tar->entry_linkpath)), tar->sconv) != 0) {
  1107. err = set_conversion_failed_error(a, tar->sconv,
  1108. "Linkname");
  1109. if (err == ARCHIVE_FATAL)
  1110. return (err);
  1111. }
  1112. /*
  1113. * The following may seem odd, but: Technically, tar
  1114. * does not store the file type for a "hard link"
  1115. * entry, only the fact that it is a hard link. So, I
  1116. * leave the type zero normally. But, pax interchange
  1117. * format allows hard links to have data, which
  1118. * implies that the underlying entry is a regular
  1119. * file.
  1120. */
  1121. if (archive_entry_size(entry) > 0)
  1122. archive_entry_set_filetype(entry, AE_IFREG);
  1123. /*
  1124. * A tricky point: Traditionally, tar readers have
  1125. * ignored the size field when reading hardlink
  1126. * entries, and some writers put non-zero sizes even
  1127. * though the body is empty. POSIX blessed this
  1128. * convention in the 1988 standard, but broke with
  1129. * this tradition in 2001 by permitting hardlink
  1130. * entries to store valid bodies in pax interchange
  1131. * format, but not in ustar format. Since there is no
  1132. * hard and fast way to distinguish pax interchange
  1133. * from earlier archives (the 'x' and 'g' entries are
  1134. * optional, after all), we need a heuristic.
  1135. */
  1136. if (archive_entry_size(entry) == 0) {
  1137. /* If the size is already zero, we're done. */
  1138. } else if (a->archive.archive_format
  1139. == ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) {
  1140. /* Definitely pax extended; must obey hardlink size. */
  1141. } else if (a->archive.archive_format == ARCHIVE_FORMAT_TAR
  1142. || a->archive.archive_format == ARCHIVE_FORMAT_TAR_GNUTAR)
  1143. {
  1144. /* Old-style or GNU tar: we must ignore the size. */
  1145. archive_entry_set_size(entry, 0);
  1146. tar->entry_bytes_remaining = 0;
  1147. } else if (archive_read_format_tar_bid(a, 50) > 50) {
  1148. /*
  1149. * We don't know if it's pax: If the bid
  1150. * function sees a valid ustar header
  1151. * immediately following, then let's ignore
  1152. * the hardlink size.
  1153. */
  1154. archive_entry_set_size(entry, 0);
  1155. tar->entry_bytes_remaining = 0;
  1156. }
  1157. /*
  1158. * TODO: There are still two cases I'd like to handle:
  1159. * = a ustar non-pax archive with a hardlink entry at
  1160. * end-of-archive. (Look for block of nulls following?)
  1161. * = a pax archive that has not seen any pax headers
  1162. * and has an entry which is a hardlink entry storing
  1163. * a body containing an uncompressed tar archive.
  1164. * The first is worth addressing; I don't see any reliable
  1165. * way to deal with the second possibility.
  1166. */
  1167. break;
  1168. case '2': /* Symlink */
  1169. archive_entry_set_filetype(entry, AE_IFLNK);
  1170. archive_entry_set_size(entry, 0);
  1171. tar->entry_bytes_remaining = 0;
  1172. if (archive_entry_copy_symlink_l(entry, tar->entry_linkpath.s,
  1173. archive_strlen(&(tar->entry_linkpath)), tar->sconv) != 0) {
  1174. err = set_conversion_failed_error(a, tar->sconv,
  1175. "Linkname");
  1176. if (err == ARCHIVE_FATAL)
  1177. return (err);
  1178. }
  1179. break;
  1180. case '3': /* Character device */
  1181. archive_entry_set_filetype(entry, AE_IFCHR);
  1182. archive_entry_set_size(entry, 0);
  1183. tar->entry_bytes_remaining = 0;
  1184. break;
  1185. case '4': /* Block device */
  1186. archive_entry_set_filetype(entry, AE_IFBLK);
  1187. archive_entry_set_size(entry, 0);
  1188. tar->entry_bytes_remaining = 0;
  1189. break;
  1190. case '5': /* Dir */
  1191. archive_entry_set_filetype(entry, AE_IFDIR);
  1192. archive_entry_set_size(entry, 0);
  1193. tar->entry_bytes_remaining = 0;
  1194. break;
  1195. case '6': /* FIFO device */
  1196. archive_entry_set_filetype(entry, AE_IFIFO);
  1197. archive_entry_set_size(entry, 0);
  1198. tar->entry_bytes_remaining = 0;
  1199. break;
  1200. case 'D': /* GNU incremental directory type */
  1201. /*
  1202. * No special handling is actually required here.
  1203. * It might be nice someday to preprocess the file list and
  1204. * provide it to the client, though.
  1205. */
  1206. archive_entry_set_filetype(entry, AE_IFDIR);
  1207. break;
  1208. case 'M': /* GNU "Multi-volume" (remainder of file from last archive)*/
  1209. /*
  1210. * As far as I can tell, this is just like a regular file
  1211. * entry, except that the contents should be _appended_ to
  1212. * the indicated file at the indicated offset. This may
  1213. * require some API work to fully support.
  1214. */
  1215. break;
  1216. case 'N': /* Old GNU "long filename" entry. */
  1217. /* The body of this entry is a script for renaming
  1218. * previously-extracted entries. Ugh. It will never
  1219. * be supported by libarchive. */
  1220. archive_entry_set_filetype(entry, AE_IFREG);
  1221. break;
  1222. case 'S': /* GNU sparse files */
  1223. /*
  1224. * Sparse files are really just regular files with
  1225. * sparse information in the extended area.
  1226. */
  1227. /* FALLTHROUGH */
  1228. case '0':
  1229. /*
  1230. * Enable sparse file "read" support only for regular
  1231. * files and explicit GNU sparse files. However, we
  1232. * don't allow non-standard file types to be sparse.
  1233. */
  1234. tar->sparse_allowed = 1;
  1235. /* FALLTHROUGH */
  1236. default: /* Regular file and non-standard types */
  1237. /*
  1238. * Per POSIX: non-recognized types should always be
  1239. * treated as regular files.
  1240. */
  1241. archive_entry_set_filetype(entry, AE_IFREG);
  1242. break;
  1243. }
  1244. return (err);
  1245. }
  1246. /*
  1247. * Parse out header elements for "old-style" tar archives.
  1248. */
  1249. static int
  1250. header_old_tar(struct archive_read *a, struct tar *tar,
  1251. struct archive_entry *entry, const void *h)
  1252. {
  1253. const struct archive_entry_header_ustar *header;
  1254. int err = ARCHIVE_OK, err2;
  1255. /* Copy filename over (to ensure null termination). */
  1256. header = (const struct archive_entry_header_ustar *)h;
  1257. if (archive_entry_copy_pathname_l(entry,
  1258. header->name, sizeof(header->name), tar->sconv) != 0) {
  1259. err = set_conversion_failed_error(a, tar->sconv, "Pathname");
  1260. if (err == ARCHIVE_FATAL)
  1261. return (err);
  1262. }
  1263. /* Grab rest of common fields */
  1264. err2 = header_common(a, tar, entry, h);
  1265. if (err > err2)
  1266. err = err2;
  1267. tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
  1268. return (err);
  1269. }
  1270. /*
  1271. * Read a Mac AppleDouble-encoded blob of file metadata,
  1272. * if there is one.
  1273. */
  1274. static int
  1275. read_mac_metadata_blob(struct archive_read *a, struct tar *tar,
  1276. struct archive_entry *entry, const void *h, size_t *unconsumed)
  1277. {
  1278. int64_t size;
  1279. const void *data;
  1280. const char *p, *name;
  1281. const wchar_t *wp, *wname;
  1282. (void)h; /* UNUSED */
  1283. wname = wp = archive_entry_pathname_w(entry);
  1284. if (wp != NULL) {
  1285. /* Find the last path element. */
  1286. for (; *wp != L'\0'; ++wp) {
  1287. if (wp[0] == '/' && wp[1] != L'\0')
  1288. wname = wp + 1;
  1289. }
  1290. /*
  1291. * If last path element starts with "._", then
  1292. * this is a Mac extension.
  1293. */
  1294. if (wname[0] != L'.' || wname[1] != L'_' || wname[2] == L'\0')
  1295. return ARCHIVE_OK;
  1296. } else {
  1297. /* Find the last path element. */
  1298. name = p = archive_entry_pathname(entry);
  1299. if (p == NULL)
  1300. return (ARCHIVE_FAILED);
  1301. for (; *p != '\0'; ++p) {
  1302. if (p[0] == '/' && p[1] != '\0')
  1303. name = p + 1;
  1304. }
  1305. /*
  1306. * If last path element starts with "._", then
  1307. * this is a Mac extension.
  1308. */
  1309. if (name[0] != '.' || name[1] != '_' || name[2] == '\0')
  1310. return ARCHIVE_OK;
  1311. }
  1312. /* Read the body as a Mac OS metadata blob. */
  1313. size = archive_entry_size(entry);
  1314. /*
  1315. * TODO: Look beyond the body here to peek at the next header.
  1316. * If it's a regular header (not an extension header)
  1317. * that has the wrong name, just return the current
  1318. * entry as-is, without consuming the body here.
  1319. * That would reduce the risk of us mis-identifying
  1320. * an ordinary file that just happened to have
  1321. * a name starting with "._".
  1322. *
  1323. * Q: Is the above idea really possible? Even
  1324. * when there are GNU or pax extension entries?
  1325. */
  1326. data = __archive_read_ahead(a, (size_t)size, NULL);
  1327. if (data == NULL) {
  1328. *unconsumed = 0;
  1329. return (ARCHIVE_FATAL);
  1330. }
  1331. archive_entry_copy_mac_metadata(entry, data, (size_t)size);
  1332. *unconsumed = (size_t)((size + 511) & ~ 511);
  1333. tar_flush_unconsumed(a, unconsumed);
  1334. return (tar_read_header(a, tar, entry, unconsumed));
  1335. }
  1336. /*
  1337. * Parse a file header for a pax extended archive entry.
  1338. */
  1339. static int
  1340. header_pax_global(struct archive_read *a, struct tar *tar,
  1341. struct archive_entry *entry, const void *h, size_t *unconsumed)
  1342. {
  1343. int err;
  1344. err = read_body_to_string(a, tar, &(tar->pax_global), h, unconsumed);
  1345. if (err != ARCHIVE_OK)
  1346. return (err);
  1347. err = tar_read_header(a, tar, entry, unconsumed);
  1348. return (err);
  1349. }
  1350. static int
  1351. header_pax_extensions(struct archive_read *a, struct tar *tar,
  1352. struct archive_entry *entry, const void *h, size_t *unconsumed)
  1353. {
  1354. int err, err2;
  1355. err = read_body_to_string(a, tar, &(tar->pax_header), h, unconsumed);
  1356. if (err != ARCHIVE_OK)
  1357. return (err);
  1358. /* Parse the next header. */
  1359. err = tar_read_header(a, tar, entry, unconsumed);
  1360. if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
  1361. return (err);
  1362. /*
  1363. * TODO: Parse global/default options into 'entry' struct here
  1364. * before handling file-specific options.
  1365. *
  1366. * This design (parse standard header, then overwrite with pax
  1367. * extended attribute data) usually works well, but isn't ideal;
  1368. * it would be better to parse the pax extended attributes first
  1369. * and then skip any fields in the standard header that were
  1370. * defined in the pax header.
  1371. */
  1372. err2 = pax_header(a, tar, entry, &tar->pax_header);
  1373. err = err_combine(err, err2);
  1374. tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
  1375. return (err);
  1376. }
  1377. /*
  1378. * Parse a file header for a Posix "ustar" archive entry. This also
  1379. * handles "pax" or "extended ustar" entries.
  1380. */
  1381. static int
  1382. header_ustar(struct archive_read *a, struct tar *tar,
  1383. struct archive_entry *entry, const void *h)
  1384. {
  1385. const struct archive_entry_header_ustar *header;
  1386. struct archive_string *as;
  1387. int err = ARCHIVE_OK, r;
  1388. header = (const struct archive_entry_header_ustar *)h;
  1389. /* Copy name into an internal buffer to ensure null-termination. */
  1390. as = &(tar->entry_pathname);
  1391. if (header->prefix[0]) {
  1392. archive_strncpy(as, header->prefix, sizeof(header->prefix));
  1393. if (as->s[archive_strlen(as) - 1] != '/')
  1394. archive_strappend_char(as, '/');
  1395. archive_strncat(as, header->name, sizeof(header->name));
  1396. } else {
  1397. archive_strncpy(as, header->name, sizeof(header->name));
  1398. }
  1399. if (archive_entry_copy_pathname_l(entry, as->s, archive_strlen(as),
  1400. tar->sconv) != 0) {
  1401. err = set_conversion_failed_error(a, tar->sconv, "Pathname");
  1402. if (err == ARCHIVE_FATAL)
  1403. return (err);
  1404. }
  1405. /* Handle rest of common fields. */
  1406. r = header_common(a, tar, entry, h);
  1407. if (r == ARCHIVE_FATAL)
  1408. return (r);
  1409. if (r < err)
  1410. err = r;
  1411. /* Handle POSIX ustar fields. */
  1412. if (archive_entry_copy_uname_l(entry,
  1413. header->uname, sizeof(header->uname), tar->sconv) != 0) {
  1414. err = set_conversion_failed_error(a, tar->sconv, "Uname");
  1415. if (err == ARCHIVE_FATAL)
  1416. return (err);
  1417. }
  1418. if (archive_entry_copy_gname_l(entry,
  1419. header->gname, sizeof(header->gname), tar->sconv) != 0) {
  1420. err = set_conversion_failed_error(a, tar->sconv, "Gname");
  1421. if (err == ARCHIVE_FATAL)
  1422. return (err);
  1423. }
  1424. /* Parse out device numbers only for char and block specials. */
  1425. if (header->typeflag[0] == '3' || header->typeflag[0] == '4') {
  1426. archive_entry_set_rdevmajor(entry, (dev_t)
  1427. tar_atol(header->rdevmajor, sizeof(header->rdevmajor)));
  1428. archive_entry_set_rdevminor(entry, (dev_t)
  1429. tar_atol(header->rdevminor, sizeof(header->rdevminor)));
  1430. }
  1431. tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
  1432. return (err);
  1433. }
  1434. /*
  1435. * Parse the pax extended attributes record.
  1436. *
  1437. * Returns non-zero if there's an error in the data.
  1438. */
  1439. static int
  1440. pax_header(struct archive_read *a, struct tar *tar,
  1441. struct archive_entry *entry, struct archive_string *in_as)
  1442. {
  1443. size_t attr_length, l, line_length, value_length;
  1444. char *p;
  1445. char *key, *value;
  1446. struct archive_string *as;
  1447. struct archive_string_conv *sconv;
  1448. int err, err2;
  1449. char *attr = in_as->s;
  1450. attr_length = in_as->length;
  1451. tar->pax_hdrcharset_binary = 0;
  1452. archive_string_empty(&(tar->entry_gname));
  1453. archive_string_empty(&(tar->entry_linkpath));
  1454. archive_string_empty(&(tar->entry_pathname));
  1455. archive_string_empty(&(tar->entry_pathname_override));
  1456. archive_string_empty(&(tar->entry_uname));
  1457. err = ARCHIVE_OK;
  1458. while (attr_length > 0) {
  1459. /* Parse decimal length field at start of line. */
  1460. line_length = 0;
  1461. l = attr_length;
  1462. p = attr; /* Record start of line. */
  1463. while (l>0) {
  1464. if (*p == ' ') {
  1465. p++;
  1466. l--;
  1467. break;
  1468. }
  1469. if (*p < '0' || *p > '9') {
  1470. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
  1471. "Ignoring malformed pax extended attributes");
  1472. return (ARCHIVE_WARN);
  1473. }
  1474. line_length *= 10;
  1475. line_length += *p - '0';
  1476. if (line_length > 999999) {
  1477. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
  1478. "Rejecting pax extended attribute > 1MB");
  1479. return (ARCHIVE_WARN);
  1480. }
  1481. p++;
  1482. l--;
  1483. }
  1484. /*
  1485. * Parsed length must be no bigger than available data,
  1486. * at least 1, and the last character of the line must
  1487. * be '\n'.
  1488. */
  1489. if (line_length > attr_length
  1490. || line_length < 1
  1491. || attr[line_length - 1] != '\n')
  1492. {
  1493. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
  1494. "Ignoring malformed pax extended attribute");
  1495. return (ARCHIVE_WARN);
  1496. }
  1497. /* Null-terminate the line. */
  1498. attr[line_length - 1] = '\0';
  1499. /* Find end of key and null terminate it. */
  1500. key = p;
  1501. if (key[0] == '=')
  1502. return (-1);
  1503. while (*p && *p != '=')
  1504. ++p;
  1505. if (*p == '\0') {
  1506. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
  1507. "Invalid pax extended attributes");
  1508. return (ARCHIVE_WARN);
  1509. }
  1510. *p = '\0';
  1511. value = p + 1;
  1512. /* Some values may be binary data */
  1513. value_length = attr + line_length - 1 - value;
  1514. /* Identify this attribute and set it in the entry. */
  1515. err2 = pax_attribute(a, tar, entry, key, value, value_length);
  1516. if (err2 == ARCHIVE_FATAL)
  1517. return (err2);
  1518. err = err_combine(err, err2);
  1519. /* Skip to next line */
  1520. attr += line_length;
  1521. attr_length -= line_length;
  1522. }
  1523. /*
  1524. * PAX format uses UTF-8 as default charset for its metadata
  1525. * unless hdrcharset=BINARY is present in its header.
  1526. * We apply the charset specified by the hdrcharset option only
  1527. * when the hdrcharset attribute(in PAX header) is BINARY because
  1528. * we respect the charset described in PAX header and BINARY also
  1529. * means that metadata(filename,uname and gname) character-set
  1530. * is unknown.
  1531. */
  1532. if (tar->pax_hdrcharset_binary)
  1533. sconv = tar->opt_sconv;
  1534. else {
  1535. sconv = archive_string_conversion_from_charset(
  1536. &(a->archive), "UTF-8", 1);
  1537. if (sconv == NULL)
  1538. return (ARCHIVE_FATAL);
  1539. if (tar->compat_2x)
  1540. archive_string_conversion_set_opt(sconv,
  1541. SCONV_SET_OPT_UTF8_LIBARCHIVE2X);
  1542. }
  1543. if (archive_strlen(&(tar->entry_gname)) > 0) {
  1544. if (archive_entry_copy_gname_l(entry, tar->entry_gname.s,
  1545. archive_strlen(&(tar->entry_gname)), sconv) != 0) {
  1546. err = set_conversion_failed_error(a, sconv, "Gname");
  1547. if (err == ARCHIVE_FATAL)
  1548. return (err);
  1549. /* Use a converted an original name. */
  1550. archive_entry_copy_gname(entry, tar->entry_gname.s);
  1551. }
  1552. }
  1553. if (archive_strlen(&(tar->entry_linkpath)) > 0) {
  1554. if (archive_entry_copy_link_l(entry, tar->entry_linkpath.s,
  1555. archive_strlen(&(tar->entry_linkpath)), sconv) != 0) {
  1556. err = set_conversion_failed_error(a, sconv, "Linkname");
  1557. if (err == ARCHIVE_FATAL)
  1558. return (err);
  1559. /* Use a converted an original name. */
  1560. archive_entry_copy_link(entry, tar->entry_linkpath.s);
  1561. }
  1562. }
  1563. /*
  1564. * Some extensions (such as the GNU sparse file extensions)
  1565. * deliberately store a synthetic name under the regular 'path'
  1566. * attribute and the real file name under a different attribute.
  1567. * Since we're supposed to not care about the order, we
  1568. * have no choice but to store all of the various filenames
  1569. * we find and figure it all out afterwards. This is the
  1570. * figuring out part.
  1571. */
  1572. as = NULL;
  1573. if (archive_strlen(&(tar->entry_pathname_override)) > 0)
  1574. as = &(tar->entry_pathname_override);
  1575. else if (archive_strlen(&(tar->entry_pathname)) > 0)
  1576. as = &(tar->entry_pathname);
  1577. if (as != NULL) {
  1578. if (archive_entry_copy_pathname_l(entry, as->s,
  1579. archive_strlen(as), sconv) != 0) {
  1580. err = set_conversion_failed_error(a, sconv, "Pathname");
  1581. if (err == ARCHIVE_FATAL)
  1582. return (err);
  1583. /* Use a converted an original name. */
  1584. archive_entry_copy_pathname(entry, as->s);
  1585. }
  1586. }
  1587. if (archive_strlen(&(tar->entry_uname)) > 0) {
  1588. if (archive_entry_copy_uname_l(entry, tar->entry_uname.s,
  1589. archive_strlen(&(tar->entry_uname)), sconv) != 0) {
  1590. err = set_conversion_failed_error(a, sconv, "Uname");
  1591. if (err == ARCHIVE_FATAL)
  1592. return (err);
  1593. /* Use a converted an original name. */
  1594. archive_entry_copy_uname(entry, tar->entry_uname.s);
  1595. }
  1596. }
  1597. return (err);
  1598. }
  1599. static int
  1600. pax_attribute_xattr(struct archive_entry *entry,
  1601. const char *name, const char *value)
  1602. {
  1603. char *name_decoded;
  1604. void *value_decoded;
  1605. size_t value_len;
  1606. if (strlen(name) < 18 || (memcmp(name, "LIBARCHIVE.xattr.", 17)) != 0)
  1607. return 3;
  1608. name += 17;
  1609. /* URL-decode name */
  1610. name_decoded = url_decode(name);
  1611. if (name_decoded == NULL)
  1612. return 2;
  1613. /* Base-64 decode value */
  1614. value_decoded = base64_decode(value, strlen(value), &value_len);
  1615. if (value_decoded == NULL) {
  1616. free(name_decoded);
  1617. return 1;
  1618. }
  1619. archive_entry_xattr_add_entry(entry, name_decoded,
  1620. value_decoded, value_len);
  1621. free(name_decoded);
  1622. free(value_decoded);
  1623. return 0;
  1624. }
  1625. static int
  1626. pax_attribute_schily_xattr(struct archive_entry *entry,
  1627. const char *name, const char *value, size_t value_length)
  1628. {
  1629. if (strlen(name) < 14 || (memcmp(name, "SCHILY.xattr.", 13)) != 0)
  1630. return 1;
  1631. name += 13;
  1632. archive_entry_xattr_add_entry(entry, name, value, value_length);
  1633. return 0;
  1634. }
  1635. static int
  1636. pax_attribute_acl(struct archive_read *a, struct tar *tar,
  1637. struct archive_entry *entry, const char *value, int type)
  1638. {
  1639. int r;
  1640. const char* errstr;
  1641. switch (type) {
  1642. case ARCHIVE_ENTRY_ACL_TYPE_ACCESS:
  1643. errstr = "SCHILY.acl.access";
  1644. break;
  1645. case ARCHIVE_ENTRY_ACL_TYPE_DEFAULT:
  1646. errstr = "SCHILY.acl.default";
  1647. break;
  1648. case ARCHIVE_ENTRY_ACL_TYPE_NFS4:
  1649. errstr = "SCHILY.acl.ace";
  1650. break;
  1651. default:
  1652. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
  1653. "Unknown ACL type: %d", type);
  1654. return(ARCHIVE_FATAL);
  1655. }
  1656. if (tar->sconv_acl == NULL) {
  1657. tar->sconv_acl =
  1658. archive_string_conversion_from_charset(
  1659. &(a->archive), "UTF-8", 1);
  1660. if (tar->sconv_acl == NULL)
  1661. return (ARCHIVE_FATAL);
  1662. }
  1663. r = archive_acl_from_text_l(archive_entry_acl(entry), value, type,
  1664. tar->sconv_acl);
  1665. if (r != ARCHIVE_OK) {
  1666. if (r == ARCHIVE_FATAL) {
  1667. archive_set_error(&a->archive, ENOMEM,
  1668. "%s %s", "Can't allocate memory for ",
  1669. errstr);
  1670. return (r);
  1671. }
  1672. archive_set_error(&a->archive,
  1673. ARCHIVE_ERRNO_MISC, "%s %s", "Parse error: ", errstr);
  1674. }
  1675. return (r);
  1676. }
  1677. /*
  1678. * Parse a single key=value attribute. key/value pointers are
  1679. * assumed to point into reasonably long-lived storage.
  1680. *
  1681. * Note that POSIX reserves all-lowercase keywords. Vendor-specific
  1682. * extensions should always have keywords of the form "VENDOR.attribute"
  1683. * In particular, it's quite feasible to support many different
  1684. * vendor extensions here. I'm using "LIBARCHIVE" for extensions
  1685. * unique to this library.
  1686. *
  1687. * Investigate other vendor-specific extensions and see if
  1688. * any of them look useful.
  1689. */
  1690. static int
  1691. pax_attribute(struct archive_read *a, struct tar *tar,
  1692. struct archive_entry *entry, const char *key, const char *value, size_t value_length)
  1693. {
  1694. int64_t s;
  1695. long n;
  1696. int err = ARCHIVE_OK, r;
  1697. if (value == NULL)
  1698. value = ""; /* Disable compiler warning; do not pass
  1699. * NULL pointer to strlen(). */
  1700. switch (key[0]) {
  1701. case 'G':
  1702. /* Reject GNU.sparse.* headers on non-regular files. */
  1703. if (strncmp(key, "GNU.sparse", 10) == 0 &&
  1704. !tar->sparse_allowed) {
  1705. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
  1706. "Non-regular file cannot be sparse");
  1707. return (ARCHIVE_FATAL);
  1708. }
  1709. /* GNU "0.0" sparse pax format. */
  1710. if (strcmp(key, "GNU.sparse.numblocks") == 0) {
  1711. tar->sparse_offset = -1;
  1712. tar->sparse_numbytes = -1;
  1713. tar->sparse_gnu_major = 0;
  1714. tar->sparse_gnu_minor = 0;
  1715. }
  1716. if (strcmp(key, "GNU.sparse.offset") == 0) {
  1717. tar->sparse_offset = tar_atol10(value, strlen(value));
  1718. if (tar->sparse_numbytes != -1) {
  1719. if (gnu_add_sparse_entry(a, tar,
  1720. tar->sparse_offset, tar->sparse_numbytes)
  1721. != ARCHIVE_OK)
  1722. return (ARCHIVE_FATAL);
  1723. tar->sparse_offset = -1;
  1724. tar->sparse_numbytes = -1;
  1725. }
  1726. }
  1727. if (strcmp(key, "GNU.sparse.numbytes") == 0) {
  1728. tar->sparse_numbytes = tar_atol10(value, strlen(value));
  1729. if (tar->sparse_numbytes != -1) {
  1730. if (gnu_add_sparse_entry(a, tar,
  1731. tar->sparse_offset, tar->sparse_numbytes)
  1732. != ARCHIVE_OK)
  1733. return (ARCHIVE_FATAL);
  1734. tar->sparse_offset = -1;
  1735. tar->sparse_numbytes = -1;
  1736. }
  1737. }
  1738. if (strcmp(key, "GNU.sparse.size") == 0) {
  1739. tar->realsize = tar_atol10(value, strlen(value));
  1740. archive_entry_set_size(entry, tar->realsize);
  1741. tar->realsize_override = 1;
  1742. }
  1743. /* GNU "0.1" sparse pax format. */
  1744. if (strcmp(key, "GNU.sparse.map") == 0) {
  1745. tar->sparse_gnu_major = 0;
  1746. tar->sparse_gnu_minor = 1;
  1747. if (gnu_sparse_01_parse(a, tar, value) != ARCHIVE_OK)
  1748. return (ARCHIVE_WARN);
  1749. }
  1750. /* GNU "1.0" sparse pax format */
  1751. if (strcmp(key, "GNU.sparse.major") == 0) {
  1752. tar->sparse_gnu_major = (int)tar_atol10(value, strlen(value));
  1753. tar->sparse_gnu_pending = 1;
  1754. }
  1755. if (strcmp(key, "GNU.sparse.minor") == 0) {
  1756. tar->sparse_gnu_minor = (int)tar_atol10(value, strlen(value));
  1757. tar->sparse_gnu_pending = 1;
  1758. }
  1759. if (strcmp(key, "GNU.sparse.name") == 0) {
  1760. /*
  1761. * The real filename; when storing sparse
  1762. * files, GNU tar puts a synthesized name into
  1763. * the regular 'path' attribute in an attempt
  1764. * to limit confusion. ;-)
  1765. */
  1766. archive_strcpy(&(tar->entry_pathname_override), value);
  1767. }
  1768. if (strcmp(key, "GNU.sparse.realsize") == 0) {
  1769. tar->realsize = tar_atol10(value, strlen(value));
  1770. archive_entry_set_size(entry, tar->realsize);
  1771. tar->realsize_override = 1;
  1772. }
  1773. break;
  1774. case 'L':
  1775. /* Our extensions */
  1776. /* TODO: Handle arbitrary extended attributes... */
  1777. /*
  1778. if (strcmp(key, "LIBARCHIVE.xxxxxxx") == 0)
  1779. archive_entry_set_xxxxxx(entry, value);
  1780. */
  1781. if (strcmp(key, "LIBARCHIVE.creationtime") == 0) {
  1782. pax_time(value, &s, &n);
  1783. archive_entry_set_birthtime(entry, s, n);
  1784. }
  1785. if (strcmp(key, "LIBARCHIVE.symlinktype") == 0) {
  1786. if (strcmp(value, "file") == 0) {
  1787. archive_entry_set_symlink_type(entry,
  1788. AE_SYMLINK_TYPE_FILE);
  1789. } else if (strcmp(value, "dir") == 0) {
  1790. archive_entry_set_symlink_type(entry,
  1791. AE_SYMLINK_TYPE_DIRECTORY);
  1792. }
  1793. }
  1794. if (memcmp(key, "LIBARCHIVE.xattr.", 17) == 0)
  1795. pax_attribute_xattr(entry, key, value);
  1796. break;
  1797. case 'S':
  1798. /* We support some keys used by the "star" archiver */
  1799. if (strcmp(key, "SCHILY.acl.access") == 0) {
  1800. r = pax_attribute_acl(a, tar, entry, value,
  1801. ARCHIVE_ENTRY_ACL_TYPE_ACCESS);
  1802. if (r == ARCHIVE_FATAL)
  1803. return (r);
  1804. } else if (strcmp(key, "SCHILY.acl.default") == 0) {
  1805. r = pax_attribute_acl(a, tar, entry, value,
  1806. ARCHIVE_ENTRY_ACL_TYPE_DEFAULT);
  1807. if (r == ARCHIVE_FATAL)
  1808. return (r);
  1809. } else if (strcmp(key, "SCHILY.acl.ace") == 0) {
  1810. r = pax_attribute_acl(a, tar, entry, value,
  1811. ARCHIVE_ENTRY_ACL_TYPE_NFS4);
  1812. if (r == ARCHIVE_FATAL)
  1813. return (r);
  1814. } else if (strcmp(key, "SCHILY.devmajor") == 0) {
  1815. archive_entry_set_rdevmajor(entry,
  1816. (dev_t)tar_atol10(value, strlen(value)));
  1817. } else if (strcmp(key, "SCHILY.devminor") == 0) {
  1818. archive_entry_set_rdevminor(entry,
  1819. (dev_t)tar_atol10(value, strlen(value)));
  1820. } else if (strcmp(key, "SCHILY.fflags") == 0) {
  1821. archive_entry_copy_fflags_text(entry, value);
  1822. } else if (strcmp(key, "SCHILY.dev") == 0) {
  1823. archive_entry_set_dev(entry,
  1824. (dev_t)tar_atol10(value, strlen(value)));
  1825. } else if (strcmp(key, "SCHILY.ino") == 0) {
  1826. archive_entry_set_ino(entry,
  1827. tar_atol10(value, strlen(value)));
  1828. } else if (strcmp(key, "SCHILY.nlink") == 0) {
  1829. archive_entry_set_nlink(entry, (unsigned)
  1830. tar_atol10(value, strlen(value)));
  1831. } else if (strcmp(key, "SCHILY.realsize") == 0) {
  1832. tar->realsize = tar_atol10(value, strlen(value));
  1833. tar->realsize_override = 1;
  1834. archive_entry_set_size(entry, tar->realsize);
  1835. } else if (strncmp(key, "SCHILY.xattr.", 13) == 0) {
  1836. pax_attribute_schily_xattr(entry, key, value,
  1837. value_length);
  1838. } else if (strcmp(key, "SUN.holesdata") == 0) {
  1839. /* A Solaris extension for sparse. */
  1840. r = solaris_sparse_parse(a, tar, entry, value);
  1841. if (r < err) {
  1842. if (r == ARCHIVE_FATAL)
  1843. return (r);
  1844. err = r;
  1845. archive_set_error(&a->archive,
  1846. ARCHIVE_ERRNO_MISC,
  1847. "Parse error: SUN.holesdata");
  1848. }
  1849. }
  1850. break;
  1851. case 'a':
  1852. if (strcmp(key, "atime") == 0) {
  1853. pax_time(value, &s, &n);
  1854. archive_entry_set_atime(entry, s, n);
  1855. }
  1856. break;
  1857. case 'c':
  1858. if (strcmp(key, "ctime") == 0) {
  1859. pax_time(value, &s, &n);
  1860. archive_entry_set_ctime(entry, s, n);
  1861. } else if (strcmp(key, "charset") == 0) {
  1862. /* TODO: Publish charset information in entry. */
  1863. } else if (strcmp(key, "comment") == 0) {
  1864. /* TODO: Publish comment in entry. */
  1865. }
  1866. break;
  1867. case 'g':
  1868. if (strcmp(key, "gid") == 0) {
  1869. archive_entry_set_gid(entry,
  1870. tar_atol10(value, strlen(value)));
  1871. } else if (strcmp(key, "gname") == 0) {
  1872. archive_strcpy(&(tar->entry_gname), value);
  1873. }
  1874. break;
  1875. case 'h':
  1876. if (strcmp(key, "hdrcharset") == 0) {
  1877. if (strcmp(value, "BINARY") == 0)
  1878. /* Binary mode. */
  1879. tar->pax_hdrcharset_binary = 1;
  1880. else if (strcmp(value, "ISO-IR 10646 2000 UTF-8") == 0)
  1881. tar->pax_hdrcharset_binary = 0;
  1882. }
  1883. break;
  1884. case 'l':
  1885. /* pax interchange doesn't distinguish hardlink vs. symlink. */
  1886. if (strcmp(key, "linkpath") == 0) {
  1887. archive_strcpy(&(tar->entry_linkpath), value);
  1888. }
  1889. break;
  1890. case 'm':
  1891. if (strcmp(key, "mtime") == 0) {
  1892. pax_time(value, &s, &n);
  1893. archive_entry_set_mtime(entry, s, n);
  1894. }
  1895. break;
  1896. case 'p':
  1897. if (strcmp(key, "path") == 0) {
  1898. archive_strcpy(&(tar->entry_pathname), value);
  1899. }
  1900. break;
  1901. case 'r':
  1902. /* POSIX has reserved 'realtime.*' */
  1903. break;
  1904. case 's':
  1905. /* POSIX has reserved 'security.*' */
  1906. /* Someday: if (strcmp(key, "security.acl") == 0) { ... } */
  1907. if (strcmp(key, "size") == 0) {
  1908. /* "size" is the size of the data in the entry. */
  1909. tar->entry_bytes_remaining
  1910. = tar_atol10(value, strlen(value));
  1911. /*
  1912. * The "size" pax header keyword always overrides the
  1913. * "size" field in the tar header.
  1914. * GNU.sparse.realsize, GNU.sparse.size and
  1915. * SCHILY.realsize override this value.
  1916. */
  1917. if (!tar->realsize_override) {
  1918. archive_entry_set_size(entry,
  1919. tar->entry_bytes_remaining);
  1920. tar->realsize
  1921. = tar->entry_bytes_remaining;
  1922. }
  1923. }
  1924. break;
  1925. case 'u':
  1926. if (strcmp(key, "uid") == 0) {
  1927. archive_entry_set_uid(entry,
  1928. tar_atol10(value, strlen(value)));
  1929. } else if (strcmp(key, "uname") == 0) {
  1930. archive_strcpy(&(tar->entry_uname), value);
  1931. }
  1932. break;
  1933. }
  1934. return (err);
  1935. }
  1936. /*
  1937. * parse a decimal time value, which may include a fractional portion
  1938. */
  1939. static void
  1940. pax_time(const char *p, int64_t *ps, long *pn)
  1941. {
  1942. char digit;
  1943. int64_t s;
  1944. unsigned long l;
  1945. int sign;
  1946. int64_t limit, last_digit_limit;
  1947. limit = INT64_MAX / 10;
  1948. last_digit_limit = INT64_MAX % 10;
  1949. s = 0;
  1950. sign = 1;
  1951. if (*p == '-') {
  1952. sign = -1;
  1953. p++;
  1954. }
  1955. while (*p >= '0' && *p <= '9') {
  1956. digit = *p - '0';
  1957. if (s > limit ||
  1958. (s == limit && digit > last_digit_limit)) {
  1959. s = INT64_MAX;
  1960. break;
  1961. }
  1962. s = (s * 10) + digit;
  1963. ++p;
  1964. }
  1965. *ps = s * sign;
  1966. /* Calculate nanoseconds. */
  1967. *pn = 0;
  1968. if (*p != '.')
  1969. return;
  1970. l = 100000000UL;
  1971. do {
  1972. ++p;
  1973. if (*p >= '0' && *p <= '9')
  1974. *pn += (*p - '0') * l;
  1975. else
  1976. break;
  1977. } while (l /= 10);
  1978. }
  1979. /*
  1980. * Parse GNU tar header
  1981. */
  1982. static int
  1983. header_gnutar(struct archive_read *a, struct tar *tar,
  1984. struct archive_entry *entry, const void *h, size_t *unconsumed)
  1985. {
  1986. const struct archive_entry_header_gnutar *header;
  1987. int64_t t;
  1988. int err = ARCHIVE_OK;
  1989. /*
  1990. * GNU header is like POSIX ustar, except 'prefix' is
  1991. * replaced with some other fields. This also means the
  1992. * filename is stored as in old-style archives.
  1993. */
  1994. /* Grab fields common to all tar variants. */
  1995. err = header_common(a, tar, entry, h);
  1996. if (err == ARCHIVE_FATAL)
  1997. return (err);
  1998. /* Copy filename over (to ensure null termination). */
  1999. header = (const struct archive_entry_header_gnutar *)h;
  2000. if (archive_entry_copy_pathname_l(entry,
  2001. header->name, sizeof(header->name), tar->sconv) != 0) {
  2002. err = set_conversion_failed_error(a, tar->sconv, "Pathname");
  2003. if (err == ARCHIVE_FATAL)
  2004. return (err);
  2005. }
  2006. /* Fields common to ustar and GNU */
  2007. /* XXX Can the following be factored out since it's common
  2008. * to ustar and gnu tar? Is it okay to move it down into
  2009. * header_common, perhaps? */
  2010. if (archive_entry_copy_uname_l(entry,
  2011. header->uname, sizeof(header->uname), tar->sconv) != 0) {
  2012. err = set_conversion_failed_error(a, tar->sconv, "Uname");
  2013. if (err == ARCHIVE_FATAL)
  2014. return (err);
  2015. }
  2016. if (archive_entry_copy_gname_l(entry,
  2017. header->gname, sizeof(header->gname), tar->sconv) != 0) {
  2018. err = set_conversion_failed_error(a, tar->sconv, "Gname");
  2019. if (err == ARCHIVE_FATAL)
  2020. return (err);
  2021. }
  2022. /* Parse out device numbers only for char and block specials */
  2023. if (header->typeflag[0] == '3' || header->typeflag[0] == '4') {
  2024. archive_entry_set_rdevmajor(entry, (dev_t)
  2025. tar_atol(header->rdevmajor, sizeof(header->rdevmajor)));
  2026. archive_entry_set_rdevminor(entry, (dev_t)
  2027. tar_atol(header->rdevminor, sizeof(header->rdevminor)));
  2028. } else
  2029. archive_entry_set_rdev(entry, 0);
  2030. tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
  2031. /* Grab GNU-specific fields. */
  2032. t = tar_atol(header->atime, sizeof(header->atime));
  2033. if (t > 0)
  2034. archive_entry_set_atime(entry, t, 0);
  2035. t = tar_atol(header->ctime, sizeof(header->ctime));
  2036. if (t > 0)
  2037. archive_entry_set_ctime(entry, t, 0);
  2038. if (header->realsize[0] != 0) {
  2039. tar->realsize
  2040. = tar_atol(header->realsize, sizeof(header->realsize));
  2041. archive_entry_set_size(entry, tar->realsize);
  2042. tar->realsize_override = 1;
  2043. }
  2044. if (header->sparse[0].offset[0] != 0) {
  2045. if (gnu_sparse_old_read(a, tar, header, unconsumed)
  2046. != ARCHIVE_OK)
  2047. return (ARCHIVE_FATAL);
  2048. } else {
  2049. if (header->isextended[0] != 0) {
  2050. /* XXX WTF? XXX */
  2051. }
  2052. }
  2053. return (err);
  2054. }
  2055. static int
  2056. gnu_add_sparse_entry(struct archive_read *a, struct tar *tar,
  2057. int64_t offset, int64_t remaining)
  2058. {
  2059. struct sparse_block *p;
  2060. p = (struct sparse_block *)calloc(1, sizeof(*p));
  2061. if (p == NULL) {
  2062. archive_set_error(&a->archive, ENOMEM, "Out of memory");
  2063. return (ARCHIVE_FATAL);
  2064. }
  2065. if (tar->sparse_last != NULL)
  2066. tar->sparse_last->next = p;
  2067. else
  2068. tar->sparse_list = p;
  2069. tar->sparse_last = p;
  2070. if (remaining < 0 || offset < 0 || offset > INT64_MAX - remaining) {
  2071. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Malformed sparse map data");
  2072. return (ARCHIVE_FATAL);
  2073. }
  2074. p->offset = offset;
  2075. p->remaining = remaining;
  2076. return (ARCHIVE_OK);
  2077. }
  2078. static void
  2079. gnu_clear_sparse_list(struct tar *tar)
  2080. {
  2081. struct sparse_block *p;
  2082. while (tar->sparse_list != NULL) {
  2083. p = tar->sparse_list;
  2084. tar->sparse_list = p->next;
  2085. free(p);
  2086. }
  2087. tar->sparse_last = NULL;
  2088. }
  2089. /*
  2090. * GNU tar old-format sparse data.
  2091. *
  2092. * GNU old-format sparse data is stored in a fixed-field
  2093. * format. Offset/size values are 11-byte octal fields (same
  2094. * format as 'size' field in ustart header). These are
  2095. * stored in the header, allocating subsequent header blocks
  2096. * as needed. Extending the header in this way is a pretty
  2097. * severe POSIX violation; this design has earned GNU tar a
  2098. * lot of criticism.
  2099. */
  2100. static int
  2101. gnu_sparse_old_read(struct archive_read *a, struct tar *tar,
  2102. const struct archive_entry_header_gnutar *header, size_t *unconsumed)
  2103. {
  2104. ssize_t bytes_read;
  2105. const void *data;
  2106. struct extended {
  2107. struct gnu_sparse sparse[21];
  2108. char isextended[1];
  2109. char padding[7];
  2110. };
  2111. const struct extended *ext;
  2112. if (gnu_sparse_old_parse(a, tar, header->sparse, 4) != ARCHIVE_OK)
  2113. return (ARCHIVE_FATAL);
  2114. if (header->isextended[0] == 0)
  2115. return (ARCHIVE_OK);
  2116. do {
  2117. tar_flush_unconsumed(a, unconsumed);
  2118. data = __archive_read_ahead(a, 512, &bytes_read);
  2119. if (bytes_read < 0)
  2120. return (ARCHIVE_FATAL);
  2121. if (bytes_read < 512) {
  2122. archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
  2123. "Truncated tar archive "
  2124. "detected while reading sparse file data");
  2125. return (ARCHIVE_FATAL);
  2126. }
  2127. *unconsumed = 512;
  2128. ext = (const struct extended *)data;
  2129. if (gnu_sparse_old_parse(a, tar, ext->sparse, 21) != ARCHIVE_OK)
  2130. return (ARCHIVE_FATAL);
  2131. } while (ext->isextended[0] != 0);
  2132. if (tar->sparse_list != NULL)
  2133. tar->entry_offset = tar->sparse_list->offset;
  2134. return (ARCHIVE_OK);
  2135. }
  2136. static int
  2137. gnu_sparse_old_parse(struct archive_read *a, struct tar *tar,
  2138. const struct gnu_sparse *sparse, int length)
  2139. {
  2140. while (length > 0 && sparse->offset[0] != 0) {
  2141. if (gnu_add_sparse_entry(a, tar,
  2142. tar_atol(sparse->offset, sizeof(sparse->offset)),
  2143. tar_atol(sparse->numbytes, sizeof(sparse->numbytes)))
  2144. != ARCHIVE_OK)
  2145. return (ARCHIVE_FATAL);
  2146. sparse++;
  2147. length--;
  2148. }
  2149. return (ARCHIVE_OK);
  2150. }
  2151. /*
  2152. * GNU tar sparse format 0.0
  2153. *
  2154. * Beginning with GNU tar 1.15, sparse files are stored using
  2155. * information in the pax extended header. The GNU tar maintainers
  2156. * have gone through a number of variations in the process of working
  2157. * out this scheme; fortunately, they're all numbered.
  2158. *
  2159. * Sparse format 0.0 uses attribute GNU.sparse.numblocks to store the
  2160. * number of blocks, and GNU.sparse.offset/GNU.sparse.numbytes to
  2161. * store offset/size for each block. The repeated instances of these
  2162. * latter fields violate the pax specification (which frowns on
  2163. * duplicate keys), so this format was quickly replaced.
  2164. */
  2165. /*
  2166. * GNU tar sparse format 0.1
  2167. *
  2168. * This version replaced the offset/numbytes attributes with
  2169. * a single "map" attribute that stored a list of integers. This
  2170. * format had two problems: First, the "map" attribute could be very
  2171. * long, which caused problems for some implementations. More
  2172. * importantly, the sparse data was lost when extracted by archivers
  2173. * that didn't recognize this extension.
  2174. */
  2175. static int
  2176. gnu_sparse_01_parse(struct archive_read *a, struct tar *tar, const char *p)
  2177. {
  2178. const char *e;
  2179. int64_t offset = -1, size = -1;
  2180. for (;;) {
  2181. e = p;
  2182. while (*e != '\0' && *e != ',') {
  2183. if (*e < '0' || *e > '9')
  2184. return (ARCHIVE_WARN);
  2185. e++;
  2186. }
  2187. if (offset < 0) {
  2188. offset = tar_atol10(p, e - p);
  2189. if (offset < 0)
  2190. return (ARCHIVE_WARN);
  2191. } else {
  2192. size = tar_atol10(p, e - p);
  2193. if (size < 0)
  2194. return (ARCHIVE_WARN);
  2195. if (gnu_add_sparse_entry(a, tar, offset, size)
  2196. != ARCHIVE_OK)
  2197. return (ARCHIVE_FATAL);
  2198. offset = -1;
  2199. }
  2200. if (*e == '\0')
  2201. return (ARCHIVE_OK);
  2202. p = e + 1;
  2203. }
  2204. }
  2205. /*
  2206. * GNU tar sparse format 1.0
  2207. *
  2208. * The idea: The offset/size data is stored as a series of base-10
  2209. * ASCII numbers prepended to the file data, so that dearchivers that
  2210. * don't support this format will extract the block map along with the
  2211. * data and a separate post-process can restore the sparseness.
  2212. *
  2213. * Unfortunately, GNU tar 1.16 had a bug that added unnecessary
  2214. * padding to the body of the file when using this format. GNU tar
  2215. * 1.17 corrected this bug without bumping the version number, so
  2216. * it's not possible to support both variants. This code supports
  2217. * the later variant at the expense of not supporting the former.
  2218. *
  2219. * This variant also replaced GNU.sparse.size with GNU.sparse.realsize
  2220. * and introduced the GNU.sparse.major/GNU.sparse.minor attributes.
  2221. */
  2222. /*
  2223. * Read the next line from the input, and parse it as a decimal
  2224. * integer followed by '\n'. Returns positive integer value or
  2225. * negative on error.
  2226. */
  2227. static int64_t
  2228. gnu_sparse_10_atol(struct archive_read *a, struct tar *tar,
  2229. int64_t *remaining, size_t *unconsumed)
  2230. {
  2231. int64_t l, limit, last_digit_limit;
  2232. const char *p;
  2233. ssize_t bytes_read;
  2234. int base, digit;
  2235. base = 10;
  2236. limit = INT64_MAX / base;
  2237. last_digit_limit = INT64_MAX % base;
  2238. /*
  2239. * Skip any lines starting with '#'; GNU tar specs
  2240. * don't require this, but they should.
  2241. */
  2242. do {
  2243. bytes_read = readline(a, tar, &p,
  2244. (ssize_t)tar_min(*remaining, 100), unconsumed);
  2245. if (bytes_read <= 0)
  2246. return (ARCHIVE_FATAL);
  2247. *remaining -= bytes_read;
  2248. } while (p[0] == '#');
  2249. l = 0;
  2250. while (bytes_read > 0) {
  2251. if (*p == '\n')
  2252. return (l);
  2253. if (*p < '0' || *p >= '0' + base)
  2254. return (ARCHIVE_WARN);
  2255. digit = *p - '0';
  2256. if (l > limit || (l == limit && digit > last_digit_limit))
  2257. l = INT64_MAX; /* Truncate on overflow. */
  2258. else
  2259. l = (l * base) + digit;
  2260. p++;
  2261. bytes_read--;
  2262. }
  2263. /* TODO: Error message. */
  2264. return (ARCHIVE_WARN);
  2265. }
  2266. /*
  2267. * Returns length (in bytes) of the sparse data description
  2268. * that was read.
  2269. */
  2270. static ssize_t
  2271. gnu_sparse_10_read(struct archive_read *a, struct tar *tar, size_t *unconsumed)
  2272. {
  2273. ssize_t bytes_read;
  2274. int entries;
  2275. int64_t offset, size, to_skip, remaining;
  2276. /* Clear out the existing sparse list. */
  2277. gnu_clear_sparse_list(tar);
  2278. remaining = tar->entry_bytes_remaining;
  2279. /* Parse entries. */
  2280. entries = (int)gnu_sparse_10_atol(a, tar, &remaining, unconsumed);
  2281. if (entries < 0)
  2282. return (ARCHIVE_FATAL);
  2283. /* Parse the individual entries. */
  2284. while (entries-- > 0) {
  2285. /* Parse offset/size */
  2286. offset = gnu_sparse_10_atol(a, tar, &remaining, unconsumed);
  2287. if (offset < 0)
  2288. return (ARCHIVE_FATAL);
  2289. size = gnu_sparse_10_atol(a, tar, &remaining, unconsumed);
  2290. if (size < 0)
  2291. return (ARCHIVE_FATAL);
  2292. /* Add a new sparse entry. */
  2293. if (gnu_add_sparse_entry(a, tar, offset, size) != ARCHIVE_OK)
  2294. return (ARCHIVE_FATAL);
  2295. }
  2296. /* Skip rest of block... */
  2297. tar_flush_unconsumed(a, unconsumed);
  2298. bytes_read = (ssize_t)(tar->entry_bytes_remaining - remaining);
  2299. to_skip = 0x1ff & -bytes_read;
  2300. /* Fail if tar->entry_bytes_remaing would get negative */
  2301. if (to_skip > remaining)
  2302. return (ARCHIVE_FATAL);
  2303. if (to_skip != __archive_read_consume(a, to_skip))
  2304. return (ARCHIVE_FATAL);
  2305. return ((ssize_t)(bytes_read + to_skip));
  2306. }
  2307. /*
  2308. * Solaris pax extension for a sparse file. This is recorded with the
  2309. * data and hole pairs. The way recording sparse information by Solaris'
  2310. * pax simply indicates where data and sparse are, so the stored contents
  2311. * consist of both data and hole.
  2312. */
  2313. static int
  2314. solaris_sparse_parse(struct archive_read *a, struct tar *tar,
  2315. struct archive_entry *entry, const char *p)
  2316. {
  2317. const char *e;
  2318. int64_t start, end;
  2319. int hole = 1;
  2320. (void)entry; /* UNUSED */
  2321. end = 0;
  2322. if (*p == ' ')
  2323. p++;
  2324. else
  2325. return (ARCHIVE_WARN);
  2326. for (;;) {
  2327. e = p;
  2328. while (*e != '\0' && *e != ' ') {
  2329. if (*e < '0' || *e > '9')
  2330. return (ARCHIVE_WARN);
  2331. e++;
  2332. }
  2333. start = end;
  2334. end = tar_atol10(p, e - p);
  2335. if (end < 0)
  2336. return (ARCHIVE_WARN);
  2337. if (start < end) {
  2338. if (gnu_add_sparse_entry(a, tar, start,
  2339. end - start) != ARCHIVE_OK)
  2340. return (ARCHIVE_FATAL);
  2341. tar->sparse_last->hole = hole;
  2342. }
  2343. if (*e == '\0')
  2344. return (ARCHIVE_OK);
  2345. p = e + 1;
  2346. hole = hole == 0;
  2347. }
  2348. }
  /*-
   * Convert text->integer.
   *
   * Traditional tar formats (including POSIX) specify base-8 for
   * all of the standard numeric fields.  This is a significant
   * limitation in practice:
   *    = file size is limited to 8GB
   *    = rdevmajor and rdevminor are limited to 21 bits
   *    = uid/gid are limited to 21 bits
   *
   * There are two workarounds for this:
   *    = pax extended headers, which use variable-length string fields
   *    = GNU tar and STAR both allow either base-8 or base-256 in
   *      most fields.  The high bit is set to indicate base-256.
   *
   * On read, this implementation supports both extensions: a field
   * whose first byte has the high bit set is decoded as base-256,
   * anything else as base-8.
   */
  static int64_t
  tar_atol(const char *p, size_t char_cnt)
  {
      /*
       * Technically, GNU tar considers a field to be in base-256
       * only if the first byte is 0xff or 0x80.
       */
      const int is_base256 = (*p & 0x80) != 0;

      return (is_base256 ?
          tar_atol256(p, char_cnt) : tar_atol8(p, char_cnt));
  }
  /*
   * Parse at most char_cnt bytes at 'p' as a signed integer in the
   * given base (digits '0' .. '0'+base-1, so base must be 2..10).
   *
   * Leading spaces and tabs are skipped and a single leading '-' is
   * accepted.  Parsing stops at the first byte that is not a digit in
   * this base or when char_cnt bytes have been examined; no byte at or
   * beyond p[char_cnt] is ever read.  An empty field yields 0.  Values
   * that do not fit are saturated to INT64_MAX (or INT64_MIN for
   * negative input).
   *
   * Note that this implementation does not (and should not!) obey
   * locale settings; you cannot simply substitute strtol here, since
   * it does obey locale.
   */
  static int64_t
  tar_atol_base_n(const char *p, size_t char_cnt, int base)
  {
      int64_t l, maxval, limit, last_digit_limit;
      int digit, sign;

      maxval = INT64_MAX;
      limit = INT64_MAX / base;
      last_digit_limit = INT64_MAX % base;

      /* the pointer will not be dereferenced if char_cnt is zero
       * due to the way the && operator is evaluated.
       */
      while (char_cnt != 0 && (*p == ' ' || *p == '\t')) {
          p++;
          char_cnt--;
      }

      sign = 1;
      if (char_cnt != 0 && *p == '-') {
          sign = -1;
          p++;
          char_cnt--;

          /*
           * The magnitude is accumulated as a positive number, so
           * both limits must be positive too: INT64_MIN % base is
           * negative (or zero) and has to be negated.
           */
          maxval = INT64_MIN;
          limit = -(INT64_MIN / base);
          last_digit_limit = -(INT64_MIN % base);
      }

      l = 0;
      /* Check the remaining count before every dereference. */
      while (char_cnt != 0) {
          digit = *p - '0';
          if (digit < 0 || digit >= base)
              break;
          /*
           * Using '>=' on the last digit keeps the accumulated
           * magnitude at or below INT64_MAX, so the multiply below
           * and the final negation can never overflow.  The boundary
           * value itself equals maxval, so returning maxval is exact.
           */
          if (l > limit || (l == limit && digit >= last_digit_limit))
              return (maxval); /* Truncate on overflow. */
          l = (l * base) + digit;
          p++;
          char_cnt--;
      }
      return (sign < 0) ? -l : l;
  }
  /*
   * Parse a base-8 (octal) field of at most char_cnt bytes; see
   * tar_atol_base_n() for the parsing rules.
   */
  static int64_t
  tar_atol8(const char *p, size_t char_cnt)
  {
      const int octal = 8;

      return (tar_atol_base_n(p, char_cnt, octal));
  }
  /*
   * Parse a base-10 (decimal) field of at most char_cnt bytes; see
   * tar_atol_base_n() for the parsing rules.
   */
  static int64_t
  tar_atol10(const char *p, size_t char_cnt)
  {
      const int decimal = 10;

      return (tar_atol_base_n(p, char_cnt, decimal));
  }
  /*
   * Parse a base-256 integer.  This is just a variable-length
   * twos-complement signed binary value in big-endian order, except
   * that the high-order bit is ignored.  The values here can be up to
   * 12 bytes, so we need to be careful about overflowing 64-bit
   * (8-byte) integers.
   *
   * Bit 0x40 of the first byte selects the sign.  Values that do not
   * fit in 64 bits are saturated to INT64_MAX or INT64_MIN.  The
   * first byte is read unconditionally, so char_cnt must be at least 1.
   *
   * This code unashamedly assumes that the local machine uses 8-bit
   * bytes and twos-complement arithmetic.
   */
  static int64_t
  tar_atol256(const char *_p, size_t char_cnt)
  {
      const unsigned char *bytes = (const unsigned char *)_p;
      /* Extend 7-bit 2s-comp to 8-bit 2s-comp, decide sign. */
      const int negative = (bytes[0] & 0x40) != 0;
      const unsigned char fill = negative ? 0xff : 0x00;
      const int64_t saturated = negative ? INT64_MIN : INT64_MAX;
      uint64_t acc = negative ? ~(uint64_t)0 : 0;
      unsigned char cur = negative ? (bytes[0] | 0x80) : (bytes[0] & 0x7f);

      /* If more than 8 bytes, check that we can ignore
       * high-order bits without overflow. */
      for (; char_cnt > sizeof(int64_t); char_cnt--) {
          if (cur != fill)
              return (saturated);
          cur = *++bytes;
      }

      /* cur is first byte that fits; if sign mismatch, return overflow */
      if ((cur ^ fill) & 0x80)
          return (saturated);

      /* Accumulate remaining bytes; the last one is folded in below. */
      while (--char_cnt > 0) {
          acc = (acc << 8) | cur;
          cur = *++bytes;
      }
      acc = (acc << 8) | cur;

      /* Return signed twos-complement value. */
      return ((int64_t)acc);
  }
  2478. /*
  2479. * Returns length of line (including trailing newline)
  2480. * or negative on error. 'start' argument is updated to
  2481. * point to first character of line. This avoids copying
  2482. * when possible.
  2483. */
  2484. static ssize_t
  2485. readline(struct archive_read *a, struct tar *tar, const char **start,
  2486. ssize_t limit, size_t *unconsumed)
  2487. {
  2488. ssize_t bytes_read;
  2489. ssize_t total_size = 0;
  2490. const void *t;
  2491. const char *s;
  2492. void *p;
  2493. tar_flush_unconsumed(a, unconsumed);
  2494. t = __archive_read_ahead(a, 1, &bytes_read);
  2495. if (bytes_read <= 0)
  2496. return (ARCHIVE_FATAL);
  2497. s = t; /* Start of line? */
  2498. p = memchr(t, '\n', bytes_read);
  2499. /* If we found '\n' in the read buffer, return pointer to that. */
  2500. if (p != NULL) {
  2501. bytes_read = 1 + ((const char *)p) - s;
  2502. if (bytes_read > limit) {
  2503. archive_set_error(&a->archive,
  2504. ARCHIVE_ERRNO_FILE_FORMAT,
  2505. "Line too long");
  2506. return (ARCHIVE_FATAL);
  2507. }
  2508. *unconsumed = bytes_read;
  2509. *start = s;
  2510. return (bytes_read);
  2511. }
  2512. *unconsumed = bytes_read;
  2513. /* Otherwise, we need to accumulate in a line buffer. */
  2514. for (;;) {
  2515. if (total_size + bytes_read > limit) {
  2516. archive_set_error(&a->archive,
  2517. ARCHIVE_ERRNO_FILE_FORMAT,
  2518. "Line too long");
  2519. return (ARCHIVE_FATAL);
  2520. }
  2521. if (archive_string_ensure(&tar->line, total_size + bytes_read) == NULL) {
  2522. archive_set_error(&a->archive, ENOMEM,
  2523. "Can't allocate working buffer");
  2524. return (ARCHIVE_FATAL);
  2525. }
  2526. memcpy(tar->line.s + total_size, t, bytes_read);
  2527. tar_flush_unconsumed(a, unconsumed);
  2528. total_size += bytes_read;
  2529. /* If we found '\n', clean up and return. */
  2530. if (p != NULL) {
  2531. *start = tar->line.s;
  2532. return (total_size);
  2533. }
  2534. /* Read some more. */
  2535. t = __archive_read_ahead(a, 1, &bytes_read);
  2536. if (bytes_read <= 0)
  2537. return (ARCHIVE_FATAL);
  2538. s = t; /* Start of line? */
  2539. p = memchr(t, '\n', bytes_read);
  2540. /* If we found '\n', trim the read. */
  2541. if (p != NULL) {
  2542. bytes_read = 1 + ((const char *)p) - s;
  2543. }
  2544. *unconsumed = bytes_read;
  2545. }
  2546. }
  /*
   * base64_decode - Base64 decode
   *
   * This accepts most variations of base-64 encoding, including:
   *    * with or without line breaks
   *    * with or without the final group padded with '=' or '_' characters
   * (The most economical Base-64 variant does not pad the last group and
   * omits line breaks; RFC1341 used for MIME requires both.)
   *
   * Decodes 'len' bytes at 's'.  Characters outside the base-64
   * alphabet are skipped, and decoding stops at the first '=' or '_'.
   * Returns a buffer obtained from malloc() and stores the number of
   * decoded bytes in '*out_len'; the buffer is not NUL-terminated by
   * this function.  On allocation failure returns NULL and sets
   * '*out_len' to 0.
   */
  static char *
  base64_decode(const char *s, size_t len, size_t *out_len)
  {
      /* The trailing NUL of the literal is not part of the alphabet. */
      static const unsigned char alphabet[] =
          "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
          "abcdefghijklmnopqrstuvwxyz"
          "0123456789+/";
      static unsigned char decode_table[128];
      const unsigned char *in = (const unsigned char *)s;
      char *result, *wr;

      /* If the decode table is not yet initialized, prepare it. */
      if (decode_table[alphabet[1]] != 1) {
          unsigned idx;

          memset(decode_table, 0xff, sizeof(decode_table));
          for (idx = 0; idx < sizeof(alphabet) - 1; idx++)
              decode_table[alphabet[idx]] = idx;
      }

      /* Allocate enough space to hold the entire output. */
      /* Note that we may not use all of this... */
      result = (char *)malloc(len - len / 4 + 1);
      if (result == NULL) {
          *out_len = 0;
          return (NULL);
      }

      wr = result;
      while (len > 0) {
          /* Collect the next group of (up to) four characters. */
          int bits = 0;
          int count = 0;

          while (count < 4 && len > 0) {
              const unsigned char ch = *in;

              /* '=' or '_' padding indicates final group. */
              if (ch == '=' || ch == '_') {
                  len = 0;
                  break;
              }
              in++;
              len--;
              /* Skip illegal characters (including line breaks) */
              if (ch > 127 || ch < 32 || decode_table[ch] == 0xff)
                  continue;
              bits = (bits << 6) | decode_table[ch];
              count++;
          }

          /* Align a short group properly. */
          bits <<= 6 * (4 - count);

          /*
           * Unpack the group we just collected: n characters carry
           * n - 1 whole bytes.  A lone character is invalid and
           * produces no output.
           */
          if (count >= 2)
              wr[0] = (bits >> 16) & 0xff;
          if (count >= 3)
              wr[1] = (bits >> 8) & 0xff;
          if (count == 4)
              wr[2] = bits & 0xff;
          wr += count * 3 / 4;
      }

      *out_len = wr - result;
      return (result);
  }
  /*
   * Return a copy of 'in' with every "%XX" escape (two hex digits)
   * replaced by the byte it encodes.  A '%' that is not followed by two
   * hex digits is copied through unchanged.  The result is
   * NUL-terminated and comes from malloc(); NULL is returned if the
   * allocation fails.
   */
  static char *
  url_decode(const char *in)
  {
      const char *rd = in;
      char *result, *wr;

      /* Decoding never lengthens the string, so strlen(in) + 1 suffices. */
      result = (char *)malloc(strlen(in) + 1);
      if (result == NULL)
          return (NULL);

      wr = result;
      while (*rd != '\0') {
          if (rd[0] == '%' && rd[1] != '\0' && rd[2] != '\0') {
              /* Try to convert % escape */
              const int hi = tohex(rd[1]);
              const int lo = tohex(rd[2]);

              if (hi >= 0 && lo >= 0) {
                  /* Looks good: emit the byte, consume three chars */
                  *wr++ = ((hi << 4) | lo);
                  rd += 3;
                  continue;
              }
              /* Else fall through and treat '%' as normal char */
          }
          *wr++ = *rd++;
      }
      *wr = '\0';
      return (result);
  }
  /*
   * Return the value (0..15) of the hexadecimal digit 'c', accepting
   * both upper- and lower-case letters, or -1 if 'c' is not a hex digit.
   */
  static int
  tohex(int c)
  {
      int value = -1;

      if ('0' <= c && c <= '9')
          value = c - '0';
      else if ('A' <= c && c <= 'F')
          value = 10 + (c - 'A');
      else if ('a' <= c && c <= 'f')
          value = 10 + (c - 'a');
      return (value);
  }