/* archive_read_support_format_rar5.c (114 KB) */
  1. /*-
  2. * Copyright (c) 2018 Grzegorz Antoniak (http://antoniak.org)
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
  15. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  16. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  17. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
  18. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  19. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  20. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  21. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  22. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  23. * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  24. */
  25. #include "archive_platform.h"
  26. #include "archive_endian.h"
  27. #ifdef HAVE_ERRNO_H
  28. #include <errno.h>
  29. #endif
  30. #include <time.h>
  31. #ifdef HAVE_ZLIB_H
  32. #include <cm3p/zlib.h> /* crc32 */
  33. #endif
  34. #ifdef HAVE_LIMITS_H
  35. #include <limits.h>
  36. #endif
  37. #include "archive.h"
  38. #ifndef HAVE_ZLIB_H
  39. #include "archive_crc32.h"
  40. #endif
  41. #include "archive_entry.h"
  42. #include "archive_entry_locale.h"
  43. #include "archive_ppmd7_private.h"
  44. #include "archive_entry_private.h"
  45. #ifdef HAVE_BLAKE2_H
  46. #include <blake2.h>
  47. #else
  48. #include "archive_blake2.h"
  49. #endif
  50. /*#define CHECK_CRC_ON_SOLID_SKIP*/
  51. /*#define DONT_FAIL_ON_CRC_ERROR*/
  52. /*#define DEBUG*/
  /* Smaller / larger of two values. Both are plain macros, so each argument
   * may be evaluated twice: do not pass expressions with side effects. */
  #define rar5_min(a, b) (((a) > (b)) ? (b) : (a))
  #define rar5_max(a, b) (((a) > (b)) ? (a) : (b))
  /* Number of elements in the array X, as a signed (ssize_t) value. X must be
   * a real array, not a pointer. The argument is fully parenthesized so any
   * array-typed expression can be passed safely. */
  #define rar5_countof(X) ((ssize_t) (sizeof(X) / sizeof(*(X))))
  /* Debug helpers. `DEBUG_CODE { ... }` guards a block that only runs when
   * the file is built with DEBUG defined; LOG() is available only in that
   * configuration and prints one "rar5: "-prefixed line. */
  #ifdef DEBUG
  #define DEBUG_CODE if(1)
  #define LOG(...) do { printf("rar5: " __VA_ARGS__); puts(""); } while(0)
  #else
  #define DEBUG_CODE if(0)
  #endif
  /* The real RAR5 magic number is:
   *
   *   0x52, 0x61, 0x72, 0x21, 0x1a, 0x07, 0x01, 0x00
   *   "Rar!→•☺·\x00"
   *
   * It is stored here XOR'ed with 0xA1 and retrieved with
   * `rar5_signature()`. The plain sequence is deliberately kept out of
   * every binary that uses libarchive, so applications that scan files for
   * this marker won't trigger on this "false" one.
   *
   * The array itself is decrypted in the `rar5_init` function. */
  static unsigned char rar5_signature_xor[] = {
      0xF3, 0xC0, 0xD3, 0x80, 0xBB, 0xA6, 0xA0, 0xA1
  };
  static const size_t g_unpack_window_size = 0x20000;
  /* Name length limits. They are macros rather than `static const`
   * objects because of Visual Studio. */
  #define MAX_NAME_IN_CHARS 2048
  #define MAX_NAME_IN_BYTES (4 * MAX_NAME_IN_CHARS)
  79. struct file_header {
  80. ssize_t bytes_remaining;
  81. ssize_t unpacked_size;
  82. int64_t last_offset; /* Used in sanity checks. */
  83. int64_t last_size; /* Used in sanity checks. */
  84. uint8_t solid : 1; /* Is this a solid stream? */
  85. uint8_t service : 1; /* Is this file a service data? */
  86. uint8_t eof : 1; /* Did we finish unpacking the file? */
  87. uint8_t dir : 1; /* Is this file entry a directory? */
  88. /* Optional time fields. */
  89. uint64_t e_mtime;
  90. uint64_t e_ctime;
  91. uint64_t e_atime;
  92. uint32_t e_unix_ns;
  93. /* Optional hash fields. */
  94. uint32_t stored_crc32;
  95. uint32_t calculated_crc32;
  96. uint8_t blake2sp[32];
  97. blake2sp_state b2state;
  98. char has_blake2;
  99. /* Optional redir fields */
  100. uint64_t redir_type;
  101. uint64_t redir_flags;
  102. ssize_t solid_window_size; /* Used in file format check. */
  103. };
  /* Identifiers of the EX_* extra records. */
  enum EXTRA {
      EX_CRYPT = 1,
      EX_HASH = 2,
      EX_HTIME = 3,
      EX_VERSION = 4,
      EX_REDIR = 5,
      EX_UOWNER = 6,
      EX_SUBDATA = 7
  };
  #define REDIR_SYMLINK_IS_DIR 1

  /* Redirection kinds (symlinks, junctions, hardlinks, file copies). */
  enum REDIR_TYPE {
      REDIR_TYPE_NONE = 0,
      REDIR_TYPE_UNIXSYMLINK,     /* 1 */
      REDIR_TYPE_WINSYMLINK,      /* 2 */
      REDIR_TYPE_JUNCTION,        /* 3 */
      REDIR_TYPE_HARDLINK,        /* 4 */
      REDIR_TYPE_FILECOPY,        /* 5 */
  };
  /* OWNER_* bit flags (one bit each), followed by the maximum owner name
   * length. */
  #define OWNER_USER_NAME (1 << 0)
  #define OWNER_GROUP_NAME (1 << 1)
  #define OWNER_USER_UID (1 << 2)
  #define OWNER_GROUP_GID (1 << 3)
  #define OWNER_MAXNAMELEN 256
  /* Filter kinds. Several of them are listed but not used in RARv5. */
  enum FILTER_TYPE {
      /* Generic pattern. */
      FILTER_DELTA = 0,
      /* Intel x86 code. */
      FILTER_E8 = 1,
      /* Intel x86 code. */
      FILTER_E8E9 = 2,
      /* ARM code. */
      FILTER_ARM = 3,
      /* Audio filter, not used in RARv5. */
      FILTER_AUDIO = 4,
      /* Color palette, not used in RARv5. */
      FILTER_RGB = 5,
      /* Intel's Itanium, not used in RARv5. */
      FILTER_ITANIUM = 6,
      /* Predictive pattern matching, not used in RARv5. */
      FILTER_PPM = 7,
      FILTER_NONE = 8,
  };
  /* Description of one filter. Pointers to these objects are what the
   * cdeque_filter() / cdeque_filter_p() helpers convert for storage in a
   * circular deque. */
  struct filter_info {
      int type;
      int channels;
      int pos_r;

      int64_t block_start;
      ssize_t block_length;

      uint16_t width;
  };
  /* One data-buffer record: an in-use marker plus a read-only buffer
   * pointer, its size and an offset. */
  struct data_ready {
      char used;
      const uint8_t *buf;
      size_t size;
      int64_t offset;
  };
  /* Circular double-ended queue. Positions, mask and element count are all
   * 16 bits wide; elements are stored as size_t values in `arr`. */
  struct cdeque {
      uint16_t beg_pos;     /* Index of the front element. */
      uint16_t end_pos;     /* Index where the next element is stored. */
      uint16_t cap_mask;    /* Capacity - 1; used to wrap positions. */
      uint16_t size;        /* Number of stored elements. */
      size_t *arr;          /* Backing storage. */
  };
  /* One decode table. Instances are used for the Huffman bit lengths,
   * literals, distances, lower bits of distances and repeating distances
   * during lzss uncompression. */
  struct decode_table {
      uint32_t size;

      int32_t decode_len[16];
      uint32_t decode_pos[16];

      uint32_t quick_bits;
      uint8_t quick_len[1 << 10];
      uint16_t quick_num[1 << 10];

      uint16_t decode_num[306];
  };
  169. struct comp_state {
  170. /* Flag used to specify if unpacker needs to reinitialize the
  171. uncompression context. */
  172. uint8_t initialized : 1;
  173. /* Flag used when applying filters. */
  174. uint8_t all_filters_applied : 1;
  175. /* Flag used to skip file context reinitialization, used when unpacker
  176. is skipping through different multivolume archives. */
  177. uint8_t switch_multivolume : 1;
  178. /* Flag used to specify if unpacker has processed the whole data block
  179. or just a part of it. */
  180. uint8_t block_parsing_finished : 1;
  181. /* Flag used to indicate that a previous file using this buffer was
  182. encrypted, meaning no data in the buffer can be trusted */
  183. uint8_t data_encrypted : 1;
  184. signed int notused : 3;
  185. int flags; /* Uncompression flags. */
  186. int method; /* Uncompression algorithm method. */
  187. int version; /* Uncompression algorithm version. */
  188. ssize_t window_size; /* Size of window_buf. */
  189. uint8_t* window_buf; /* Circular buffer used during
  190. decompression. */
  191. uint8_t* filtered_buf; /* Buffer used when applying filters. */
  192. const uint8_t* block_buf; /* Buffer used when merging blocks. */
  193. ssize_t window_mask; /* Convenience field; window_size - 1. */
  194. int64_t write_ptr; /* This amount of data has been unpacked
  195. in the window buffer. */
  196. int64_t last_write_ptr; /* This amount of data has been stored in
  197. the output file. */
  198. int64_t last_unstore_ptr; /* Counter of bytes extracted during
  199. unstoring. This is separate from
  200. last_write_ptr because of how SERVICE
  201. base blocks are handled during skipping
  202. in solid multiarchive archives. */
  203. int64_t solid_offset; /* Additional offset inside the window
  204. buffer, used in unpacking solid
  205. archives. */
  206. ssize_t cur_block_size; /* Size of current data block. */
  207. int last_len; /* Flag used in lzss decompression. */
  208. /* Decode tables used during lzss uncompression. */
  209. #define HUFF_BC 20
  210. struct decode_table bd; /* huffman bit lengths */
  211. #define HUFF_NC 306
  212. struct decode_table ld; /* literals */
  213. #define HUFF_DC 64
  214. struct decode_table dd; /* distances */
  215. #define HUFF_LDC 16
  216. struct decode_table ldd; /* lower bits of distances */
  217. #define HUFF_RC 44
  218. struct decode_table rd; /* repeating distances */
  219. #define HUFF_TABLE_SIZE (HUFF_NC + HUFF_DC + HUFF_RC + HUFF_LDC)
  220. /* Circular deque for storing filters. */
  221. struct cdeque filters;
  222. int64_t last_block_start; /* Used for sanity checking. */
  223. ssize_t last_block_length; /* Used for sanity checking. */
  224. /* Distance cache used during lzss uncompression. */
  225. int dist_cache[4];
  226. /* Data buffer stack. */
  227. struct data_ready dready[2];
  228. };
  /* State of the bit reader: a byte position plus a bit position inside
   * that byte. */
  struct bit_reader {
      /* Current bit pointer inside current byte. */
      int8_t bit_addr;
      /* Current byte pointer. */
      int in_addr;
  };
  /* Header of a RARv5 compressed block. Read the packed flags through the
   * bf_* accessors (bf_byte_count() and friends) instead of decoding
   * block_flags_u8 by hand. */
  struct compressed_block_header {
      /* Little-endian bitfield, from the most significant bit down:
       *
       *   bit  7    - table present flag  (shr 7, and 1)
       *   bit  6    - last block flag     (shr 6, and 1)
       *   bits 5..3 - byte_count          (shr 3, and 7)
       *   bits 2..0 - bit_size            (shr 0, and 7)
       */
      uint8_t block_flags_u8;
      uint8_t block_cksum;
  };
  /* Contents of the RARv5 main header. */
  struct main_header {
      uint8_t solid : 1;      /* Does the archive contain solid streams? */
      uint8_t volume : 1;     /* Is this a multi-file archive? */
      uint8_t endarc : 1;
      uint8_t notused : 5;
      unsigned int vol_no;
  };
  /* Generic header state: two split flags, a size and the id of the last
   * header. */
  struct generic_header {
      uint8_t split_after : 1;
      uint8_t split_before : 1;
      uint8_t padding : 6;

      int size;
      int last_header_id;
  };
  /* Multivolume state: the expected volume number and a byte buffer. */
  struct multivolume {
      unsigned int expected_vol_no;
      uint8_t *push_buf;
  };
  268. /* Main context structure. */
  269. struct rar5 {
  270. int header_initialized;
  271. /* Set to 1 if current file is positioned AFTER the magic value
  272. * of the archive file. This is used in header reading functions. */
  273. int skipped_magic;
  274. /* Set to not zero if we're in skip mode (either by calling
  275. * rar5_data_skip function or when skipping over solid streams).
  276. * Set to 0 when in * extraction mode. This is used during checksum
  277. * calculation functions. */
  278. int skip_mode;
  279. /* Set to not zero if we're in block merging mode (i.e. when switching
  280. * to another file in multivolume archive, last block from 1st archive
  281. * needs to be merged with 1st block from 2nd archive). This flag
  282. * guards against recursive use of the merging function, which doesn't
  283. * support recursive calls. */
  284. int merge_mode;
  285. /* An offset to QuickOpen list. This is not supported by this unpacker,
  286. * because we're focusing on streaming interface. QuickOpen is designed
  287. * to make things quicker for non-stream interfaces, so it's not our
  288. * use case. */
  289. uint64_t qlist_offset;
  290. /* An offset to additional Recovery data. This is not supported by this
  291. * unpacker. Recovery data are additional Reed-Solomon codes that could
  292. * be used to calculate bytes that are missing in archive or are
  293. * corrupted. */
  294. uint64_t rr_offset;
  295. /* Various context variables grouped to different structures. */
  296. struct generic_header generic;
  297. struct main_header main;
  298. struct comp_state cstate;
  299. struct file_header file;
  300. struct bit_reader bits;
  301. struct multivolume vol;
  302. /* The header of currently processed RARv5 block. Used in main
  303. * decompression logic loop. */
  304. struct compressed_block_header last_block_hdr;
  305. /*
  306. * Custom field to denote that this archive contains encrypted entries
  307. */
  308. int has_encrypted_entries;
  309. int headers_are_encrypted;
  310. };
  /* Prototypes of functions that are used before their definitions. */
  static void rar5_signature(char *buf);
  static int verify_global_checksums(struct archive_read* a);
  static int rar5_read_data_skip(struct archive_read *a);
  static int push_data_ready(struct archive_read* a,
      struct rar5* rar,
      const uint8_t* buf,
      size_t size,
      int64_t offset);
  static void clear_data_ready_stack(struct rar5* rar);
  /* Status codes returned by the cdeque_* functions
   * (CDE = Circular Double Ended queue). */
  enum CDE_RETURN_VALUES {
      CDE_OK = 0,
      CDE_ALLOC = 1,
      CDE_PARAM = 2,
      CDE_OUT_OF_BOUNDS = 3,
  };
  322. /* Clears the contents of this circular deque. */
  323. static void cdeque_clear(struct cdeque* d) {
  324. d->size = 0;
  325. d->beg_pos = 0;
  326. d->end_pos = 0;
  327. }
  328. /* Creates a new circular deque object. Capacity must be power of 2: 8, 16, 32,
  329. * 64, 256, etc. When the user will add another item above current capacity,
  330. * the circular deque will overwrite the oldest entry. */
  331. static int cdeque_init(struct cdeque* d, int max_capacity_power_of_2) {
  332. if(d == NULL || max_capacity_power_of_2 == 0)
  333. return CDE_PARAM;
  334. d->cap_mask = max_capacity_power_of_2 - 1;
  335. d->arr = NULL;
  336. if((max_capacity_power_of_2 & d->cap_mask) != 0)
  337. return CDE_PARAM;
  338. cdeque_clear(d);
  339. d->arr = malloc(sizeof(*d->arr) * max_capacity_power_of_2);
  340. return d->arr ? CDE_OK : CDE_ALLOC;
  341. }
  342. /* Return the current size (not capacity) of circular deque `d`. */
  343. static size_t cdeque_size(struct cdeque* d) {
  344. return d->size;
  345. }
  346. /* Returns the first element of current circular deque. Note that this function
  347. * doesn't perform any bounds checking. If you need bounds checking, use
  348. * `cdeque_front()` function instead. */
  349. static void cdeque_front_fast(struct cdeque* d, void** value) {
  350. *value = (void*) d->arr[d->beg_pos];
  351. }
  352. /* Returns the first element of current circular deque. This function
  353. * performs bounds checking. */
  354. static int cdeque_front(struct cdeque* d, void** value) {
  355. if(d->size > 0) {
  356. cdeque_front_fast(d, value);
  357. return CDE_OK;
  358. } else
  359. return CDE_OUT_OF_BOUNDS;
  360. }
  361. /* Pushes a new element into the end of this circular deque object. If current
  362. * size will exceed capacity, the oldest element will be overwritten. */
  363. static int cdeque_push_back(struct cdeque* d, void* item) {
  364. if(d == NULL)
  365. return CDE_PARAM;
  366. if(d->size == d->cap_mask + 1)
  367. return CDE_OUT_OF_BOUNDS;
  368. d->arr[d->end_pos] = (size_t) item;
  369. d->end_pos = (d->end_pos + 1) & d->cap_mask;
  370. d->size++;
  371. return CDE_OK;
  372. }
  373. /* Pops a front element of this circular deque object and returns its value.
  374. * This function doesn't perform any bounds checking. */
  375. static void cdeque_pop_front_fast(struct cdeque* d, void** value) {
  376. *value = (void*) d->arr[d->beg_pos];
  377. d->beg_pos = (d->beg_pos + 1) & d->cap_mask;
  378. d->size--;
  379. }
  380. /* Pops a front element of this circular deque object and returns its value.
  381. * This function performs bounds checking. */
  382. static int cdeque_pop_front(struct cdeque* d, void** value) {
  383. if(!d || !value)
  384. return CDE_PARAM;
  385. if(d->size == 0)
  386. return CDE_OUT_OF_BOUNDS;
  387. cdeque_pop_front_fast(d, value);
  388. return CDE_OK;
  389. }
  /* Helper that reinterprets a `struct filter_info**` as the `void**` expected
   * by the deque accessors. The cast is routed through `size_t`. */
  static void** cdeque_filter_p(struct filter_info** f) {
      const size_t raw = (size_t) f;

      return (void**) raw;
  }
  /* Helper that reinterprets a `struct filter_info*` as the `void*` expected
   * by `cdeque_push_back()`. The cast is routed through `size_t`. */
  static void* cdeque_filter(struct filter_info* f) {
      const size_t raw = (size_t) f;

      return (void*) raw;
  }
  398. /* Destroys this circular deque object. Deallocates the memory of the
  399. * collection buffer, but doesn't deallocate the memory of any pointer passed
  400. * to this deque as a value. */
  401. static void cdeque_free(struct cdeque* d) {
  402. if(!d)
  403. return;
  404. if(!d->arr)
  405. return;
  406. free(d->arr);
  407. d->arr = NULL;
  408. d->beg_pos = -1;
  409. d->end_pos = -1;
  410. d->cap_mask = 0;
  411. }
  412. static inline
  413. uint8_t bf_bit_size(const struct compressed_block_header* hdr) {
  414. return hdr->block_flags_u8 & 7;
  415. }
  416. static inline
  417. uint8_t bf_byte_count(const struct compressed_block_header* hdr) {
  418. return (hdr->block_flags_u8 >> 3) & 7;
  419. }
  420. static inline
  421. uint8_t bf_is_table_present(const struct compressed_block_header* hdr) {
  422. return (hdr->block_flags_u8 >> 7) & 1;
  423. }
  424. static inline
  425. uint8_t bf_is_last_block(const struct compressed_block_header* hdr) {
  426. return (hdr->block_flags_u8 >> 6) & 1;
  427. }
  428. static inline struct rar5* get_context(struct archive_read* a) {
  429. return (struct rar5*) a->format->data;
  430. }
  /* Convenience helper used by the filter implementations.
   *
   * Copies the logical range [start, end) out of the circular buffer `window`
   * (whose size is `mask + 1`) into the linear buffer `dst`. When the masked
   * start position lies after the masked end position the range wraps around
   * the end of the window and is copied in two pieces. */
  static void circular_memcpy(uint8_t* dst, uint8_t* window, const ssize_t mask,
      int64_t start, int64_t end)
  {
      const int64_t first = start & mask;
      const int64_t last = end & mask;

      if(first <= last) {
          /* Contiguous range: a single copy is enough. */
          memcpy(dst, window + first, (size_t) (end - start));
          return;
      }

      /* Wrapped range: tail of the window first, then its beginning. */
      const ssize_t tail_len = mask + 1 - first;
      const ssize_t head_len = last;

      memcpy(dst, window + first, tail_len);
      memcpy(dst + tail_len, window, head_len);
  }
  444. static uint32_t read_filter_data(struct rar5* rar, uint32_t offset) {
  445. uint8_t linear_buf[4];
  446. circular_memcpy(linear_buf, rar->cstate.window_buf,
  447. rar->cstate.window_mask, offset, offset + 4);
  448. return archive_le32dec(linear_buf);
  449. }
  450. static void write_filter_data(struct rar5* rar, uint32_t offset,
  451. uint32_t value)
  452. {
  453. archive_le32enc(&rar->cstate.filtered_buf[offset], value);
  454. }
  455. /* Allocates a new filter descriptor and adds it to the filter array. */
  456. static struct filter_info* add_new_filter(struct rar5* rar) {
  457. struct filter_info* f = calloc(1, sizeof(*f));
  458. if(!f) {
  459. return NULL;
  460. }
  461. cdeque_push_back(&rar->cstate.filters, cdeque_filter(f));
  462. return f;
  463. }
  464. static int run_delta_filter(struct rar5* rar, struct filter_info* flt) {
  465. int i;
  466. ssize_t dest_pos, src_pos = 0;
  467. for(i = 0; i < flt->channels; i++) {
  468. uint8_t prev_byte = 0;
  469. for(dest_pos = i;
  470. dest_pos < flt->block_length;
  471. dest_pos += flt->channels)
  472. {
  473. uint8_t byte;
  474. byte = rar->cstate.window_buf[
  475. (rar->cstate.solid_offset + flt->block_start +
  476. src_pos) & rar->cstate.window_mask];
  477. prev_byte -= byte;
  478. rar->cstate.filtered_buf[dest_pos] = prev_byte;
  479. src_pos++;
  480. }
  481. }
  482. return ARCHIVE_OK;
  483. }
  484. static int run_e8e9_filter(struct rar5* rar, struct filter_info* flt,
  485. int extended)
  486. {
  487. const uint32_t file_size = 0x1000000;
  488. ssize_t i;
  489. circular_memcpy(rar->cstate.filtered_buf,
  490. rar->cstate.window_buf, rar->cstate.window_mask,
  491. rar->cstate.solid_offset + flt->block_start,
  492. rar->cstate.solid_offset + flt->block_start + flt->block_length);
  493. for(i = 0; i < flt->block_length - 4;) {
  494. uint8_t b = rar->cstate.window_buf[
  495. (rar->cstate.solid_offset + flt->block_start +
  496. i++) & rar->cstate.window_mask];
  497. /*
  498. * 0xE8 = x86's call <relative_addr_uint32> (function call)
  499. * 0xE9 = x86's jmp <relative_addr_uint32> (unconditional jump)
  500. */
  501. if(b == 0xE8 || (extended && b == 0xE9)) {
  502. uint32_t addr;
  503. uint32_t offset = (i + flt->block_start) % file_size;
  504. addr = read_filter_data(rar,
  505. (uint32_t)(rar->cstate.solid_offset +
  506. flt->block_start + i) & rar->cstate.window_mask);
  507. if(addr & 0x80000000) {
  508. if(((addr + offset) & 0x80000000) == 0) {
  509. write_filter_data(rar, (uint32_t)i,
  510. addr + file_size);
  511. }
  512. } else {
  513. if((addr - file_size) & 0x80000000) {
  514. uint32_t naddr = addr - offset;
  515. write_filter_data(rar, (uint32_t)i,
  516. naddr);
  517. }
  518. }
  519. i += 4;
  520. }
  521. }
  522. return ARCHIVE_OK;
  523. }
  524. static int run_arm_filter(struct rar5* rar, struct filter_info* flt) {
  525. ssize_t i = 0;
  526. uint32_t offset;
  527. circular_memcpy(rar->cstate.filtered_buf,
  528. rar->cstate.window_buf, rar->cstate.window_mask,
  529. rar->cstate.solid_offset + flt->block_start,
  530. rar->cstate.solid_offset + flt->block_start + flt->block_length);
  531. for(i = 0; i < flt->block_length - 3; i += 4) {
  532. uint8_t* b = &rar->cstate.window_buf[
  533. (rar->cstate.solid_offset +
  534. flt->block_start + i + 3) & rar->cstate.window_mask];
  535. if(*b == 0xEB) {
  536. /* 0xEB = ARM's BL (branch + link) instruction. */
  537. offset = read_filter_data(rar,
  538. (rar->cstate.solid_offset + flt->block_start + i) &
  539. (uint32_t)rar->cstate.window_mask) & 0x00ffffff;
  540. offset -= (uint32_t) ((i + flt->block_start) / 4);
  541. offset = (offset & 0x00ffffff) | 0xeb000000;
  542. write_filter_data(rar, (uint32_t)i, offset);
  543. }
  544. }
  545. return ARCHIVE_OK;
  546. }
  547. static int run_filter(struct archive_read* a, struct filter_info* flt) {
  548. int ret;
  549. struct rar5* rar = get_context(a);
  550. clear_data_ready_stack(rar);
  551. free(rar->cstate.filtered_buf);
  552. rar->cstate.filtered_buf = malloc(flt->block_length);
  553. if(!rar->cstate.filtered_buf) {
  554. archive_set_error(&a->archive, ENOMEM,
  555. "Can't allocate memory for filter data.");
  556. return ARCHIVE_FATAL;
  557. }
  558. switch(flt->type) {
  559. case FILTER_DELTA:
  560. ret = run_delta_filter(rar, flt);
  561. break;
  562. case FILTER_E8:
  563. /* fallthrough */
  564. case FILTER_E8E9:
  565. ret = run_e8e9_filter(rar, flt,
  566. flt->type == FILTER_E8E9);
  567. break;
  568. case FILTER_ARM:
  569. ret = run_arm_filter(rar, flt);
  570. break;
  571. default:
  572. archive_set_error(&a->archive,
  573. ARCHIVE_ERRNO_FILE_FORMAT,
  574. "Unsupported filter type: 0x%x",
  575. (unsigned int)flt->type);
  576. return ARCHIVE_FATAL;
  577. }
  578. if(ret != ARCHIVE_OK) {
  579. /* Filter has failed. */
  580. return ret;
  581. }
  582. if(ARCHIVE_OK != push_data_ready(a, rar, rar->cstate.filtered_buf,
  583. flt->block_length, rar->cstate.last_write_ptr))
  584. {
  585. archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
  586. "Stack overflow when submitting unpacked data");
  587. return ARCHIVE_FATAL;
  588. }
  589. rar->cstate.last_write_ptr += flt->block_length;
  590. return ARCHIVE_OK;
  591. }
  592. /* The `push_data` function submits the selected data range to the user.
  593. * Next call of `use_data` will use the pointer, size and offset arguments
  594. * that are specified here. These arguments are pushed to the FIFO stack here,
  595. * and popped from the stack by the `use_data` function. */
  596. static void push_data(struct archive_read* a, struct rar5* rar,
  597. const uint8_t* buf, int64_t idx_begin, int64_t idx_end)
  598. {
  599. const ssize_t wmask = rar->cstate.window_mask;
  600. const ssize_t solid_write_ptr = (rar->cstate.solid_offset +
  601. rar->cstate.last_write_ptr) & wmask;
  602. idx_begin += rar->cstate.solid_offset;
  603. idx_end += rar->cstate.solid_offset;
  604. /* Check if our unpacked data is wrapped inside the window circular
  605. * buffer. If it's not wrapped, it can be copied out by using
  606. * a single memcpy, but when it's wrapped, we need to copy the first
  607. * part with one memcpy, and the second part with another memcpy. */
  608. if((idx_begin & wmask) > (idx_end & wmask)) {
  609. /* The data is wrapped (begin offset sis bigger than end
  610. * offset). */
  611. const ssize_t frag1_size = rar->cstate.window_size -
  612. (idx_begin & wmask);
  613. const ssize_t frag2_size = idx_end & wmask;
  614. /* Copy the first part of the buffer first. */
  615. push_data_ready(a, rar, buf + solid_write_ptr, frag1_size,
  616. rar->cstate.last_write_ptr);
  617. /* Copy the second part of the buffer. */
  618. push_data_ready(a, rar, buf, frag2_size,
  619. rar->cstate.last_write_ptr + frag1_size);
  620. rar->cstate.last_write_ptr += frag1_size + frag2_size;
  621. } else {
  622. /* Data is not wrapped, so we can just use one call to copy the
  623. * data. */
  624. push_data_ready(a, rar,
  625. buf + solid_write_ptr, (idx_end - idx_begin) & wmask,
  626. rar->cstate.last_write_ptr);
  627. rar->cstate.last_write_ptr += idx_end - idx_begin;
  628. }
  629. }
  630. /* Convenience function that submits the data to the user. It uses the
  631. * unpack window buffer as a source location. */
  632. static void push_window_data(struct archive_read* a, struct rar5* rar,
  633. int64_t idx_begin, int64_t idx_end)
  634. {
  635. push_data(a, rar, rar->cstate.window_buf, idx_begin, idx_end);
  636. }
  637. static int apply_filters(struct archive_read* a) {
  638. struct filter_info* flt;
  639. struct rar5* rar = get_context(a);
  640. int ret;
  641. rar->cstate.all_filters_applied = 0;
  642. /* Get the first filter that can be applied to our data. The data
  643. * needs to be fully unpacked before the filter can be run. */
  644. if(CDE_OK == cdeque_front(&rar->cstate.filters,
  645. cdeque_filter_p(&flt))) {
  646. /* Check if our unpacked data fully covers this filter's
  647. * range. */
  648. if(rar->cstate.write_ptr > flt->block_start &&
  649. rar->cstate.write_ptr >= flt->block_start +
  650. flt->block_length) {
  651. /* Check if we have some data pending to be written
  652. * right before the filter's start offset. */
  653. if(rar->cstate.last_write_ptr == flt->block_start) {
  654. /* Run the filter specified by descriptor
  655. * `flt`. */
  656. ret = run_filter(a, flt);
  657. if(ret != ARCHIVE_OK) {
  658. /* Filter failure, return error. */
  659. return ret;
  660. }
  661. /* Filter descriptor won't be needed anymore
  662. * after it's used, * so remove it from the
  663. * filter list and free its memory. */
  664. (void) cdeque_pop_front(&rar->cstate.filters,
  665. cdeque_filter_p(&flt));
  666. free(flt);
  667. } else {
  668. /* We can't run filters yet, dump the memory
  669. * right before the filter. */
  670. push_window_data(a, rar,
  671. rar->cstate.last_write_ptr,
  672. flt->block_start);
  673. }
  674. /* Return 'filter applied or not needed' state to the
  675. * caller. */
  676. return ARCHIVE_RETRY;
  677. }
  678. }
  679. rar->cstate.all_filters_applied = 1;
  680. return ARCHIVE_OK;
  681. }
  682. static void dist_cache_push(struct rar5* rar, int value) {
  683. int* q = rar->cstate.dist_cache;
  684. q[3] = q[2];
  685. q[2] = q[1];
  686. q[1] = q[0];
  687. q[0] = value;
  688. }
  689. static int dist_cache_touch(struct rar5* rar, int idx) {
  690. int* q = rar->cstate.dist_cache;
  691. int i, dist = q[idx];
  692. for(i = idx; i > 0; i--)
  693. q[i] = q[i - 1];
  694. q[0] = dist;
  695. return dist;
  696. }
  697. static void free_filters(struct rar5* rar) {
  698. struct cdeque* d = &rar->cstate.filters;
  699. /* Free any remaining filters. All filters should be naturally
  700. * consumed by the unpacking function, so remaining filters after
  701. * unpacking normally mean that unpacking wasn't successful.
  702. * But still of course we shouldn't leak memory in such case. */
  703. /* cdeque_size() is a fast operation, so we can use it as a loop
  704. * expression. */
  705. while(cdeque_size(d) > 0) {
  706. struct filter_info* f = NULL;
  707. /* Pop_front will also decrease the collection's size. */
  708. if (CDE_OK == cdeque_pop_front(d, cdeque_filter_p(&f)))
  709. free(f);
  710. }
  711. cdeque_clear(d);
  712. /* Also clear out the variables needed for sanity checking. */
  713. rar->cstate.last_block_start = 0;
  714. rar->cstate.last_block_length = 0;
  715. }
  716. static void reset_file_context(struct rar5* rar) {
  717. memset(&rar->file, 0, sizeof(rar->file));
  718. blake2sp_init(&rar->file.b2state, 32);
  719. if(rar->main.solid) {
  720. rar->cstate.solid_offset += rar->cstate.write_ptr;
  721. } else {
  722. rar->cstate.solid_offset = 0;
  723. }
  724. rar->cstate.write_ptr = 0;
  725. rar->cstate.last_write_ptr = 0;
  726. rar->cstate.last_unstore_ptr = 0;
  727. rar->file.redir_type = REDIR_TYPE_NONE;
  728. rar->file.redir_flags = 0;
  729. free_filters(rar);
  730. }
  731. static inline int get_archive_read(struct archive* a,
  732. struct archive_read** ar)
  733. {
  734. *ar = (struct archive_read*) a;
  735. archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW,
  736. "archive_read_support_format_rar5");
  737. return ARCHIVE_OK;
  738. }
  /* Requests `how_many` bytes of look-ahead from the input stream and stores
   * the resulting pointer in `*ptr`. No data is consumed here.
   *
   * Returns 1 when a non-NULL pointer was obtained, 0 when `ptr` is NULL or
   * the underlying look-ahead request yielded NULL. */
  static int read_ahead(struct archive_read* a, size_t how_many,
      const uint8_t** ptr)
  {
      ssize_t avail = -1;

      if(ptr == NULL)
          return 0;

      *ptr = __archive_read_ahead(a, how_many, &avail);
      return (*ptr != NULL) ? 1 : 0;
  }
  751. static int consume(struct archive_read* a, int64_t how_many) {
  752. int ret;
  753. ret = how_many == __archive_read_consume(a, how_many)
  754. ? ARCHIVE_OK
  755. : ARCHIVE_FATAL;
  756. return ret;
  757. }
  758. /**
  759. * Read a RAR5 variable sized numeric value. This value will be stored in
  760. * `pvalue`. The `pvalue_len` argument points to a variable that will receive
  761. * the byte count that was consumed in order to decode the `pvalue` value, plus
  762. * one.
  763. *
  764. * pvalue_len is optional and can be NULL.
  765. *
  766. * NOTE: if `pvalue_len` is NOT NULL, the caller needs to manually consume
  767. * the number of bytes that `pvalue_len` value contains. If the `pvalue_len`
  768. * is NULL, this consuming operation is done automatically.
  769. *
  770. * Returns 1 if *pvalue was successfully read.
  771. * Returns 0 if there was an error. In this case, *pvalue contains an
  772. * invalid value.
  773. */
  774. static int read_var(struct archive_read* a, uint64_t* pvalue,
  775. uint64_t* pvalue_len)
  776. {
  777. uint64_t result = 0;
  778. size_t shift, i;
  779. const uint8_t* p;
  780. uint8_t b;
  781. /* We will read maximum of 8 bytes. We don't have to handle the
  782. * situation to read the RAR5 variable-sized value stored at the end of
  783. * the file, because such situation will never happen. */
  784. if(!read_ahead(a, 8, &p))
  785. return 0;
  786. for(shift = 0, i = 0; i < 8; i++, shift += 7) {
  787. b = p[i];
  788. /* Strip the MSB from the input byte and add the resulting
  789. * number to the `result`. */
  790. result += (b & (uint64_t)0x7F) << shift;
  791. /* MSB set to 1 means we need to continue decoding process.
  792. * MSB set to 0 means we're done.
  793. *
  794. * This conditional checks for the second case. */
  795. if((b & 0x80) == 0) {
  796. if(pvalue) {
  797. *pvalue = result;
  798. }
  799. /* If the caller has passed the `pvalue_len` pointer,
  800. * store the number of consumed bytes in it and do NOT
  801. * consume those bytes, since the caller has all the
  802. * information it needs to perform */
  803. if(pvalue_len) {
  804. *pvalue_len = 1 + i;
  805. } else {
  806. /* If the caller did not provide the
  807. * `pvalue_len` pointer, it will not have the
  808. * possibility to advance the file pointer,
  809. * because it will not know how many bytes it
  810. * needs to consume. This is why we handle
  811. * such situation here automatically. */
  812. if(ARCHIVE_OK != consume(a, 1 + i)) {
  813. return 0;
  814. }
  815. }
  816. /* End of decoding process, return success. */
  817. return 1;
  818. }
  819. }
  820. /* The decoded value takes the maximum number of 8 bytes.
  821. * It's a maximum number of bytes, so end decoding process here
  822. * even if the first bit of last byte is 1. */
  823. if(pvalue) {
  824. *pvalue = result;
  825. }
  826. if(pvalue_len) {
  827. *pvalue_len = 9;
  828. } else {
  829. if(ARCHIVE_OK != consume(a, 9)) {
  830. return 0;
  831. }
  832. }
  833. return 1;
  834. }
  /* `size_t` flavour of `read_var()`.
   *
   * The decoded value and its encoded length are narrowed to `size_t` and
   * stored in `*pvalue` / `*pvalue_len` (both optional). `*pvalue` is written
   * only on success; `*pvalue_len`, when requested, is always written. As with
   * `read_var()`, the input is consumed automatically only when `pvalue_len`
   * is NULL. Returns the result of `read_var()` (1 on success, 0 on error). */
  static int read_var_sized(struct archive_read* a, size_t* pvalue,
      size_t* pvalue_len)
  {
      uint64_t value;
      uint64_t encoded_len = 0;
      const int ret = read_var(a, &value, pvalue_len ? &encoded_len : NULL);

      if(pvalue != NULL && ret == 1)
          *pvalue = (size_t) value;

      /* Possible data truncation should be safe. */
      if(pvalue_len != NULL)
          *pvalue_len = (size_t) encoded_len;

      return ret;
  }
  851. static int read_bits_32(struct archive_read* a, struct rar5* rar,
  852. const uint8_t* p, uint32_t* value)
  853. {
  854. if(rar->bits.in_addr >= rar->cstate.cur_block_size) {
  855. archive_set_error(&a->archive,
  856. ARCHIVE_ERRNO_PROGRAMMER,
  857. "Premature end of stream during extraction of data (#1)");
  858. return ARCHIVE_FATAL;
  859. }
  860. uint32_t bits = ((uint32_t) p[rar->bits.in_addr]) << 24;
  861. bits |= p[rar->bits.in_addr + 1] << 16;
  862. bits |= p[rar->bits.in_addr + 2] << 8;
  863. bits |= p[rar->bits.in_addr + 3];
  864. bits <<= rar->bits.bit_addr;
  865. bits |= p[rar->bits.in_addr + 4] >> (8 - rar->bits.bit_addr);
  866. *value = bits;
  867. return ARCHIVE_OK;
  868. }
  869. static int read_bits_16(struct archive_read* a, struct rar5* rar,
  870. const uint8_t* p, uint16_t* value)
  871. {
  872. if(rar->bits.in_addr >= rar->cstate.cur_block_size) {
  873. archive_set_error(&a->archive,
  874. ARCHIVE_ERRNO_PROGRAMMER,
  875. "Premature end of stream during extraction of data (#2)");
  876. return ARCHIVE_FATAL;
  877. }
  878. int bits = (int) ((uint32_t) p[rar->bits.in_addr]) << 16;
  879. bits |= (int) p[rar->bits.in_addr + 1] << 8;
  880. bits |= (int) p[rar->bits.in_addr + 2];
  881. bits >>= (8 - rar->bits.bit_addr);
  882. *value = bits & 0xffff;
  883. return ARCHIVE_OK;
  884. }
  885. static void skip_bits(struct rar5* rar, int bits) {
  886. const int new_bits = rar->bits.bit_addr + bits;
  887. rar->bits.in_addr += new_bits >> 3;
  888. rar->bits.bit_addr = new_bits & 7;
  889. }
  890. /* n = up to 16 */
  891. static int read_consume_bits(struct archive_read* a, struct rar5* rar,
  892. const uint8_t* p, int n, int* value)
  893. {
  894. uint16_t v;
  895. int ret, num;
  896. if(n == 0 || n > 16) {
  897. /* This is a programmer error and should never happen
  898. * in runtime. */
  899. return ARCHIVE_FATAL;
  900. }
  901. ret = read_bits_16(a, rar, p, &v);
  902. if(ret != ARCHIVE_OK)
  903. return ret;
  904. num = (int) v;
  905. num >>= 16 - n;
  906. skip_bits(rar, n);
  907. if(value)
  908. *value = num;
  909. return ARCHIVE_OK;
  910. }
  911. static int read_u32(struct archive_read* a, uint32_t* pvalue) {
  912. const uint8_t* p;
  913. if(!read_ahead(a, 4, &p))
  914. return 0;
  915. *pvalue = archive_le32dec(p);
  916. return ARCHIVE_OK == consume(a, 4) ? 1 : 0;
  917. }
  918. static int read_u64(struct archive_read* a, uint64_t* pvalue) {
  919. const uint8_t* p;
  920. if(!read_ahead(a, 8, &p))
  921. return 0;
  922. *pvalue = archive_le64dec(p);
  923. return ARCHIVE_OK == consume(a, 8) ? 1 : 0;
  924. }
  925. static int bid_standard(struct archive_read* a) {
  926. const uint8_t* p;
  927. char signature[sizeof(rar5_signature_xor)];
  928. rar5_signature(signature);
  929. if(!read_ahead(a, sizeof(rar5_signature_xor), &p))
  930. return -1;
  931. if(!memcmp(signature, p, sizeof(rar5_signature_xor)))
  932. return 30;
  933. return -1;
  934. }
  935. static int bid_sfx(struct archive_read *a)
  936. {
  937. const char *p;
  938. if ((p = __archive_read_ahead(a, 7, NULL)) == NULL)
  939. return -1;
  940. if ((p[0] == 'M' && p[1] == 'Z') || memcmp(p, "\x7F\x45LF", 4) == 0) {
  941. /* This is a PE file */
  942. char signature[sizeof(rar5_signature_xor)];
  943. ssize_t offset = 0x10000;
  944. ssize_t window = 4096;
  945. ssize_t bytes_avail;
  946. rar5_signature(signature);
  947. while (offset + window <= (1024 * 512)) {
  948. const char *buff = __archive_read_ahead(a, offset + window, &bytes_avail);
  949. if (buff == NULL) {
  950. /* Remaining bytes are less than window. */
  951. window >>= 1;
  952. if (window < 0x40)
  953. return 0;
  954. continue;
  955. }
  956. p = buff + offset;
  957. while (p + 8 < buff + bytes_avail) {
  958. if (memcmp(p, signature, sizeof(signature)) == 0)
  959. return 30;
  960. p += 0x10;
  961. }
  962. offset = p - buff;
  963. }
  964. }
  965. return 0;
  966. }
  /* Format bidder. Declines (-1) when another format already bid more than
   * 30. Otherwise the plain-archive check is tried first and the
   * self-extracting check second; the first non-negative bid wins. Returns -1
   * when neither check produced a bid. */
  static int rar5_bid(struct archive_read* a, int best_bid) {
      int bid;

      if(best_bid > 30)
          return -1;

      bid = bid_standard(a);
      if(bid > -1)
          return bid;

      bid = bid_sfx(a);
      return (bid > -1) ? bid : -1;
  }
  981. static int rar5_options(struct archive_read *a, const char *key,
  982. const char *val) {
  983. (void) a;
  984. (void) key;
  985. (void) val;
  986. /* No options supported in this version. Return the ARCHIVE_WARN code
  987. * to signal the options supervisor that the unpacker didn't handle
  988. * setting this option. */
  989. return ARCHIVE_WARN;
  990. }
  991. static void init_header(struct archive_read* a) {
  992. a->archive.archive_format = ARCHIVE_FORMAT_RAR_V5;
  993. a->archive.archive_format_name = "RAR5";
  994. }
  995. static void init_window_mask(struct rar5* rar) {
  996. if (rar->cstate.window_size)
  997. rar->cstate.window_mask = rar->cstate.window_size - 1;
  998. else
  999. rar->cstate.window_mask = 0;
  1000. }
  /* Bit flags of a header; each constant occupies one distinct bit. */
  enum HEADER_FLAGS {
      HFL_EXTRA_DATA      = 1 << 0,   /* 0x0001 */
      HFL_DATA            = 1 << 1,   /* 0x0002 */
      HFL_SKIP_IF_UNKNOWN = 1 << 2,   /* 0x0004 */
      HFL_SPLIT_BEFORE    = 1 << 3,   /* 0x0008 */
      HFL_SPLIT_AFTER     = 1 << 4,   /* 0x0010 */
      HFL_CHILD           = 1 << 5,   /* 0x0020 */
      HFL_INHERITED       = 1 << 6    /* 0x0040 */
  };
  1010. static int process_main_locator_extra_block(struct archive_read* a,
  1011. struct rar5* rar)
  1012. {
  1013. uint64_t locator_flags;
  1014. enum LOCATOR_FLAGS {
  1015. QLIST = 0x01, RECOVERY = 0x02,
  1016. };
  1017. if(!read_var(a, &locator_flags, NULL)) {
  1018. return ARCHIVE_EOF;
  1019. }
  1020. if(locator_flags & QLIST) {
  1021. if(!read_var(a, &rar->qlist_offset, NULL)) {
  1022. return ARCHIVE_EOF;
  1023. }
  1024. /* qlist is not used */
  1025. }
  1026. if(locator_flags & RECOVERY) {
  1027. if(!read_var(a, &rar->rr_offset, NULL)) {
  1028. return ARCHIVE_EOF;
  1029. }
  1030. /* rr is not used */
  1031. }
  1032. return ARCHIVE_OK;
  1033. }
  1034. static int parse_file_extra_hash(struct archive_read* a, struct rar5* rar,
  1035. int64_t* extra_data_size)
  1036. {
  1037. size_t hash_type = 0;
  1038. size_t value_len;
  1039. enum HASH_TYPE {
  1040. BLAKE2sp = 0x00
  1041. };
  1042. if(!read_var_sized(a, &hash_type, &value_len))
  1043. return ARCHIVE_EOF;
  1044. *extra_data_size -= value_len;
  1045. if(ARCHIVE_OK != consume(a, value_len)) {
  1046. return ARCHIVE_EOF;
  1047. }
  1048. /* The file uses BLAKE2sp checksum algorithm instead of plain old
  1049. * CRC32. */
  1050. if(hash_type == BLAKE2sp) {
  1051. const uint8_t* p;
  1052. const int hash_size = sizeof(rar->file.blake2sp);
  1053. if(!read_ahead(a, hash_size, &p))
  1054. return ARCHIVE_EOF;
  1055. rar->file.has_blake2 = 1;
  1056. memcpy(&rar->file.blake2sp, p, hash_size);
  1057. if(ARCHIVE_OK != consume(a, hash_size)) {
  1058. return ARCHIVE_EOF;
  1059. }
  1060. *extra_data_size -= hash_size;
  1061. } else {
  1062. archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
  1063. "Unsupported hash type (0x%jx)", (uintmax_t)hash_type);
  1064. return ARCHIVE_FATAL;
  1065. }
  1066. return ARCHIVE_OK;
  1067. }
  /* Converts a Windows timestamp to Unix time in seconds: the input is divided
   * by 10,000,000 (ticks per second) and 11,644,473,600 seconds are
   * subtracted. The arithmetic is unsigned, so inputs that map to a time
   * before the Unix epoch wrap around. */
  static uint64_t time_win_to_unix(uint64_t win_time) {
      const uint64_t ticks_per_sec = 10000000;
      const uint64_t epoch_delta_sec = 11644473600ULL;
      const uint64_t win_seconds = win_time / ticks_per_sec;

      return win_seconds - epoch_delta_sec;
  }
  1073. static int parse_htime_item(struct archive_read* a, char unix_time,
  1074. uint64_t* where, int64_t* extra_data_size)
  1075. {
  1076. if(unix_time) {
  1077. uint32_t time_val;
  1078. if(!read_u32(a, &time_val))
  1079. return ARCHIVE_EOF;
  1080. *extra_data_size -= 4;
  1081. *where = (uint64_t) time_val;
  1082. } else {
  1083. uint64_t windows_time;
  1084. if(!read_u64(a, &windows_time))
  1085. return ARCHIVE_EOF;
  1086. *where = time_win_to_unix(windows_time);
  1087. *extra_data_size -= 8;
  1088. }
  1089. return ARCHIVE_OK;
  1090. }
  1091. static int parse_file_extra_version(struct archive_read* a,
  1092. struct archive_entry* e, int64_t* extra_data_size)
  1093. {
  1094. size_t flags = 0;
  1095. size_t version = 0;
  1096. size_t value_len = 0;
  1097. struct archive_string version_string;
  1098. struct archive_string name_utf8_string;
  1099. const char* cur_filename;
  1100. /* Flags are ignored. */
  1101. if(!read_var_sized(a, &flags, &value_len))
  1102. return ARCHIVE_EOF;
  1103. *extra_data_size -= value_len;
  1104. if(ARCHIVE_OK != consume(a, value_len))
  1105. return ARCHIVE_EOF;
  1106. if(!read_var_sized(a, &version, &value_len))
  1107. return ARCHIVE_EOF;
  1108. *extra_data_size -= value_len;
  1109. if(ARCHIVE_OK != consume(a, value_len))
  1110. return ARCHIVE_EOF;
  1111. /* extra_data_size should be zero here. */
  1112. cur_filename = archive_entry_pathname_utf8(e);
  1113. if(cur_filename == NULL) {
  1114. archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
  1115. "Version entry without file name");
  1116. return ARCHIVE_FATAL;
  1117. }
  1118. archive_string_init(&version_string);
  1119. archive_string_init(&name_utf8_string);
  1120. /* Prepare a ;123 suffix for the filename, where '123' is the version
  1121. * value of this file. */
  1122. archive_string_sprintf(&version_string, ";%zu", version);
  1123. /* Build the new filename. */
  1124. archive_strcat(&name_utf8_string, cur_filename);
  1125. archive_strcat(&name_utf8_string, version_string.s);
  1126. /* Apply the new filename into this file's context. */
  1127. archive_entry_update_pathname_utf8(e, name_utf8_string.s);
  1128. /* Free buffers. */
  1129. archive_string_free(&version_string);
  1130. archive_string_free(&name_utf8_string);
  1131. return ARCHIVE_OK;
  1132. }
  1133. static int parse_file_extra_htime(struct archive_read* a,
  1134. struct archive_entry* e, struct rar5* rar, int64_t* extra_data_size)
  1135. {
  1136. char unix_time = 0;
  1137. size_t flags = 0;
  1138. size_t value_len;
  1139. enum HTIME_FLAGS {
  1140. IS_UNIX = 0x01,
  1141. HAS_MTIME = 0x02,
  1142. HAS_CTIME = 0x04,
  1143. HAS_ATIME = 0x08,
  1144. HAS_UNIX_NS = 0x10,
  1145. };
  1146. if(!read_var_sized(a, &flags, &value_len))
  1147. return ARCHIVE_EOF;
  1148. *extra_data_size -= value_len;
  1149. if(ARCHIVE_OK != consume(a, value_len)) {
  1150. return ARCHIVE_EOF;
  1151. }
  1152. unix_time = flags & IS_UNIX;
  1153. if(flags & HAS_MTIME) {
  1154. parse_htime_item(a, unix_time, &rar->file.e_mtime,
  1155. extra_data_size);
  1156. archive_entry_set_mtime(e, rar->file.e_mtime, 0);
  1157. }
  1158. if(flags & HAS_CTIME) {
  1159. parse_htime_item(a, unix_time, &rar->file.e_ctime,
  1160. extra_data_size);
  1161. archive_entry_set_ctime(e, rar->file.e_ctime, 0);
  1162. }
  1163. if(flags & HAS_ATIME) {
  1164. parse_htime_item(a, unix_time, &rar->file.e_atime,
  1165. extra_data_size);
  1166. archive_entry_set_atime(e, rar->file.e_atime, 0);
  1167. }
  1168. if(flags & HAS_UNIX_NS) {
  1169. if(!read_u32(a, &rar->file.e_unix_ns))
  1170. return ARCHIVE_EOF;
  1171. *extra_data_size -= 4;
  1172. }
  1173. return ARCHIVE_OK;
  1174. }
  1175. static int parse_file_extra_redir(struct archive_read* a,
  1176. struct archive_entry* e, struct rar5* rar, int64_t* extra_data_size)
  1177. {
  1178. uint64_t value_size = 0;
  1179. size_t target_size = 0;
  1180. char target_utf8_buf[MAX_NAME_IN_BYTES];
  1181. const uint8_t* p;
  1182. if(!read_var(a, &rar->file.redir_type, &value_size))
  1183. return ARCHIVE_EOF;
  1184. if(ARCHIVE_OK != consume(a, (int64_t)value_size))
  1185. return ARCHIVE_EOF;
  1186. *extra_data_size -= value_size;
  1187. if(!read_var(a, &rar->file.redir_flags, &value_size))
  1188. return ARCHIVE_EOF;
  1189. if(ARCHIVE_OK != consume(a, (int64_t)value_size))
  1190. return ARCHIVE_EOF;
  1191. *extra_data_size -= value_size;
  1192. if(!read_var_sized(a, &target_size, NULL))
  1193. return ARCHIVE_EOF;
  1194. *extra_data_size -= target_size + 1;
  1195. if(target_size > (MAX_NAME_IN_CHARS - 1)) {
  1196. archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
  1197. "Link target is too long");
  1198. return ARCHIVE_FATAL;
  1199. }
  1200. if(target_size == 0) {
  1201. archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
  1202. "No link target specified");
  1203. return ARCHIVE_FATAL;
  1204. }
  1205. if(!read_ahead(a, target_size, &p))
  1206. return ARCHIVE_EOF;
  1207. memcpy(target_utf8_buf, p, target_size);
  1208. target_utf8_buf[target_size] = 0;
  1209. if(ARCHIVE_OK != consume(a, (int64_t)target_size))
  1210. return ARCHIVE_EOF;
  1211. switch(rar->file.redir_type) {
  1212. case REDIR_TYPE_UNIXSYMLINK:
  1213. case REDIR_TYPE_WINSYMLINK:
  1214. archive_entry_set_filetype(e, AE_IFLNK);
  1215. archive_entry_update_symlink_utf8(e, target_utf8_buf);
  1216. if (rar->file.redir_flags & REDIR_SYMLINK_IS_DIR) {
  1217. archive_entry_set_symlink_type(e,
  1218. AE_SYMLINK_TYPE_DIRECTORY);
  1219. } else {
  1220. archive_entry_set_symlink_type(e,
  1221. AE_SYMLINK_TYPE_FILE);
  1222. }
  1223. break;
  1224. case REDIR_TYPE_HARDLINK:
  1225. archive_entry_set_filetype(e, AE_IFREG);
  1226. archive_entry_update_hardlink_utf8(e, target_utf8_buf);
  1227. break;
  1228. default:
  1229. /* Unknown redir type, skip it. */
  1230. break;
  1231. }
  1232. return ARCHIVE_OK;
  1233. }
  1234. static int parse_file_extra_owner(struct archive_read* a,
  1235. struct archive_entry* e, int64_t* extra_data_size)
  1236. {
  1237. uint64_t flags = 0;
  1238. uint64_t value_size = 0;
  1239. uint64_t id = 0;
  1240. size_t name_len = 0;
  1241. size_t name_size = 0;
  1242. char namebuf[OWNER_MAXNAMELEN];
  1243. const uint8_t* p;
  1244. if(!read_var(a, &flags, &value_size))
  1245. return ARCHIVE_EOF;
  1246. if(ARCHIVE_OK != consume(a, (int64_t)value_size))
  1247. return ARCHIVE_EOF;
  1248. *extra_data_size -= value_size;
  1249. if ((flags & OWNER_USER_NAME) != 0) {
  1250. if(!read_var_sized(a, &name_size, NULL))
  1251. return ARCHIVE_EOF;
  1252. *extra_data_size -= name_size + 1;
  1253. if(!read_ahead(a, name_size, &p))
  1254. return ARCHIVE_EOF;
  1255. if (name_size >= OWNER_MAXNAMELEN) {
  1256. name_len = OWNER_MAXNAMELEN - 1;
  1257. } else {
  1258. name_len = name_size;
  1259. }
  1260. memcpy(namebuf, p, name_len);
  1261. namebuf[name_len] = 0;
  1262. if(ARCHIVE_OK != consume(a, (int64_t)name_size))
  1263. return ARCHIVE_EOF;
  1264. archive_entry_set_uname(e, namebuf);
  1265. }
  1266. if ((flags & OWNER_GROUP_NAME) != 0) {
  1267. if(!read_var_sized(a, &name_size, NULL))
  1268. return ARCHIVE_EOF;
  1269. *extra_data_size -= name_size + 1;
  1270. if(!read_ahead(a, name_size, &p))
  1271. return ARCHIVE_EOF;
  1272. if (name_size >= OWNER_MAXNAMELEN) {
  1273. name_len = OWNER_MAXNAMELEN - 1;
  1274. } else {
  1275. name_len = name_size;
  1276. }
  1277. memcpy(namebuf, p, name_len);
  1278. namebuf[name_len] = 0;
  1279. if(ARCHIVE_OK != consume(a, (int64_t)name_size))
  1280. return ARCHIVE_EOF;
  1281. archive_entry_set_gname(e, namebuf);
  1282. }
  1283. if ((flags & OWNER_USER_UID) != 0) {
  1284. if(!read_var(a, &id, &value_size))
  1285. return ARCHIVE_EOF;
  1286. if(ARCHIVE_OK != consume(a, (int64_t)value_size))
  1287. return ARCHIVE_EOF;
  1288. *extra_data_size -= value_size;
  1289. archive_entry_set_uid(e, (la_int64_t)id);
  1290. }
  1291. if ((flags & OWNER_GROUP_GID) != 0) {
  1292. if(!read_var(a, &id, &value_size))
  1293. return ARCHIVE_EOF;
  1294. if(ARCHIVE_OK != consume(a, (int64_t)value_size))
  1295. return ARCHIVE_EOF;
  1296. *extra_data_size -= value_size;
  1297. archive_entry_set_gid(e, (la_int64_t)id);
  1298. }
  1299. return ARCHIVE_OK;
  1300. }
  1301. static int process_head_file_extra(struct archive_read* a,
  1302. struct archive_entry* e, struct rar5* rar, int64_t extra_data_size)
  1303. {
  1304. uint64_t extra_field_size;
  1305. uint64_t extra_field_id = 0;
  1306. int ret = ARCHIVE_FATAL;
  1307. uint64_t var_size;
  1308. while(extra_data_size > 0) {
  1309. if(!read_var(a, &extra_field_size, &var_size))
  1310. return ARCHIVE_EOF;
  1311. extra_data_size -= var_size;
  1312. if(ARCHIVE_OK != consume(a, var_size)) {
  1313. return ARCHIVE_EOF;
  1314. }
  1315. if(!read_var(a, &extra_field_id, &var_size))
  1316. return ARCHIVE_EOF;
  1317. extra_field_size -= var_size;
  1318. extra_data_size -= var_size;
  1319. if(ARCHIVE_OK != consume(a, var_size)) {
  1320. return ARCHIVE_EOF;
  1321. }
  1322. switch(extra_field_id) {
  1323. case EX_HASH:
  1324. ret = parse_file_extra_hash(a, rar,
  1325. &extra_data_size);
  1326. break;
  1327. case EX_HTIME:
  1328. ret = parse_file_extra_htime(a, e, rar,
  1329. &extra_data_size);
  1330. break;
  1331. case EX_REDIR:
  1332. ret = parse_file_extra_redir(a, e, rar,
  1333. &extra_data_size);
  1334. break;
  1335. case EX_UOWNER:
  1336. ret = parse_file_extra_owner(a, e,
  1337. &extra_data_size);
  1338. break;
  1339. case EX_VERSION:
  1340. ret = parse_file_extra_version(a, e,
  1341. &extra_data_size);
  1342. break;
  1343. case EX_CRYPT:
  1344. /* Mark the entry as encrypted */
  1345. archive_entry_set_is_data_encrypted(e, 1);
  1346. rar->has_encrypted_entries = 1;
  1347. rar->cstate.data_encrypted = 1;
  1348. /* fallthrough */
  1349. case EX_SUBDATA:
  1350. /* fallthrough */
  1351. default:
  1352. /* Skip unsupported entry. */
  1353. extra_data_size -= extra_field_size;
  1354. if (ARCHIVE_OK != consume(a, extra_field_size)) {
  1355. return ARCHIVE_EOF;
  1356. }
  1357. }
  1358. }
  1359. if(ret != ARCHIVE_OK) {
  1360. /* Attribute not implemented. */
  1361. return ret;
  1362. }
  1363. return ARCHIVE_OK;
  1364. }
/*
 * Parses the type-specific part of a FILE base block header (also used for
 * SERVICE blocks by process_head_service()) and fills in `entry` as well
 * as the per-file and decompression state kept in `rar`.
 *
 * Fields are read from the stream in this order: extra area size (only
 * when HFL_EXTRA_DATA is set), data size (HFL_DATA is mandatory), file
 * flags, unpacked size, file attributes, mtime (optional), CRC32
 * (optional), compression info, host OS, name length, name, and finally
 * the extra area records.
 *
 * Returns:
 *   ARCHIVE_EOF    when a field can't be read from the stream,
 *   ARCHIVE_FATAL  when the header is malformed or uses an unsupported
 *                  feature, or when the window buffer can't be resized,
 *   ARCHIVE_RETRY  when the block carries the 'split before' mark,
 *   ARCHIVE_OK     otherwise;
 * an error reported by process_head_file_extra() is returned unchanged.
 */
static int process_head_file(struct archive_read* a, struct rar5* rar,
    struct archive_entry* entry, size_t block_flags)
{
	int64_t extra_data_size = 0;
	size_t data_size = 0;
	size_t file_flags = 0;
	size_t file_attr = 0;
	size_t compression_info = 0;
	size_t host_os = 0;
	size_t name_size = 0;
	uint64_t unpacked_size, window_size;
	uint32_t mtime = 0, crc = 0;
	int c_method = 0, c_version = 0;
	char name_utf8_buf[MAX_NAME_IN_BYTES];
	const uint8_t* p;

	/* Bits of the `file_flags` field. */
	enum FILE_FLAGS {
		DIRECTORY = 0x0001, UTIME = 0x0002, CRC32 = 0x0004,
		UNKNOWN_UNPACKED_SIZE = 0x0008,
	};

	/* Bits of `file_attr` that are inspected when the host OS is
	 * Windows. */
	enum FILE_ATTRS {
		ATTR_READONLY = 0x1, ATTR_HIDDEN = 0x2, ATTR_SYSTEM = 0x4,
		ATTR_DIRECTORY = 0x10,
	};

	/* Bits of the `compression_info` field. */
	enum COMP_INFO_FLAGS {
		SOLID = 0x0040,
	};

	/* Accepted values of the `host_os` field. */
	enum HOST_OS {
		HOST_WINDOWS = 0,
		HOST_UNIX = 1,
	};

	archive_entry_clear(entry);

	/* Do not reset file context if we're switching archives. */
	if(!rar->cstate.switch_multivolume) {
		reset_file_context(rar);
	}

	if(block_flags & HFL_EXTRA_DATA) {
		uint64_t edata_size = 0;
		if(!read_var(a, &edata_size, NULL))
			return ARCHIVE_EOF;

		/* Intentional type cast from unsigned to signed. */
		extra_data_size = (int64_t) edata_size;
	}

	if(block_flags & HFL_DATA) {
		if(!read_var_sized(a, &data_size, NULL))
			return ARCHIVE_EOF;

		rar->file.bytes_remaining = data_size;
	} else {
		rar->file.bytes_remaining = 0;

		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
				"no data found in file/service block");
		return ARCHIVE_FATAL;
	}

	if(!read_var_sized(a, &file_flags, NULL))
		return ARCHIVE_EOF;

	if(!read_var(a, &unpacked_size, NULL))
		return ARCHIVE_EOF;

	if(file_flags & UNKNOWN_UNPACKED_SIZE) {
		archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
		    "Files with unknown unpacked size are not supported");
		return ARCHIVE_FATAL;
	}

	rar->file.dir = (uint8_t) ((file_flags & DIRECTORY) > 0);

	if(!read_var_sized(a, &file_attr, NULL))
		return ARCHIVE_EOF;

	if(file_flags & UTIME) {
		if(!read_u32(a, &mtime))
			return ARCHIVE_EOF;
	}

	if(file_flags & CRC32) {
		if(!read_u32(a, &crc))
			return ARCHIVE_EOF;
	}

	if(!read_var_sized(a, &compression_info, NULL))
		return ARCHIVE_EOF;

	/* Unpack the compression info bit field: bits 0-5 hold the version,
	 * bit 6 the solid flag, bits 7-9 the method and bits 10-13 the
	 * shift applied to the base window size. */
	c_method = (int) (compression_info >> 7) & 0x7;
	c_version = (int) (compression_info & 0x3f);

	/* RAR5 seems to limit the dictionary size to 64MB. */
	window_size = (rar->file.dir > 0) ?
		0 :
		g_unpack_window_size << ((compression_info >> 10) & 15);
	rar->cstate.method = c_method;
	rar->cstate.version = c_version + 50;
	rar->file.solid = (compression_info & SOLID) > 0;

	/* Archives which declare solid files without initializing the window
	 * buffer first are invalid, unless previous data was encrypted, in
	 * which case we may never have had the chance */

	if(rar->file.solid > 0 && rar->cstate.data_encrypted == 0 &&
	    rar->cstate.window_buf == NULL) {
		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
				  "Declared solid file, but no window buffer "
				  "initialized yet.");
		return ARCHIVE_FATAL;
	}

	/* Check if window_size is a sane value. Also, if the file is not
	 * declared as a directory, disallow window_size == 0. */
	if(window_size > (64 * 1024 * 1024) ||
	    (rar->file.dir == 0 && window_size == 0))
	{
		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
		    "Declared dictionary size is not supported.");
		return ARCHIVE_FATAL;
	}

	if(rar->file.solid > 0) {
		/* Re-check if current window size is the same as previous
		 * window size (for solid files only). */
		if(rar->file.solid_window_size > 0 &&
		    rar->file.solid_window_size != (ssize_t) window_size)
		{
			archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
			    "Window size for this solid file doesn't match "
			    "the window size used in previous solid file. ");
			return ARCHIVE_FATAL;
		}
	}
	else
		rar->cstate.data_encrypted = 0; /* Reset for new buffer */

	if(rar->cstate.window_size < (ssize_t) window_size &&
	    rar->cstate.window_buf)
	{
		/* The `data_ready` stack contains pointers to the `window_buf` or
		 * `filtered_buf` buffers.  Since we're about to reallocate the first
		 * buffer, some of those pointers could become invalid. Therefore, we
		 * need to dispose of all entries from the stack before attempting the
		 * realloc. */
		clear_data_ready_stack(rar);

		/* If window_buf has been allocated before, reallocate it, so
		 * that its size will match new window_size. */

		uint8_t* new_window_buf =
			realloc(rar->cstate.window_buf, (size_t) window_size);

		if(!new_window_buf) {
			/* On failure the old buffer is still owned by
			 * rar->cstate.window_buf; it is not freed here. */
			archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
				"Not enough memory when trying to realloc the window "
				"buffer.");
			return ARCHIVE_FATAL;
		}

		rar->cstate.window_buf = new_window_buf;
	}

	/* Values up to 64M should fit into ssize_t on every
	 * architecture. */
	rar->cstate.window_size = (ssize_t) window_size;

	if(rar->file.solid > 0 && rar->file.solid_window_size == 0) {
		/* Solid files have to have the same window_size across
		   whole archive. Remember the window_size parameter
		   for first solid file found. */
		rar->file.solid_window_size = rar->cstate.window_size;
	}

	init_window_mask(rar);

	/* process_head_service() sets this flag back to 1 after a
	 * successful parse of a SERVICE block. */
	rar->file.service = 0;

	if(!read_var_sized(a, &host_os, NULL))
		return ARCHIVE_EOF;

	if(host_os == HOST_WINDOWS) {
		/* Host OS is Windows */

		__LA_MODE_T mode;

		/* Only the directory and read-only attributes influence the
		 * synthesized mode bits. */
		if(file_attr & ATTR_DIRECTORY) {
			if (file_attr & ATTR_READONLY) {
				mode = 0555 | AE_IFDIR;
			} else {
				mode = 0755 | AE_IFDIR;
			}
		} else {
			if (file_attr & ATTR_READONLY) {
				mode = 0444 | AE_IFREG;
			} else {
				mode = 0644 | AE_IFREG;
			}
		}

		archive_entry_set_mode(entry, mode);

		if (file_attr & (ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM)) {
			char *fflags_text, *ptr;
			/* allocate for ",rdonly,hidden,system" */
			fflags_text = malloc(22 * sizeof(*fflags_text));
			/* If the allocation fails the file flags are simply
			 * not set; no error is reported. */
			if (fflags_text != NULL) {
				ptr = fflags_text;
				if (file_attr & ATTR_READONLY) {
					strcpy(ptr, ",rdonly");
					ptr = ptr + 7;
				}
				if (file_attr & ATTR_HIDDEN) {
					strcpy(ptr, ",hidden");
					ptr = ptr + 7;
				}
				if (file_attr & ATTR_SYSTEM) {
					strcpy(ptr, ",system");
					ptr = ptr + 7;
				}
				if (ptr > fflags_text) {
					/* The `+ 1` drops the leading
					 * comma. */
					archive_entry_copy_fflags_text(entry,
					    fflags_text + 1);
				}
				free(fflags_text);
			}
		}
	} else if(host_os == HOST_UNIX) {
		/* Host OS is Unix */
		archive_entry_set_mode(entry, (__LA_MODE_T) file_attr);
	} else {
		/* Unknown host OS */
		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
				"Unsupported Host OS: 0x%jx",
				(uintmax_t)host_os);

		return ARCHIVE_FATAL;
	}

	if(!read_var_sized(a, &name_size, NULL))
		return ARCHIVE_EOF;

	if(name_size > (MAX_NAME_IN_CHARS - 1)) {
		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
				"Filename is too long");

		return ARCHIVE_FATAL;
	}

	if(name_size == 0) {
		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
				"No filename specified");

		return ARCHIVE_FATAL;
	}

	if(!read_ahead(a, name_size, &p))
		return ARCHIVE_EOF;

	/* The stored name has no terminator; copy it and add one. */
	memcpy(name_utf8_buf, p, name_size);
	name_utf8_buf[name_size] = 0;
	if(ARCHIVE_OK != consume(a, name_size)) {
		return ARCHIVE_EOF;
	}

	archive_entry_update_pathname_utf8(entry, name_utf8_buf);

	if(extra_data_size > 0) {
		int ret = process_head_file_extra(a, entry, rar,
		    extra_data_size);

		/*
		 * TODO: rewrite or remove useless sanity check
		 *       as extra_data_size is not passed as a pointer
		 *
		if(extra_data_size < 0) {
			archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
			    "File extra data size is not zero");
			return ARCHIVE_FATAL;
		}
		 */

		if(ret != ARCHIVE_OK)
			return ret;
	}

	/* Headers with UNKNOWN_UNPACKED_SIZE were rejected earlier in this
	 * function, so this condition holds whenever it is reached. */
	if((file_flags & UNKNOWN_UNPACKED_SIZE) == 0) {
		rar->file.unpacked_size = (ssize_t) unpacked_size;
		/* The entry size is set only when no redirection type was
		 * recorded for this file. */
		if(rar->file.redir_type == REDIR_TYPE_NONE)
			archive_entry_set_size(entry, unpacked_size);
	}

	if(file_flags & UTIME) {
		archive_entry_set_mtime(entry, (time_t) mtime, 0);
	}

	if(file_flags & CRC32) {
		rar->file.stored_crc32 = crc;
	}

	if(!rar->cstate.switch_multivolume) {
		/* Do not reinitialize unpacking state if we're switching
		 * archives. */
		rar->cstate.block_parsing_finished = 1;
		rar->cstate.all_filters_applied = 1;
		rar->cstate.initialized = 0;
	}

	if(rar->generic.split_before > 0) {
		/* If now we're standing on a header that has a 'split before'
		 * mark, it means we're standing on a 'continuation' file
		 * header. Signal the caller that if it wants to move to
		 * another file, it must call rar5_read_header() function
		 * again. */

		return ARCHIVE_RETRY;
	} else {
		return ARCHIVE_OK;
	}
}
  1632. static int process_head_service(struct archive_read* a, struct rar5* rar,
  1633. struct archive_entry* entry, size_t block_flags)
  1634. {
  1635. /* Process this SERVICE block the same way as FILE blocks. */
  1636. int ret = process_head_file(a, rar, entry, block_flags);
  1637. if(ret != ARCHIVE_OK)
  1638. return ret;
  1639. rar->file.service = 1;
  1640. /* But skip the data part automatically. It's no use for the user
  1641. * anyway. It contains only service data, not even needed to
  1642. * properly unpack the file. */
  1643. ret = rar5_read_data_skip(a);
  1644. if(ret != ARCHIVE_OK)
  1645. return ret;
  1646. /* After skipping, try parsing another block automatically. */
  1647. return ARCHIVE_RETRY;
  1648. }
  1649. static int process_head_main(struct archive_read* a, struct rar5* rar,
  1650. struct archive_entry* entry, size_t block_flags)
  1651. {
  1652. int ret;
  1653. uint64_t extra_data_size = 0;
  1654. size_t extra_field_size = 0;
  1655. size_t extra_field_id = 0;
  1656. size_t archive_flags = 0;
  1657. enum MAIN_FLAGS {
  1658. VOLUME = 0x0001, /* multi-volume archive */
  1659. VOLUME_NUMBER = 0x0002, /* volume number, first vol doesn't
  1660. * have it */
  1661. SOLID = 0x0004, /* solid archive */
  1662. PROTECT = 0x0008, /* contains Recovery info */
  1663. LOCK = 0x0010, /* readonly flag, not used */
  1664. };
  1665. enum MAIN_EXTRA {
  1666. // Just one attribute here.
  1667. LOCATOR = 0x01,
  1668. };
  1669. (void) entry;
  1670. if(block_flags & HFL_EXTRA_DATA) {
  1671. if(!read_var(a, &extra_data_size, NULL))
  1672. return ARCHIVE_EOF;
  1673. } else {
  1674. extra_data_size = 0;
  1675. }
  1676. if(!read_var_sized(a, &archive_flags, NULL)) {
  1677. return ARCHIVE_EOF;
  1678. }
  1679. rar->main.volume = (archive_flags & VOLUME) > 0;
  1680. rar->main.solid = (archive_flags & SOLID) > 0;
  1681. if(archive_flags & VOLUME_NUMBER) {
  1682. size_t v = 0;
  1683. if(!read_var_sized(a, &v, NULL)) {
  1684. return ARCHIVE_EOF;
  1685. }
  1686. if (v > UINT_MAX) {
  1687. archive_set_error(&a->archive,
  1688. ARCHIVE_ERRNO_FILE_FORMAT,
  1689. "Invalid volume number");
  1690. return ARCHIVE_FATAL;
  1691. }
  1692. rar->main.vol_no = (unsigned int) v;
  1693. } else {
  1694. rar->main.vol_no = 0;
  1695. }
  1696. if(rar->vol.expected_vol_no > 0 &&
  1697. rar->main.vol_no != rar->vol.expected_vol_no)
  1698. {
  1699. /* Returning EOF instead of FATAL because of strange
  1700. * libarchive behavior. When opening multiple files via
  1701. * archive_read_open_filenames(), after reading up the whole
  1702. * last file, the __archive_read_ahead function wraps up to
  1703. * the first archive instead of returning EOF. */
  1704. return ARCHIVE_EOF;
  1705. }
  1706. if(extra_data_size == 0) {
  1707. /* Early return. */
  1708. return ARCHIVE_OK;
  1709. }
  1710. if(!read_var_sized(a, &extra_field_size, NULL)) {
  1711. return ARCHIVE_EOF;
  1712. }
  1713. if(!read_var_sized(a, &extra_field_id, NULL)) {
  1714. return ARCHIVE_EOF;
  1715. }
  1716. if(extra_field_size == 0) {
  1717. archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
  1718. "Invalid extra field size");
  1719. return ARCHIVE_FATAL;
  1720. }
  1721. switch(extra_field_id) {
  1722. case LOCATOR:
  1723. ret = process_main_locator_extra_block(a, rar);
  1724. if(ret != ARCHIVE_OK) {
  1725. /* Error while parsing main locator extra
  1726. * block. */
  1727. return ret;
  1728. }
  1729. break;
  1730. default:
  1731. archive_set_error(&a->archive,
  1732. ARCHIVE_ERRNO_FILE_FORMAT,
  1733. "Unsupported extra type (0x%jx)",
  1734. (uintmax_t)extra_field_id);
  1735. return ARCHIVE_FATAL;
  1736. }
  1737. return ARCHIVE_OK;
  1738. }
  1739. static int skip_unprocessed_bytes(struct archive_read* a) {
  1740. struct rar5* rar = get_context(a);
  1741. int ret;
  1742. if(rar->file.bytes_remaining) {
  1743. /* Use different skipping method in block merging mode than in
  1744. * normal mode. If merge mode is active, rar5_read_data_skip
  1745. * can't be used, because it could allow recursive use of
  1746. * merge_block() * function, and this function doesn't support
  1747. * recursive use. */
  1748. if(rar->merge_mode) {
  1749. /* Discard whole merged block. This is valid in solid
  1750. * mode as well, because the code will discard blocks
  1751. * only if those blocks are safe to discard (i.e.
  1752. * they're not FILE blocks). */
  1753. ret = consume(a, rar->file.bytes_remaining);
  1754. if(ret != ARCHIVE_OK) {
  1755. return ret;
  1756. }
  1757. rar->file.bytes_remaining = 0;
  1758. } else {
  1759. /* If we're not in merge mode, use safe skipping code.
  1760. * This will ensure we'll handle solid archives
  1761. * properly. */
  1762. ret = rar5_read_data_skip(a);
  1763. if(ret != ARCHIVE_OK) {
  1764. return ret;
  1765. }
  1766. }
  1767. }
  1768. return ARCHIVE_OK;
  1769. }
  1770. static int scan_for_signature(struct archive_read* a);
  1771. /* Base block processing function. A 'base block' is a RARv5 header block
  1772. * that tells the reader what kind of data is stored inside the block.
  1773. *
 * From the bird's-eye view a RAR file looks like this:
  1775. *
  1776. * <magic><base_block_1><base_block_2>...<base_block_n>
  1777. *
  1778. * There are a few types of base blocks. Those types are specified inside
  1779. * the 'switch' statement in this function. For example purposes, I'll write
  1780. * how a standard RARv5 file could look like here:
  1781. *
  1782. * <magic><MAIN><FILE><FILE><FILE><SERVICE><ENDARC>
  1783. *
  1784. * The structure above could describe an archive file with 3 files in it,
  1785. * one service "QuickOpen" block (that is ignored by this parser), and an
  1786. * end of file base block marker.
  1787. *
  1788. * If the file is stored in multiple archive files ("multiarchive"), it might
  1789. * look like this:
  1790. *
  1791. * .part01.rar: <magic><MAIN><FILE><ENDARC>
  1792. * .part02.rar: <magic><MAIN><FILE><ENDARC>
  1793. * .part03.rar: <magic><MAIN><FILE><ENDARC>
  1794. *
  1795. * This example could describe 3 RAR files that contain ONE archived file.
  1796. * Or it could describe 3 RAR files that contain 3 different files. Or 3
 * RAR files that contain 2 files. It all depends what metadata is stored in
  1798. * the headers of <FILE> blocks.
  1799. *
  1800. * Each <FILE> block contains info about its size, the name of the file it's
  1801. * storing inside, and whether this FILE block is a continuation block of
 * previous archive ('split before'), and whether this FILE block should be
  1803. * continued in another archive ('split after'). By parsing the 'split before'
  1804. * and 'split after' flags, we're able to tell if multiple <FILE> base blocks
  1805. * are describing one file, or multiple files (with the same filename, for
  1806. * example).
  1807. *
  1808. * One thing to note is that if we're parsing the first <FILE> block, and
  1809. * we see 'split after' flag, then we need to jump over to another <FILE>
  1810. * block to be able to decompress rest of the data. To do this, we need
  1811. * to skip the <ENDARC> block, then switch to another file, then skip the
  1812. * <magic> block, <MAIN> block, and then we're standing on the proper
  1813. * <FILE> block.
  1814. */
  1815. static int process_base_block(struct archive_read* a,
  1816. struct archive_entry* entry)
  1817. {
  1818. const size_t SMALLEST_RAR5_BLOCK_SIZE = 3;
  1819. struct rar5* rar = get_context(a);
  1820. uint32_t hdr_crc, computed_crc;
  1821. size_t raw_hdr_size = 0, hdr_size_len, hdr_size;
  1822. size_t header_id = 0;
  1823. size_t header_flags = 0;
  1824. const uint8_t* p;
  1825. int ret;
  1826. enum HEADER_TYPE {
  1827. HEAD_MARK = 0x00, HEAD_MAIN = 0x01, HEAD_FILE = 0x02,
  1828. HEAD_SERVICE = 0x03, HEAD_CRYPT = 0x04, HEAD_ENDARC = 0x05,
  1829. HEAD_UNKNOWN = 0xff,
  1830. };
  1831. /* Skip any unprocessed data for this file. */
  1832. ret = skip_unprocessed_bytes(a);
  1833. if(ret != ARCHIVE_OK)
  1834. return ret;
  1835. /* Read the expected CRC32 checksum. */
  1836. if(!read_u32(a, &hdr_crc)) {
  1837. return ARCHIVE_EOF;
  1838. }
  1839. /* Read header size. */
  1840. if(!read_var_sized(a, &raw_hdr_size, &hdr_size_len)) {
  1841. return ARCHIVE_EOF;
  1842. }
  1843. hdr_size = raw_hdr_size + hdr_size_len;
  1844. /* Sanity check, maximum header size for RAR5 is 2MB. */
  1845. if(hdr_size > (2 * 1024 * 1024)) {
  1846. archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
  1847. "Base block header is too large");
  1848. return ARCHIVE_FATAL;
  1849. }
  1850. /* Additional sanity checks to weed out invalid files. */
  1851. if(raw_hdr_size == 0 || hdr_size_len == 0 ||
  1852. hdr_size < SMALLEST_RAR5_BLOCK_SIZE)
  1853. {
  1854. archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
  1855. "Too small block encountered (%zu bytes)",
  1856. raw_hdr_size);
  1857. return ARCHIVE_FATAL;
  1858. }
  1859. /* Read the whole header data into memory, maximum memory use here is
  1860. * 2MB. */
  1861. if(!read_ahead(a, hdr_size, &p)) {
  1862. return ARCHIVE_EOF;
  1863. }
  1864. /* Verify the CRC32 of the header data. */
  1865. computed_crc = (uint32_t) crc32(0, p, (int) hdr_size);
  1866. if(computed_crc != hdr_crc) {
  1867. #ifndef DONT_FAIL_ON_CRC_ERROR
  1868. archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
  1869. "Header CRC error");
  1870. return ARCHIVE_FATAL;
  1871. #endif
  1872. }
  1873. /* If the checksum is OK, we proceed with parsing. */
  1874. if(ARCHIVE_OK != consume(a, hdr_size_len)) {
  1875. return ARCHIVE_EOF;
  1876. }
  1877. if(!read_var_sized(a, &header_id, NULL))
  1878. return ARCHIVE_EOF;
  1879. if(!read_var_sized(a, &header_flags, NULL))
  1880. return ARCHIVE_EOF;
  1881. rar->generic.split_after = (header_flags & HFL_SPLIT_AFTER) > 0;
  1882. rar->generic.split_before = (header_flags & HFL_SPLIT_BEFORE) > 0;
  1883. rar->generic.size = (int)hdr_size;
  1884. rar->generic.last_header_id = (int)header_id;
  1885. rar->main.endarc = 0;
  1886. /* Those are possible header ids in RARv5. */
  1887. switch(header_id) {
  1888. case HEAD_MAIN:
  1889. ret = process_head_main(a, rar, entry, header_flags);
  1890. /* Main header doesn't have any files in it, so it's
  1891. * pointless to return to the caller. Retry to next
  1892. * header, which should be HEAD_FILE/HEAD_SERVICE. */
  1893. if(ret == ARCHIVE_OK)
  1894. return ARCHIVE_RETRY;
  1895. return ret;
  1896. case HEAD_SERVICE:
  1897. ret = process_head_service(a, rar, entry, header_flags);
  1898. return ret;
  1899. case HEAD_FILE:
  1900. ret = process_head_file(a, rar, entry, header_flags);
  1901. return ret;
  1902. case HEAD_CRYPT:
  1903. archive_entry_set_is_metadata_encrypted(entry, 1);
  1904. archive_entry_set_is_data_encrypted(entry, 1);
  1905. rar->has_encrypted_entries = 1;
  1906. rar->headers_are_encrypted = 1;
  1907. archive_set_error(&a->archive,
  1908. ARCHIVE_ERRNO_FILE_FORMAT,
  1909. "Encryption is not supported");
  1910. return ARCHIVE_FATAL;
  1911. case HEAD_ENDARC:
  1912. rar->main.endarc = 1;
  1913. /* After encountering an end of file marker, we need
  1914. * to take into consideration if this archive is
  1915. * continued in another file (i.e. is it part01.rar:
  1916. * is there a part02.rar?) */
  1917. if(rar->main.volume) {
  1918. /* In case there is part02.rar, position the
  1919. * read pointer in a proper place, so we can
  1920. * resume parsing. */
  1921. ret = scan_for_signature(a);
  1922. if(ret == ARCHIVE_FATAL) {
  1923. return ARCHIVE_EOF;
  1924. } else {
  1925. if(rar->vol.expected_vol_no ==
  1926. UINT_MAX) {
  1927. archive_set_error(&a->archive,
  1928. ARCHIVE_ERRNO_FILE_FORMAT,
  1929. "Header error");
  1930. return ARCHIVE_FATAL;
  1931. }
  1932. rar->vol.expected_vol_no =
  1933. rar->main.vol_no + 1;
  1934. return ARCHIVE_OK;
  1935. }
  1936. } else {
  1937. return ARCHIVE_EOF;
  1938. }
  1939. case HEAD_MARK:
  1940. return ARCHIVE_EOF;
  1941. default:
  1942. if((header_flags & HFL_SKIP_IF_UNKNOWN) == 0) {
  1943. archive_set_error(&a->archive,
  1944. ARCHIVE_ERRNO_FILE_FORMAT,
  1945. "Header type error");
  1946. return ARCHIVE_FATAL;
  1947. } else {
  1948. /* If the block is marked as 'skip if unknown',
  1949. * do as the flag says: skip the block
  1950. * instead on failing on it. */
  1951. return ARCHIVE_RETRY;
  1952. }
  1953. }
  1954. #if !defined WIN32
  1955. // Not reached.
  1956. archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
  1957. "Internal unpacker error");
  1958. return ARCHIVE_FATAL;
  1959. #endif
  1960. }
  1961. static int skip_base_block(struct archive_read* a) {
  1962. int ret;
  1963. struct rar5* rar = get_context(a);
  1964. /* Create a new local archive_entry structure that will be operated on
  1965. * by header reader; operations on this archive_entry will be discarded.
  1966. */
  1967. struct archive_entry* entry = archive_entry_new();
  1968. ret = process_base_block(a, entry);
  1969. /* Discard operations on this archive_entry structure. */
  1970. archive_entry_free(entry);
  1971. if(ret == ARCHIVE_FATAL)
  1972. return ret;
  1973. if(rar->generic.last_header_id == 2 && rar->generic.split_before > 0)
  1974. return ARCHIVE_OK;
  1975. if(ret == ARCHIVE_OK)
  1976. return ARCHIVE_RETRY;
  1977. else
  1978. return ret;
  1979. }
  1980. static int try_skip_sfx(struct archive_read *a)
  1981. {
  1982. const char *p;
  1983. if ((p = __archive_read_ahead(a, 7, NULL)) == NULL)
  1984. return ARCHIVE_EOF;
  1985. if ((p[0] == 'M' && p[1] == 'Z') || memcmp(p, "\x7F\x45LF", 4) == 0)
  1986. {
  1987. char signature[sizeof(rar5_signature_xor)];
  1988. const void *h;
  1989. const char *q;
  1990. size_t skip, total = 0;
  1991. ssize_t bytes, window = 4096;
  1992. rar5_signature(signature);
  1993. while (total + window <= (1024 * 512)) {
  1994. h = __archive_read_ahead(a, window, &bytes);
  1995. if (h == NULL) {
  1996. /* Remaining bytes are less than window. */
  1997. window >>= 1;
  1998. if (window < 0x40)
  1999. goto fatal;
  2000. continue;
  2001. }
  2002. if (bytes < 0x40)
  2003. goto fatal;
  2004. p = h;
  2005. q = p + bytes;
  2006. /*
  2007. * Scan ahead until we find something that looks
  2008. * like the RAR header.
  2009. */
  2010. while (p + 8 < q) {
  2011. if (memcmp(p, signature, sizeof(signature)) == 0) {
  2012. skip = p - (const char *)h;
  2013. __archive_read_consume(a, skip);
  2014. return (ARCHIVE_OK);
  2015. }
  2016. p += 0x10;
  2017. }
  2018. skip = p - (const char *)h;
  2019. __archive_read_consume(a, skip);
  2020. total += skip;
  2021. }
  2022. }
  2023. return ARCHIVE_OK;
  2024. fatal:
  2025. archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
  2026. "Couldn't find out RAR header");
  2027. return (ARCHIVE_FATAL);
  2028. }
  2029. static int rar5_read_header(struct archive_read *a,
  2030. struct archive_entry *entry)
  2031. {
  2032. struct rar5* rar = get_context(a);
  2033. int ret;
  2034. /*
  2035. * It should be sufficient to call archive_read_next_header() for
  2036. * a reader to determine if an entry is encrypted or not.
  2037. */
  2038. if (rar->has_encrypted_entries == ARCHIVE_READ_FORMAT_ENCRYPTION_DONT_KNOW) {
  2039. rar->has_encrypted_entries = 0;
  2040. }
  2041. if(rar->header_initialized == 0) {
  2042. init_header(a);
  2043. if ((ret = try_skip_sfx(a)) < ARCHIVE_WARN)
  2044. return ret;
  2045. rar->header_initialized = 1;
  2046. }
  2047. if(rar->skipped_magic == 0) {
  2048. if(ARCHIVE_OK != consume(a, sizeof(rar5_signature_xor))) {
  2049. return ARCHIVE_EOF;
  2050. }
  2051. rar->skipped_magic = 1;
  2052. }
  2053. do {
  2054. ret = process_base_block(a, entry);
  2055. } while(ret == ARCHIVE_RETRY ||
  2056. (rar->main.endarc > 0 && ret == ARCHIVE_OK));
  2057. return ret;
  2058. }
  2059. static void init_unpack(struct rar5* rar) {
  2060. rar->file.calculated_crc32 = 0;
  2061. init_window_mask(rar);
  2062. free(rar->cstate.window_buf);
  2063. free(rar->cstate.filtered_buf);
  2064. if(rar->cstate.window_size > 0) {
  2065. rar->cstate.window_buf = calloc(1, rar->cstate.window_size);
  2066. rar->cstate.filtered_buf = calloc(1, rar->cstate.window_size);
  2067. } else {
  2068. rar->cstate.window_buf = NULL;
  2069. rar->cstate.filtered_buf = NULL;
  2070. }
  2071. clear_data_ready_stack(rar);
  2072. rar->cstate.write_ptr = 0;
  2073. rar->cstate.last_write_ptr = 0;
  2074. memset(&rar->cstate.bd, 0, sizeof(rar->cstate.bd));
  2075. memset(&rar->cstate.ld, 0, sizeof(rar->cstate.ld));
  2076. memset(&rar->cstate.dd, 0, sizeof(rar->cstate.dd));
  2077. memset(&rar->cstate.ldd, 0, sizeof(rar->cstate.ldd));
  2078. memset(&rar->cstate.rd, 0, sizeof(rar->cstate.rd));
  2079. }
  2080. static void update_crc(struct rar5* rar, const uint8_t* p, size_t to_read) {
  2081. int verify_crc;
  2082. if(rar->skip_mode) {
  2083. #if defined CHECK_CRC_ON_SOLID_SKIP
  2084. verify_crc = 1;
  2085. #else
  2086. verify_crc = 0;
  2087. #endif
  2088. } else
  2089. verify_crc = 1;
  2090. if(verify_crc) {
  2091. /* Don't update CRC32 if the file doesn't have the
  2092. * `stored_crc32` info filled in. */
  2093. if(rar->file.stored_crc32 > 0) {
  2094. rar->file.calculated_crc32 =
  2095. crc32(rar->file.calculated_crc32, p, (unsigned int)to_read);
  2096. }
  2097. /* Check if the file uses an optional BLAKE2sp checksum
  2098. * algorithm. */
  2099. if(rar->file.has_blake2 > 0) {
  2100. /* Return value of the `update` function is always 0,
  2101. * so we can explicitly ignore it here. */
  2102. (void) blake2sp_update(&rar->file.b2state, p, to_read);
  2103. }
  2104. }
  2105. }
/*
 * Builds the Huffman decoding tables in `table` from an array of code bit
 * lengths.
 *
 * bit_length: `size` entries, one per symbol; only the low 4 bits of each
 *             entry are used, and a length of 0 marks an unused symbol.
 * table:      receives decode_len/decode_pos/decode_num as well as the
 *             quick_len/quick_num lookup arrays.
 * size:       number of symbols; also selects the width of the quick
 *             lookup (10 bits when it equals HUFF_NC, 7 bits otherwise).
 *
 * Always returns ARCHIVE_OK.
 */
static int create_decode_tables(uint8_t* bit_length,
    struct decode_table* table, int size)
{
	int code, upper_limit = 0, i, lc[16];
	uint32_t decode_pos_clone[rar5_countof(table->decode_pos)];
	ssize_t cur_len, quick_data_size;

	memset(&lc, 0, sizeof(lc));
	memset(table->decode_num, 0, sizeof(table->decode_num));
	table->size = size;
	table->quick_bits = size == HUFF_NC ? 10 : 7;

	/* lc[n] = number of symbols whose code is n bits long. */
	for(i = 0; i < size; i++) {
		lc[bit_length[i] & 15]++;
	}

	/* Symbols with a zero length are not part of the code. */
	lc[0] = 0;
	table->decode_pos[0] = 0;
	table->decode_len[0] = 0;

	/* For every code length i, decode_len[i] gets the running code
	 * limit shifted to the top of a 16-bit field, and decode_pos[i]
	 * the index in decode_num where symbols of that length start. */
	for(i = 1; i < 16; i++) {
		upper_limit += lc[i];

		table->decode_len[i] = upper_limit << (16 - i);
		table->decode_pos[i] = table->decode_pos[i - 1] + lc[i - 1];

		upper_limit <<= 1;
	}

	/* Work on a copy of the start positions, because the positions are
	 * advanced while the symbols are distributed. */
	memcpy(decode_pos_clone, table->decode_pos, sizeof(decode_pos_clone));

	/* Place each used symbol in decode_num, grouped by code length. */
	for(i = 0; i < size; i++) {
		uint8_t clen = bit_length[i] & 15;
		if(clen > 0) {
			int last_pos = decode_pos_clone[clen];
			table->decode_num[last_pos] = i;
			decode_pos_clone[clen]++;
		}
	}

	/* Fill the quick lookup arrays: one slot for every possible value
	 * of the first `quick_bits` bits of the input. */
	quick_data_size = (int64_t)1 << table->quick_bits;
	cur_len = 1;
	for(code = 0; code < quick_data_size; code++) {
		int bit_field = code << (16 - table->quick_bits);
		int dist, pos;

		/* cur_len only ever grows, as bit_field increases with every
		 * iteration of the outer loop. */
		while(cur_len < rar5_countof(table->decode_len) &&
				bit_field >= table->decode_len[cur_len]) {
			cur_len++;
		}

		table->quick_len[code] = (uint8_t) cur_len;

		dist = bit_field - table->decode_len[cur_len - 1];
		dist >>= (16 - cur_len);

		pos = table->decode_pos[cur_len & 15] + dist;
		/* Slots that don't map to a valid symbol position get
		 * symbol 0. */
		if(cur_len < rar5_countof(table->decode_pos) && pos < size) {
			table->quick_num[code] = table->decode_num[pos];
		} else {
			table->quick_num[code] = 0;
		}
	}

	return ARCHIVE_OK;
}
  2158. static int decode_number(struct archive_read* a, struct decode_table* table,
  2159. const uint8_t* p, uint16_t* num)
  2160. {
  2161. int i, bits, dist, ret;
  2162. uint16_t bitfield;
  2163. uint32_t pos;
  2164. struct rar5* rar = get_context(a);
  2165. if(ARCHIVE_OK != (ret = read_bits_16(a, rar, p, &bitfield))) {
  2166. return ret;
  2167. }
  2168. bitfield &= 0xfffe;
  2169. if(bitfield < table->decode_len[table->quick_bits]) {
  2170. int code = bitfield >> (16 - table->quick_bits);
  2171. skip_bits(rar, table->quick_len[code]);
  2172. *num = table->quick_num[code];
  2173. return ARCHIVE_OK;
  2174. }
  2175. bits = 15;
  2176. for(i = table->quick_bits + 1; i < 15; i++) {
  2177. if(bitfield < table->decode_len[i]) {
  2178. bits = i;
  2179. break;
  2180. }
  2181. }
  2182. skip_bits(rar, bits);
  2183. dist = bitfield - table->decode_len[bits - 1];
  2184. dist >>= (16 - bits);
  2185. pos = table->decode_pos[bits] + dist;
  2186. if(pos >= table->size)
  2187. pos = 0;
  2188. *num = table->decode_num[pos];
  2189. return ARCHIVE_OK;
  2190. }
  2191. /* Reads and parses Huffman tables from the beginning of the block. */
  2192. static int parse_tables(struct archive_read* a, struct rar5* rar,
  2193. const uint8_t* p)
  2194. {
  2195. int ret, value, i, w, idx = 0;
  2196. uint8_t bit_length[HUFF_BC],
  2197. table[HUFF_TABLE_SIZE],
  2198. nibble_mask = 0xF0,
  2199. nibble_shift = 4;
  2200. enum { ESCAPE = 15 };
  2201. /* The data for table generation is compressed using a simple RLE-like
  2202. * algorithm when storing zeroes, so we need to unpack it first. */
  2203. for(w = 0, i = 0; w < HUFF_BC;) {
  2204. if(i >= rar->cstate.cur_block_size) {
  2205. /* Truncated data, can't continue. */
  2206. archive_set_error(&a->archive,
  2207. ARCHIVE_ERRNO_FILE_FORMAT,
  2208. "Truncated data in huffman tables");
  2209. return ARCHIVE_FATAL;
  2210. }
  2211. value = (p[i] & nibble_mask) >> nibble_shift;
  2212. if(nibble_mask == 0x0F)
  2213. ++i;
  2214. nibble_mask ^= 0xFF;
  2215. nibble_shift ^= 4;
  2216. /* Values smaller than 15 is data, so we write it directly.
  2217. * Value 15 is a flag telling us that we need to unpack more
  2218. * bytes. */
  2219. if(value == ESCAPE) {
  2220. value = (p[i] & nibble_mask) >> nibble_shift;
  2221. if(nibble_mask == 0x0F)
  2222. ++i;
  2223. nibble_mask ^= 0xFF;
  2224. nibble_shift ^= 4;
  2225. if(value == 0) {
  2226. /* We sometimes need to write the actual value
  2227. * of 15, so this case handles that. */
  2228. bit_length[w++] = ESCAPE;
  2229. } else {
  2230. int k;
  2231. /* Fill zeroes. */
  2232. for(k = 0; (k < value + 2) && (w < HUFF_BC);
  2233. k++) {
  2234. bit_length[w++] = 0;
  2235. }
  2236. }
  2237. } else {
  2238. bit_length[w++] = value;
  2239. }
  2240. }
  2241. rar->bits.in_addr = i;
  2242. rar->bits.bit_addr = nibble_shift ^ 4;
  2243. ret = create_decode_tables(bit_length, &rar->cstate.bd, HUFF_BC);
  2244. if(ret != ARCHIVE_OK) {
  2245. archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
  2246. "Decoding huffman tables failed");
  2247. return ARCHIVE_FATAL;
  2248. }
  2249. for(i = 0; i < HUFF_TABLE_SIZE;) {
  2250. uint16_t num;
  2251. ret = decode_number(a, &rar->cstate.bd, p, &num);
  2252. if(ret != ARCHIVE_OK) {
  2253. archive_set_error(&a->archive,
  2254. ARCHIVE_ERRNO_FILE_FORMAT,
  2255. "Decoding huffman tables failed");
  2256. return ARCHIVE_FATAL;
  2257. }
  2258. if(num < 16) {
  2259. /* 0..15: store directly */
  2260. table[i] = (uint8_t) num;
  2261. i++;
  2262. } else if(num < 18) {
  2263. /* 16..17: repeat previous code */
  2264. uint16_t n;
  2265. if(ARCHIVE_OK != (ret = read_bits_16(a, rar, p, &n)))
  2266. return ret;
  2267. if(num == 16) {
  2268. n >>= 13;
  2269. n += 3;
  2270. skip_bits(rar, 3);
  2271. } else {
  2272. n >>= 9;
  2273. n += 11;
  2274. skip_bits(rar, 7);
  2275. }
  2276. if(i > 0) {
  2277. while(n-- > 0 && i < HUFF_TABLE_SIZE) {
  2278. table[i] = table[i - 1];
  2279. i++;
  2280. }
  2281. } else {
  2282. archive_set_error(&a->archive,
  2283. ARCHIVE_ERRNO_FILE_FORMAT,
  2284. "Unexpected error when decoding "
  2285. "huffman tables");
  2286. return ARCHIVE_FATAL;
  2287. }
  2288. } else {
  2289. /* other codes: fill with zeroes `n` times */
  2290. uint16_t n;
  2291. if(ARCHIVE_OK != (ret = read_bits_16(a, rar, p, &n)))
  2292. return ret;
  2293. if(num == 18) {
  2294. n >>= 13;
  2295. n += 3;
  2296. skip_bits(rar, 3);
  2297. } else {
  2298. n >>= 9;
  2299. n += 11;
  2300. skip_bits(rar, 7);
  2301. }
  2302. while(n-- > 0 && i < HUFF_TABLE_SIZE)
  2303. table[i++] = 0;
  2304. }
  2305. }
  2306. ret = create_decode_tables(&table[idx], &rar->cstate.ld, HUFF_NC);
  2307. if(ret != ARCHIVE_OK) {
  2308. archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
  2309. "Failed to create literal table");
  2310. return ARCHIVE_FATAL;
  2311. }
  2312. idx += HUFF_NC;
  2313. ret = create_decode_tables(&table[idx], &rar->cstate.dd, HUFF_DC);
  2314. if(ret != ARCHIVE_OK) {
  2315. archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
  2316. "Failed to create distance table");
  2317. return ARCHIVE_FATAL;
  2318. }
  2319. idx += HUFF_DC;
  2320. ret = create_decode_tables(&table[idx], &rar->cstate.ldd, HUFF_LDC);
  2321. if(ret != ARCHIVE_OK) {
  2322. archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
  2323. "Failed to create lower bits of distances table");
  2324. return ARCHIVE_FATAL;
  2325. }
  2326. idx += HUFF_LDC;
  2327. ret = create_decode_tables(&table[idx], &rar->cstate.rd, HUFF_RC);
  2328. if(ret != ARCHIVE_OK) {
  2329. archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
  2330. "Failed to create repeating distances table");
  2331. return ARCHIVE_FATAL;
  2332. }
  2333. return ARCHIVE_OK;
  2334. }
  2335. /* Parses the block header, verifies its CRC byte, and saves the header
  2336. * fields inside the `hdr` pointer. */
  2337. static int parse_block_header(struct archive_read* a, const uint8_t* p,
  2338. ssize_t* block_size, struct compressed_block_header* hdr)
  2339. {
  2340. uint8_t calculated_cksum;
  2341. memcpy(hdr, p, sizeof(struct compressed_block_header));
  2342. if(bf_byte_count(hdr) > 2) {
  2343. archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
  2344. "Unsupported block header size (was %d, max is 2)",
  2345. bf_byte_count(hdr));
  2346. return ARCHIVE_FATAL;
  2347. }
  2348. /* This should probably use bit reader interface in order to be more
  2349. * future-proof. */
  2350. *block_size = 0;
  2351. switch(bf_byte_count(hdr)) {
  2352. /* 1-byte block size */
  2353. case 0:
  2354. *block_size = *(const uint8_t*) &p[2];
  2355. break;
  2356. /* 2-byte block size */
  2357. case 1:
  2358. *block_size = archive_le16dec(&p[2]);
  2359. break;
  2360. /* 3-byte block size */
  2361. case 2:
  2362. *block_size = archive_le32dec(&p[2]);
  2363. *block_size &= 0x00FFFFFF;
  2364. break;
  2365. /* Other block sizes are not supported. This case is not
  2366. * reached, because we have an 'if' guard before the switch
  2367. * that makes sure of it. */
  2368. default:
  2369. return ARCHIVE_FATAL;
  2370. }
  2371. /* Verify the block header checksum. 0x5A is a magic value and is
  2372. * always * constant. */
  2373. calculated_cksum = 0x5A
  2374. ^ (uint8_t) hdr->block_flags_u8
  2375. ^ (uint8_t) *block_size
  2376. ^ (uint8_t) (*block_size >> 8)
  2377. ^ (uint8_t) (*block_size >> 16);
  2378. if(calculated_cksum != hdr->block_cksum) {
  2379. #ifndef DONT_FAIL_ON_CRC_ERROR
  2380. archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
  2381. "Block checksum error: got 0x%x, expected 0x%x",
  2382. hdr->block_cksum, calculated_cksum);
  2383. return ARCHIVE_FATAL;
  2384. #endif
  2385. }
  2386. return ARCHIVE_OK;
  2387. }
  2388. /* Convenience function used during filter processing. */
  2389. static int parse_filter_data(struct archive_read* a, struct rar5* rar,
  2390. const uint8_t* p, uint32_t* filter_data)
  2391. {
  2392. int i, bytes, ret;
  2393. uint32_t data = 0;
  2394. if(ARCHIVE_OK != (ret = read_consume_bits(a, rar, p, 2, &bytes)))
  2395. return ret;
  2396. bytes++;
  2397. for(i = 0; i < bytes; i++) {
  2398. uint16_t byte;
  2399. if(ARCHIVE_OK != (ret = read_bits_16(a, rar, p, &byte))) {
  2400. return ret;
  2401. }
  2402. /* Cast to uint32_t will ensure the shift operation will not
  2403. * produce undefined result. */
  2404. data += ((uint32_t) byte >> 8) << (i * 8);
  2405. skip_bits(rar, 8);
  2406. }
  2407. *filter_data = data;
  2408. return ARCHIVE_OK;
  2409. }
  2410. /* Function is used during sanity checking. */
  2411. static int is_valid_filter_block_start(struct rar5* rar,
  2412. uint32_t start)
  2413. {
  2414. const int64_t block_start = (ssize_t) start + rar->cstate.write_ptr;
  2415. const int64_t last_bs = rar->cstate.last_block_start;
  2416. const ssize_t last_bl = rar->cstate.last_block_length;
  2417. if(last_bs == 0 || last_bl == 0) {
  2418. /* We didn't have any filters yet, so accept this offset. */
  2419. return 1;
  2420. }
  2421. if(block_start >= last_bs + last_bl) {
  2422. /* Current offset is bigger than last block's end offset, so
  2423. * accept current offset. */
  2424. return 1;
  2425. }
  2426. /* Any other case is not a normal situation and we should fail. */
  2427. return 0;
  2428. }
  2429. /* The function will create a new filter, read its parameters from the input
  2430. * stream and add it to the filter collection. */
  2431. static int parse_filter(struct archive_read* ar, const uint8_t* p) {
  2432. uint32_t block_start, block_length;
  2433. uint16_t filter_type;
  2434. struct filter_info* filt = NULL;
  2435. struct rar5* rar = get_context(ar);
  2436. int ret;
  2437. /* Read the parameters from the input stream. */
  2438. if(ARCHIVE_OK != (ret = parse_filter_data(ar, rar, p, &block_start)))
  2439. return ret;
  2440. if(ARCHIVE_OK != (ret = parse_filter_data(ar, rar, p, &block_length)))
  2441. return ret;
  2442. if(ARCHIVE_OK != (ret = read_bits_16(ar, rar, p, &filter_type)))
  2443. return ret;
  2444. filter_type >>= 13;
  2445. skip_bits(rar, 3);
  2446. /* Perform some sanity checks on this filter parameters. Note that we
  2447. * allow only DELTA, E8/E9 and ARM filters here, because rest of
  2448. * filters are not used in RARv5. */
  2449. if(block_length < 4 ||
  2450. block_length > 0x400000 ||
  2451. filter_type > FILTER_ARM ||
  2452. !is_valid_filter_block_start(rar, block_start))
  2453. {
  2454. archive_set_error(&ar->archive, ARCHIVE_ERRNO_FILE_FORMAT,
  2455. "Invalid filter encountered");
  2456. return ARCHIVE_FATAL;
  2457. }
  2458. /* Allocate a new filter. */
  2459. filt = add_new_filter(rar);
  2460. if(filt == NULL) {
  2461. archive_set_error(&ar->archive, ENOMEM,
  2462. "Can't allocate memory for a filter descriptor.");
  2463. return ARCHIVE_FATAL;
  2464. }
  2465. filt->type = filter_type;
  2466. filt->block_start = rar->cstate.write_ptr + block_start;
  2467. filt->block_length = block_length;
  2468. rar->cstate.last_block_start = filt->block_start;
  2469. rar->cstate.last_block_length = filt->block_length;
  2470. /* Read some more data in case this is a DELTA filter. Other filter
  2471. * types don't require any additional data over what was already
  2472. * read. */
  2473. if(filter_type == FILTER_DELTA) {
  2474. int channels;
  2475. if(ARCHIVE_OK != (ret = read_consume_bits(ar, rar, p, 5, &channels))) {
  2476. #ifdef __clang_analyzer__
  2477. /* Tell clang-analyzer that 'filt' does not leak.
  2478. add_new_filter passes off ownership. */
  2479. free(filt);
  2480. #endif
  2481. return ret;
  2482. }
  2483. filt->channels = channels + 1;
  2484. }
  2485. #ifdef __clang_analyzer__
  2486. /* Tell clang-analyzer that 'filt' does not leak.
  2487. add_new_filter passes off ownership. */
  2488. free(filt);
  2489. #endif
  2490. return ARCHIVE_OK;
  2491. }
  2492. static int decode_code_length(struct archive_read* a, struct rar5* rar,
  2493. const uint8_t* p, uint16_t code)
  2494. {
  2495. int lbits, length = 2;
  2496. if(code < 8) {
  2497. lbits = 0;
  2498. length += code;
  2499. } else {
  2500. lbits = code / 4 - 1;
  2501. length += (4 | (code & 3)) << lbits;
  2502. }
  2503. if(lbits > 0) {
  2504. int add;
  2505. if(ARCHIVE_OK != read_consume_bits(a, rar, p, lbits, &add))
  2506. return -1;
  2507. length += add;
  2508. }
  2509. return length;
  2510. }
  2511. static int copy_string(struct archive_read* a, int len, int dist) {
  2512. struct rar5* rar = get_context(a);
  2513. const ssize_t cmask = rar->cstate.window_mask;
  2514. const uint64_t write_ptr = rar->cstate.write_ptr +
  2515. rar->cstate.solid_offset;
  2516. int i;
  2517. if (rar->cstate.window_buf == NULL)
  2518. return ARCHIVE_FATAL;
  2519. /* The unpacker spends most of the time in this function. It would be
  2520. * a good idea to introduce some optimizations here.
  2521. *
  2522. * Just remember that this loop treats buffers that overlap differently
  2523. * than buffers that do not overlap. This is why a simple memcpy(3)
  2524. * call will not be enough. */
  2525. for(i = 0; i < len; i++) {
  2526. const ssize_t write_idx = (write_ptr + i) & cmask;
  2527. const ssize_t read_idx = (write_ptr + i - dist) & cmask;
  2528. rar->cstate.window_buf[write_idx] =
  2529. rar->cstate.window_buf[read_idx];
  2530. }
  2531. rar->cstate.write_ptr += len;
  2532. return ARCHIVE_OK;
  2533. }
  2534. static int do_uncompress_block(struct archive_read* a, const uint8_t* p) {
  2535. struct rar5* rar = get_context(a);
  2536. uint16_t num;
  2537. int ret;
  2538. const uint64_t cmask = rar->cstate.window_mask;
  2539. const struct compressed_block_header* hdr = &rar->last_block_hdr;
  2540. const uint8_t bit_size = 1 + bf_bit_size(hdr);
  2541. while(1) {
  2542. if(rar->cstate.write_ptr - rar->cstate.last_write_ptr >
  2543. (rar->cstate.window_size >> 1)) {
  2544. /* Don't allow growing data by more than half of the
  2545. * window size at a time. In such case, break the loop;
  2546. * next call to this function will continue processing
  2547. * from this moment. */
  2548. break;
  2549. }
  2550. if(rar->bits.in_addr > rar->cstate.cur_block_size - 1 ||
  2551. (rar->bits.in_addr == rar->cstate.cur_block_size - 1 &&
  2552. rar->bits.bit_addr >= bit_size))
  2553. {
  2554. /* If the program counter is here, it means the
  2555. * function has finished processing the block. */
  2556. rar->cstate.block_parsing_finished = 1;
  2557. break;
  2558. }
  2559. /* Decode the next literal. */
  2560. if(ARCHIVE_OK != decode_number(a, &rar->cstate.ld, p, &num)) {
  2561. return ARCHIVE_EOF;
  2562. }
  2563. /* Num holds a decompression literal, or 'command code'.
  2564. *
  2565. * - Values lower than 256 are just bytes. Those codes
  2566. * can be stored in the output buffer directly.
  2567. *
  2568. * - Code 256 defines a new filter, which is later used to
  2569. * ransform the data block accordingly to the filter type.
  2570. * The data block needs to be fully uncompressed first.
  2571. *
  2572. * - Code bigger than 257 and smaller than 262 define
  2573. * a repetition pattern that should be copied from
  2574. * an already uncompressed chunk of data.
  2575. */
  2576. if(num < 256) {
  2577. /* Directly store the byte. */
  2578. int64_t write_idx = rar->cstate.solid_offset +
  2579. rar->cstate.write_ptr++;
  2580. rar->cstate.window_buf[write_idx & cmask] =
  2581. (uint8_t) num;
  2582. continue;
  2583. } else if(num >= 262) {
  2584. uint16_t dist_slot;
  2585. int len = decode_code_length(a, rar, p, num - 262),
  2586. dbits,
  2587. dist = 1;
  2588. if(len == -1) {
  2589. archive_set_error(&a->archive,
  2590. ARCHIVE_ERRNO_PROGRAMMER,
  2591. "Failed to decode the code length");
  2592. return ARCHIVE_FATAL;
  2593. }
  2594. if(ARCHIVE_OK != decode_number(a, &rar->cstate.dd, p,
  2595. &dist_slot))
  2596. {
  2597. archive_set_error(&a->archive,
  2598. ARCHIVE_ERRNO_PROGRAMMER,
  2599. "Failed to decode the distance slot");
  2600. return ARCHIVE_FATAL;
  2601. }
  2602. if(dist_slot < 4) {
  2603. dbits = 0;
  2604. dist += dist_slot;
  2605. } else {
  2606. dbits = dist_slot / 2 - 1;
  2607. /* Cast to uint32_t will make sure the shift
  2608. * left operation won't produce undefined
  2609. * result. Then, the uint32_t type will
  2610. * be implicitly casted to int. */
  2611. dist += (uint32_t) (2 |
  2612. (dist_slot & 1)) << dbits;
  2613. }
  2614. if(dbits > 0) {
  2615. if(dbits >= 4) {
  2616. uint32_t add = 0;
  2617. uint16_t low_dist;
  2618. if(dbits > 4) {
  2619. if(ARCHIVE_OK != (ret = read_bits_32(
  2620. a, rar, p, &add))) {
  2621. /* Return EOF if we
  2622. * can't read more
  2623. * data. */
  2624. return ret;
  2625. }
  2626. skip_bits(rar, dbits - 4);
  2627. add = (add >> (
  2628. 36 - dbits)) << 4;
  2629. dist += add;
  2630. }
  2631. if(ARCHIVE_OK != decode_number(a,
  2632. &rar->cstate.ldd, p, &low_dist))
  2633. {
  2634. archive_set_error(&a->archive,
  2635. ARCHIVE_ERRNO_PROGRAMMER,
  2636. "Failed to decode the "
  2637. "distance slot");
  2638. return ARCHIVE_FATAL;
  2639. }
  2640. if(dist >= INT_MAX - low_dist - 1) {
  2641. /* This only happens in
  2642. * invalid archives. */
  2643. archive_set_error(&a->archive,
  2644. ARCHIVE_ERRNO_FILE_FORMAT,
  2645. "Distance pointer "
  2646. "overflow");
  2647. return ARCHIVE_FATAL;
  2648. }
  2649. dist += low_dist;
  2650. } else {
  2651. /* dbits is one of [0,1,2,3] */
  2652. int add;
  2653. if(ARCHIVE_OK != (ret = read_consume_bits(a, rar,
  2654. p, dbits, &add))) {
  2655. /* Return EOF if we can't read
  2656. * more data. */
  2657. return ret;
  2658. }
  2659. dist += add;
  2660. }
  2661. }
  2662. if(dist > 0x100) {
  2663. len++;
  2664. if(dist > 0x2000) {
  2665. len++;
  2666. if(dist > 0x40000) {
  2667. len++;
  2668. }
  2669. }
  2670. }
  2671. dist_cache_push(rar, dist);
  2672. rar->cstate.last_len = len;
  2673. if(ARCHIVE_OK != copy_string(a, len, dist))
  2674. return ARCHIVE_FATAL;
  2675. continue;
  2676. } else if(num == 256) {
  2677. /* Create a filter. */
  2678. ret = parse_filter(a, p);
  2679. if(ret != ARCHIVE_OK)
  2680. return ret;
  2681. continue;
  2682. } else if(num == 257) {
  2683. if(rar->cstate.last_len != 0) {
  2684. if(ARCHIVE_OK != copy_string(a,
  2685. rar->cstate.last_len,
  2686. rar->cstate.dist_cache[0]))
  2687. {
  2688. return ARCHIVE_FATAL;
  2689. }
  2690. }
  2691. continue;
  2692. } else {
  2693. /* num < 262 */
  2694. const int idx = num - 258;
  2695. const int dist = dist_cache_touch(rar, idx);
  2696. uint16_t len_slot;
  2697. int len;
  2698. if(ARCHIVE_OK != decode_number(a, &rar->cstate.rd, p,
  2699. &len_slot)) {
  2700. return ARCHIVE_FATAL;
  2701. }
  2702. len = decode_code_length(a, rar, p, len_slot);
  2703. if (len == -1) {
  2704. return ARCHIVE_FATAL;
  2705. }
  2706. rar->cstate.last_len = len;
  2707. if(ARCHIVE_OK != copy_string(a, len, dist))
  2708. return ARCHIVE_FATAL;
  2709. continue;
  2710. }
  2711. }
  2712. return ARCHIVE_OK;
  2713. }
  2714. /* Binary search for the RARv5 signature. */
  2715. static int scan_for_signature(struct archive_read* a) {
  2716. const uint8_t* p;
  2717. const int chunk_size = 512;
  2718. ssize_t i;
  2719. char signature[sizeof(rar5_signature_xor)];
  2720. /* If we're here, it means we're on an 'unknown territory' data.
  2721. * There's no indication what kind of data we're reading here.
  2722. * It could be some text comment, any kind of binary data,
  2723. * digital sign, dragons, etc.
  2724. *
  2725. * We want to find a valid RARv5 magic header inside this unknown
  2726. * data. */
  2727. /* Is it possible in libarchive to just skip everything until the
  2728. * end of the file? If so, it would be a better approach than the
  2729. * current implementation of this function. */
  2730. rar5_signature(signature);
  2731. while(1) {
  2732. if(!read_ahead(a, chunk_size, &p))
  2733. return ARCHIVE_EOF;
  2734. for(i = 0; i < chunk_size - (int)sizeof(rar5_signature_xor);
  2735. i++) {
  2736. if(memcmp(&p[i], signature,
  2737. sizeof(rar5_signature_xor)) == 0) {
  2738. /* Consume the number of bytes we've used to
  2739. * search for the signature, as well as the
  2740. * number of bytes used by the signature
  2741. * itself. After this we should be standing
  2742. * on a valid base block header. */
  2743. (void) consume(a,
  2744. i + sizeof(rar5_signature_xor));
  2745. return ARCHIVE_OK;
  2746. }
  2747. }
  2748. consume(a, chunk_size);
  2749. }
  2750. return ARCHIVE_FATAL;
  2751. }
  2752. /* This function will switch the multivolume archive file to another file,
  2753. * i.e. from part03 to part 04. */
  2754. static int advance_multivolume(struct archive_read* a) {
  2755. int lret;
  2756. struct rar5* rar = get_context(a);
  2757. /* A small state machine that will skip unnecessary data, needed to
  2758. * switch from one multivolume to another. Such skipping is needed if
  2759. * we want to be an stream-oriented (instead of file-oriented)
  2760. * unpacker.
  2761. *
  2762. * The state machine starts with `rar->main.endarc` == 0. It also
  2763. * assumes that current stream pointer points to some base block
  2764. * header.
  2765. *
  2766. * The `endarc` field is being set when the base block parsing
  2767. * function encounters the 'end of archive' marker.
  2768. */
  2769. while(1) {
  2770. if(rar->main.endarc == 1) {
  2771. int looping = 1;
  2772. rar->main.endarc = 0;
  2773. while(looping) {
  2774. lret = skip_base_block(a);
  2775. switch(lret) {
  2776. case ARCHIVE_RETRY:
  2777. /* Continue looping. */
  2778. break;
  2779. case ARCHIVE_OK:
  2780. /* Break loop. */
  2781. looping = 0;
  2782. break;
  2783. default:
  2784. /* Forward any errors to the
  2785. * caller. */
  2786. return lret;
  2787. }
  2788. }
  2789. break;
  2790. } else {
  2791. /* Skip current base block. In order to properly skip
  2792. * it, we really need to simply parse it and discard
  2793. * the results. */
  2794. lret = skip_base_block(a);
  2795. if(lret == ARCHIVE_FATAL || lret == ARCHIVE_FAILED)
  2796. return lret;
  2797. /* The `skip_base_block` function tells us if we
  2798. * should continue with skipping, or we should stop
  2799. * skipping. We're trying to skip everything up to
  2800. * a base FILE block. */
  2801. if(lret != ARCHIVE_RETRY) {
  2802. /* If there was an error during skipping, or we
  2803. * have just skipped a FILE base block... */
  2804. if(rar->main.endarc == 0) {
  2805. return lret;
  2806. } else {
  2807. continue;
  2808. }
  2809. }
  2810. }
  2811. }
  2812. return ARCHIVE_OK;
  2813. }
  2814. /* Merges the partial block from the first multivolume archive file, and
  2815. * partial block from the second multivolume archive file. The result is
  2816. * a chunk of memory containing the whole block, and the stream pointer
  2817. * is advanced to the next block in the second multivolume archive file. */
  2818. static int merge_block(struct archive_read* a, ssize_t block_size,
  2819. const uint8_t** p)
  2820. {
  2821. struct rar5* rar = get_context(a);
  2822. ssize_t cur_block_size, partial_offset = 0;
  2823. const uint8_t* lp;
  2824. int ret;
  2825. if(rar->merge_mode) {
  2826. archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
  2827. "Recursive merge is not allowed");
  2828. return ARCHIVE_FATAL;
  2829. }
  2830. /* Set a flag that we're in the switching mode. */
  2831. rar->cstate.switch_multivolume = 1;
  2832. /* Reallocate the memory which will hold the whole block. */
  2833. if(rar->vol.push_buf)
  2834. free((void*) rar->vol.push_buf);
  2835. /* Increasing the allocation block by 8 is due to bit reading functions,
  2836. * which are using additional 2 or 4 bytes. Allocating the block size
  2837. * by exact value would make bit reader perform reads from invalid
  2838. * memory block when reading the last byte from the buffer. */
  2839. rar->vol.push_buf = malloc(block_size + 8);
  2840. if(!rar->vol.push_buf) {
  2841. archive_set_error(&a->archive, ENOMEM,
  2842. "Can't allocate memory for a merge block buffer.");
  2843. return ARCHIVE_FATAL;
  2844. }
  2845. /* Valgrind complains if the extension block for bit reader is not
  2846. * initialized, so initialize it. */
  2847. memset(&rar->vol.push_buf[block_size], 0, 8);
  2848. /* A single block can span across multiple multivolume archive files,
  2849. * so we use a loop here. This loop will consume enough multivolume
  2850. * archive files until the whole block is read. */
  2851. while(1) {
  2852. /* Get the size of current block chunk in this multivolume
  2853. * archive file and read it. */
  2854. cur_block_size = rar5_min(rar->file.bytes_remaining,
  2855. block_size - partial_offset);
  2856. if(cur_block_size == 0) {
  2857. archive_set_error(&a->archive,
  2858. ARCHIVE_ERRNO_FILE_FORMAT,
  2859. "Encountered block size == 0 during block merge");
  2860. return ARCHIVE_FATAL;
  2861. }
  2862. if(!read_ahead(a, cur_block_size, &lp))
  2863. return ARCHIVE_EOF;
  2864. /* Sanity check; there should never be a situation where this
  2865. * function reads more data than the block's size. */
  2866. if(partial_offset + cur_block_size > block_size) {
  2867. archive_set_error(&a->archive,
  2868. ARCHIVE_ERRNO_PROGRAMMER,
  2869. "Consumed too much data when merging blocks.");
  2870. return ARCHIVE_FATAL;
  2871. }
  2872. /* Merge previous block chunk with current block chunk,
  2873. * or create first block chunk if this is our first
  2874. * iteration. */
  2875. memcpy(&rar->vol.push_buf[partial_offset], lp, cur_block_size);
  2876. /* Advance the stream read pointer by this block chunk size. */
  2877. if(ARCHIVE_OK != consume(a, cur_block_size))
  2878. return ARCHIVE_EOF;
  2879. /* Update the pointers. `partial_offset` contains information
  2880. * about the sum of merged block chunks. */
  2881. partial_offset += cur_block_size;
  2882. rar->file.bytes_remaining -= cur_block_size;
  2883. /* If `partial_offset` is the same as `block_size`, this means
  2884. * we've merged all block chunks and we have a valid full
  2885. * block. */
  2886. if(partial_offset == block_size) {
  2887. break;
  2888. }
  2889. /* If we don't have any bytes to read, this means we should
  2890. * switch to another multivolume archive file. */
  2891. if(rar->file.bytes_remaining == 0) {
  2892. rar->merge_mode++;
  2893. ret = advance_multivolume(a);
  2894. rar->merge_mode--;
  2895. if(ret != ARCHIVE_OK) {
  2896. return ret;
  2897. }
  2898. }
  2899. }
  2900. *p = rar->vol.push_buf;
  2901. /* If we're here, we can resume unpacking by processing the block
  2902. * pointed to by the `*p` memory pointer. */
  2903. return ARCHIVE_OK;
  2904. }
  2905. static int process_block(struct archive_read* a) {
  2906. const uint8_t* p;
  2907. struct rar5* rar = get_context(a);
  2908. int ret;
  2909. /* If we don't have any data to be processed, this most probably means
  2910. * we need to switch to the next volume. */
  2911. if(rar->main.volume && rar->file.bytes_remaining == 0) {
  2912. ret = advance_multivolume(a);
  2913. if(ret != ARCHIVE_OK)
  2914. return ret;
  2915. }
  2916. if(rar->cstate.block_parsing_finished) {
  2917. ssize_t block_size;
  2918. ssize_t to_skip;
  2919. ssize_t cur_block_size;
  2920. /* The header size won't be bigger than 6 bytes. */
  2921. if(!read_ahead(a, 6, &p)) {
  2922. /* Failed to prefetch data block header. */
  2923. return ARCHIVE_EOF;
  2924. }
  2925. /*
  2926. * Read block_size by parsing block header. Validate the header
  2927. * by calculating CRC byte stored inside the header. Size of
  2928. * the header is not constant (block size can be stored either
  2929. * in 1 or 2 bytes), that's why block size is left out from the
  2930. * `compressed_block_header` structure and returned by
  2931. * `parse_block_header` as the second argument. */
  2932. ret = parse_block_header(a, p, &block_size,
  2933. &rar->last_block_hdr);
  2934. if(ret != ARCHIVE_OK) {
  2935. return ret;
  2936. }
  2937. /* Skip block header. Next data is huffman tables,
  2938. * if present. */
  2939. to_skip = sizeof(struct compressed_block_header) +
  2940. bf_byte_count(&rar->last_block_hdr) + 1;
  2941. if(ARCHIVE_OK != consume(a, to_skip))
  2942. return ARCHIVE_EOF;
  2943. rar->file.bytes_remaining -= to_skip;
  2944. /* The block size gives information about the whole block size,
  2945. * but the block could be stored in split form when using
  2946. * multi-volume archives. In this case, the block size will be
  2947. * bigger than the actual data stored in this file. Remaining
  2948. * part of the data will be in another file. */
  2949. cur_block_size =
  2950. rar5_min(rar->file.bytes_remaining, block_size);
  2951. if(block_size > rar->file.bytes_remaining) {
  2952. /* If current blocks' size is bigger than our data
  2953. * size, this means we have a multivolume archive.
  2954. * In this case, skip all base headers until the end
  2955. * of the file, proceed to next "partXXX.rar" volume,
  2956. * find its signature, skip all headers up to the first
  2957. * FILE base header, and continue from there.
  2958. *
  2959. * Note that `merge_block` will update the `rar`
  2960. * context structure quite extensively. */
  2961. ret = merge_block(a, block_size, &p);
  2962. if(ret != ARCHIVE_OK) {
  2963. return ret;
  2964. }
  2965. cur_block_size = block_size;
  2966. /* Current stream pointer should be now directly
  2967. * *after* the block that spanned through multiple
  2968. * archive files. `p` pointer should have the data of
  2969. * the *whole* block (merged from partial blocks
  2970. * stored in multiple archives files). */
  2971. } else {
  2972. rar->cstate.switch_multivolume = 0;
  2973. /* Read the whole block size into memory. This can take
  2974. * up to 8 megabytes of memory in theoretical cases.
  2975. * Might be worth to optimize this and use a standard
  2976. * chunk of 4kb's. */
  2977. if(!read_ahead(a, 4 + cur_block_size, &p)) {
  2978. /* Failed to prefetch block data. */
  2979. return ARCHIVE_EOF;
  2980. }
  2981. }
  2982. rar->cstate.block_buf = p;
  2983. rar->cstate.cur_block_size = cur_block_size;
  2984. rar->cstate.block_parsing_finished = 0;
  2985. rar->bits.in_addr = 0;
  2986. rar->bits.bit_addr = 0;
  2987. if(bf_is_table_present(&rar->last_block_hdr)) {
  2988. /* Load Huffman tables. */
  2989. ret = parse_tables(a, rar, p);
  2990. if(ret != ARCHIVE_OK) {
  2991. /* Error during decompression of Huffman
  2992. * tables. */
  2993. return ret;
  2994. }
  2995. }
  2996. } else {
  2997. /* Block parsing not finished, reuse previous memory buffer. */
  2998. p = rar->cstate.block_buf;
  2999. }
  3000. /* Uncompress the block, or a part of it, depending on how many bytes
  3001. * will be generated by uncompressing the block.
  3002. *
  3003. * In case too many bytes will be generated, calling this function
  3004. * again will resume the uncompression operation. */
  3005. ret = do_uncompress_block(a, p);
  3006. if(ret != ARCHIVE_OK) {
  3007. return ret;
  3008. }
  3009. if(rar->cstate.block_parsing_finished &&
  3010. rar->cstate.switch_multivolume == 0 &&
  3011. rar->cstate.cur_block_size > 0)
  3012. {
  3013. /* If we're processing a normal block, consume the whole
  3014. * block. We can do this because we've already read the whole
  3015. * block to memory. */
  3016. if(ARCHIVE_OK != consume(a, rar->cstate.cur_block_size))
  3017. return ARCHIVE_FATAL;
  3018. rar->file.bytes_remaining -= rar->cstate.cur_block_size;
  3019. } else if(rar->cstate.switch_multivolume) {
  3020. /* Don't consume the block if we're doing multivolume
  3021. * processing. The volume switching function will consume
  3022. * the proper count of bytes instead. */
  3023. rar->cstate.switch_multivolume = 0;
  3024. }
  3025. return ARCHIVE_OK;
  3026. }
  3027. /* Pops the `buf`, `size` and `offset` from the "data ready" stack.
  3028. *
  3029. * Returns ARCHIVE_OK when those arguments can be used, ARCHIVE_RETRY
  3030. * when there is no data on the stack. */
  3031. static int use_data(struct rar5* rar, const void** buf, size_t* size,
  3032. int64_t* offset)
  3033. {
  3034. int i;
  3035. for(i = 0; i < rar5_countof(rar->cstate.dready); i++) {
  3036. struct data_ready *d = &rar->cstate.dready[i];
  3037. if(d->used) {
  3038. if(buf) *buf = d->buf;
  3039. if(size) *size = d->size;
  3040. if(offset) *offset = d->offset;
  3041. d->used = 0;
  3042. return ARCHIVE_OK;
  3043. }
  3044. }
  3045. return ARCHIVE_RETRY;
  3046. }
  3047. static void clear_data_ready_stack(struct rar5* rar) {
  3048. memset(&rar->cstate.dready, 0, sizeof(rar->cstate.dready));
  3049. }
  3050. /* Pushes the `buf`, `size` and `offset` arguments to the rar->cstate.dready
  3051. * FIFO stack. Those values will be popped from this stack by the `use_data`
  3052. * function. */
  3053. static int push_data_ready(struct archive_read* a, struct rar5* rar,
  3054. const uint8_t* buf, size_t size, int64_t offset)
  3055. {
  3056. int i;
  3057. /* Don't push if we're in skip mode. This is needed because solid
  3058. * streams need full processing even if we're skipping data. After
  3059. * fully processing the stream, we need to discard the generated bytes,
  3060. * because we're interested only in the side effect: building up the
  3061. * internal window circular buffer. This window buffer will be used
  3062. * later during unpacking of requested data. */
  3063. if(rar->skip_mode)
  3064. return ARCHIVE_OK;
  3065. /* Sanity check. */
  3066. if(offset != rar->file.last_offset + rar->file.last_size) {
  3067. archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
  3068. "Sanity check error: output stream is not continuous");
  3069. return ARCHIVE_FATAL;
  3070. }
  3071. for(i = 0; i < rar5_countof(rar->cstate.dready); i++) {
  3072. struct data_ready* d = &rar->cstate.dready[i];
  3073. if(!d->used) {
  3074. d->used = 1;
  3075. d->buf = buf;
  3076. d->size = size;
  3077. d->offset = offset;
  3078. /* These fields are used only in sanity checking. */
  3079. rar->file.last_offset = offset;
  3080. rar->file.last_size = size;
  3081. /* Calculate the checksum of this new block before
  3082. * submitting data to libarchive's engine. */
  3083. update_crc(rar, d->buf, d->size);
  3084. return ARCHIVE_OK;
  3085. }
  3086. }
  3087. /* Program counter will reach this code if the `rar->cstate.data_ready`
  3088. * stack will be filled up so that no new entries will be allowed. The
  3089. * code shouldn't allow such situation to occur. So we treat this case
  3090. * as an internal error. */
  3091. archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
  3092. "Error: premature end of data_ready stack");
  3093. return ARCHIVE_FATAL;
  3094. }
  3095. /* This function uncompresses the data that is stored in the <FILE> base
  3096. * block.
  3097. *
  3098. * The FILE base block looks like this:
  3099. *
  3100. * <header><huffman tables><block_1><block_2>...<block_n>
  3101. *
  3102. * The <header> is a block header, that is parsed in parse_block_header().
  3103. * It's a "compressed_block_header" structure, containing metadata needed
  3104. * to know when we should stop looking for more <block_n> blocks.
  3105. *
  3106. * <huffman tables> contain data needed to set up the huffman tables, needed
  3107. * for the actual decompression.
  3108. *
  3109. * Each <block_n> consists of series of literals:
  3110. *
  3111. * <literal><literal><literal>...<literal>
  3112. *
  3113. * Those literals generate the uncompression data. They operate on a circular
  3114. * buffer, sometimes writing raw data into it, sometimes referencing
  3115. * some previous data inside this buffer, and sometimes declaring a filter
  3116. * that will need to be executed on the data stored in the circular buffer.
  3117. * It all depends on the literal that is used.
  3118. *
  3119. * Sometimes blocks produce output data, sometimes they don't. For example, for
  3120. * some huge files that use lots of filters, sometimes a block is filled with
  3121. * only filter declaration literals. Such blocks won't produce any data in the
  3122. * circular buffer.
  3123. *
  3124. * Sometimes blocks will produce 4 bytes of data, and sometimes 1 megabyte,
  3125. * because a literal can reference previously decompressed data. For example,
  3126. * there can be a literal that says: 'append a byte 0xFE here', and after
  3127. * it another literal can say 'append 1 megabyte of data from circular buffer
  3128. * offset 0x12345'. This is how RAR format handles compressing repeated
  3129. * patterns.
  3130. *
  3131. * The RAR compressor creates those literals and the actual efficiency of
  3132. * compression depends on what those literals are. The literals can also
  3133. * be seen as a kind of a non-turing-complete virtual machine that simply
  3134. * tells the decompressor what it should do.
  3135. * */
  3136. static int do_uncompress_file(struct archive_read* a) {
  3137. struct rar5* rar = get_context(a);
  3138. int ret;
  3139. int64_t max_end_pos;
  3140. if(!rar->cstate.initialized) {
  3141. /* Don't perform full context reinitialization if we're
  3142. * processing a solid archive. */
  3143. if(!rar->main.solid || !rar->cstate.window_buf) {
  3144. init_unpack(rar);
  3145. }
  3146. rar->cstate.initialized = 1;
  3147. }
  3148. /* Don't allow extraction if window_size is invalid. */
  3149. if(rar->cstate.window_size == 0) {
  3150. archive_set_error(&a->archive,
  3151. ARCHIVE_ERRNO_FILE_FORMAT,
  3152. "Invalid window size declaration in this file");
  3153. /* This should never happen in valid files. */
  3154. return ARCHIVE_FATAL;
  3155. }
  3156. if(rar->cstate.all_filters_applied == 1) {
  3157. /* We use while(1) here, but standard case allows for just 1
  3158. * iteration. The loop will iterate if process_block() didn't
  3159. * generate any data at all. This can happen if the block
  3160. * contains only filter definitions (this is common in big
  3161. * files). */
  3162. while(1) {
  3163. ret = process_block(a);
  3164. if(ret == ARCHIVE_EOF || ret == ARCHIVE_FATAL)
  3165. return ret;
  3166. if(rar->cstate.last_write_ptr ==
  3167. rar->cstate.write_ptr) {
  3168. /* The block didn't generate any new data,
  3169. * so just process a new block if this one
  3170. * wasn't the last block in the file. */
  3171. if (bf_is_last_block(&rar->last_block_hdr)) {
  3172. return ARCHIVE_EOF;
  3173. }
  3174. continue;
  3175. }
  3176. /* The block has generated some new data, so break
  3177. * the loop. */
  3178. break;
  3179. }
  3180. }
  3181. /* Try to run filters. If filters won't be applied, it means that
  3182. * insufficient data was generated. */
  3183. ret = apply_filters(a);
  3184. if(ret == ARCHIVE_RETRY) {
  3185. return ARCHIVE_OK;
  3186. } else if(ret == ARCHIVE_FATAL) {
  3187. return ARCHIVE_FATAL;
  3188. }
  3189. /* If apply_filters() will return ARCHIVE_OK, we can continue here. */
  3190. if(cdeque_size(&rar->cstate.filters) > 0) {
  3191. /* Check if we can write something before hitting first
  3192. * filter. */
  3193. struct filter_info* flt;
  3194. /* Get the block_start offset from the first filter. */
  3195. if(CDE_OK != cdeque_front(&rar->cstate.filters,
  3196. cdeque_filter_p(&flt)))
  3197. {
  3198. archive_set_error(&a->archive,
  3199. ARCHIVE_ERRNO_PROGRAMMER,
  3200. "Can't read first filter");
  3201. return ARCHIVE_FATAL;
  3202. }
  3203. max_end_pos = rar5_min(flt->block_start,
  3204. rar->cstate.write_ptr);
  3205. } else {
  3206. /* There are no filters defined, or all filters were applied.
  3207. * This means we can just store the data without any
  3208. * postprocessing. */
  3209. max_end_pos = rar->cstate.write_ptr;
  3210. }
  3211. if(max_end_pos == rar->cstate.last_write_ptr) {
  3212. /* We can't write anything yet. The block uncompression
  3213. * function did not generate enough data, and no filter can be
  3214. * applied. At the same time we don't have any data that can be
  3215. * stored without filter postprocessing. This means we need to
  3216. * wait for more data to be generated, so we can apply the
  3217. * filters.
  3218. *
  3219. * Signal the caller that we need more data to be able to do
  3220. * anything.
  3221. */
  3222. return ARCHIVE_RETRY;
  3223. } else {
  3224. /* We can write the data before hitting the first filter.
  3225. * So let's do it. The push_window_data() function will
  3226. * effectively return the selected data block to the user
  3227. * application. */
  3228. push_window_data(a, rar, rar->cstate.last_write_ptr,
  3229. max_end_pos);
  3230. rar->cstate.last_write_ptr = max_end_pos;
  3231. }
  3232. return ARCHIVE_OK;
  3233. }
  3234. static int uncompress_file(struct archive_read* a) {
  3235. int ret;
  3236. while(1) {
  3237. /* Sometimes the uncompression function will return a
  3238. * 'retry' signal. If this will happen, we have to retry
  3239. * the function. */
  3240. ret = do_uncompress_file(a);
  3241. if(ret != ARCHIVE_RETRY)
  3242. return ret;
  3243. }
  3244. }
  3245. static int do_unstore_file(struct archive_read* a,
  3246. struct rar5* rar, const void** buf, size_t* size, int64_t* offset)
  3247. {
  3248. size_t to_read;
  3249. const uint8_t* p;
  3250. if(rar->file.bytes_remaining == 0 && rar->main.volume > 0 &&
  3251. rar->generic.split_after > 0)
  3252. {
  3253. int ret;
  3254. rar->cstate.switch_multivolume = 1;
  3255. ret = advance_multivolume(a);
  3256. rar->cstate.switch_multivolume = 0;
  3257. if(ret != ARCHIVE_OK) {
  3258. /* Failed to advance to next multivolume archive
  3259. * file. */
  3260. return ret;
  3261. }
  3262. }
  3263. to_read = rar5_min(rar->file.bytes_remaining, 64 * 1024);
  3264. if(to_read == 0) {
  3265. return ARCHIVE_EOF;
  3266. }
  3267. if(!read_ahead(a, to_read, &p)) {
  3268. archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
  3269. "I/O error when unstoring file");
  3270. return ARCHIVE_FATAL;
  3271. }
  3272. if(ARCHIVE_OK != consume(a, to_read)) {
  3273. return ARCHIVE_EOF;
  3274. }
  3275. if(buf) *buf = p;
  3276. if(size) *size = to_read;
  3277. if(offset) *offset = rar->cstate.last_unstore_ptr;
  3278. rar->file.bytes_remaining -= to_read;
  3279. rar->cstate.last_unstore_ptr += to_read;
  3280. update_crc(rar, p, to_read);
  3281. return ARCHIVE_OK;
  3282. }
  3283. static int do_unpack(struct archive_read* a, struct rar5* rar,
  3284. const void** buf, size_t* size, int64_t* offset)
  3285. {
  3286. enum COMPRESSION_METHOD {
  3287. STORE = 0, FASTEST = 1, FAST = 2, NORMAL = 3, GOOD = 4,
  3288. BEST = 5
  3289. };
  3290. if(rar->file.service > 0) {
  3291. return do_unstore_file(a, rar, buf, size, offset);
  3292. } else {
  3293. switch(rar->cstate.method) {
  3294. case STORE:
  3295. return do_unstore_file(a, rar, buf, size,
  3296. offset);
  3297. case FASTEST:
  3298. /* fallthrough */
  3299. case FAST:
  3300. /* fallthrough */
  3301. case NORMAL:
  3302. /* fallthrough */
  3303. case GOOD:
  3304. /* fallthrough */
  3305. case BEST:
  3306. /* No data is returned here. But because a sparse-file aware
  3307. * caller (like archive_read_data_into_fd) may treat zero-size
  3308. * as a sparse file block, we need to update the offset
  3309. * accordingly. At this point the decoder doesn't have any
  3310. * pending uncompressed data blocks, so the current position in
  3311. * the output file should be last_write_ptr. */
  3312. if (offset) *offset = rar->cstate.last_write_ptr;
  3313. return uncompress_file(a);
  3314. default:
  3315. archive_set_error(&a->archive,
  3316. ARCHIVE_ERRNO_FILE_FORMAT,
  3317. "Compression method not supported: 0x%x",
  3318. (unsigned int)rar->cstate.method);
  3319. return ARCHIVE_FATAL;
  3320. }
  3321. }
  3322. #if !defined WIN32
  3323. /* Not reached. */
  3324. return ARCHIVE_OK;
  3325. #endif
  3326. }
  3327. static int verify_checksums(struct archive_read* a) {
  3328. int verify_crc;
  3329. struct rar5* rar = get_context(a);
  3330. /* Check checksums only when actually unpacking the data. There's no
  3331. * need to calculate checksum when we're skipping data in solid archives
  3332. * (skipping in solid archives is the same thing as unpacking compressed
  3333. * data and discarding the result). */
  3334. if(!rar->skip_mode) {
  3335. /* Always check checksums if we're not in skip mode */
  3336. verify_crc = 1;
  3337. } else {
  3338. /* We can override the logic above with a compile-time option
  3339. * NO_CRC_ON_SOLID_SKIP. This option is used during debugging,
  3340. * and it will check checksums of unpacked data even when
  3341. * we're skipping it. */
  3342. #if defined CHECK_CRC_ON_SOLID_SKIP
  3343. /* Debug case */
  3344. verify_crc = 1;
  3345. #else
  3346. /* Normal case */
  3347. verify_crc = 0;
  3348. #endif
  3349. }
  3350. if(verify_crc) {
  3351. /* During unpacking, on each unpacked block we're calling the
  3352. * update_crc() function. Since we are here, the unpacking
  3353. * process is already over and we can check if calculated
  3354. * checksum (CRC32 or BLAKE2sp) is the same as what is stored
  3355. * in the archive. */
  3356. if(rar->file.stored_crc32 > 0) {
  3357. /* Check CRC32 only when the file contains a CRC32
  3358. * value for this file. */
  3359. if(rar->file.calculated_crc32 !=
  3360. rar->file.stored_crc32) {
  3361. /* Checksums do not match; the unpacked file
  3362. * is corrupted. */
  3363. DEBUG_CODE {
  3364. printf("Checksum error: CRC32 "
  3365. "(was: %08" PRIx32 ", expected: %08" PRIx32 ")\n",
  3366. rar->file.calculated_crc32,
  3367. rar->file.stored_crc32);
  3368. }
  3369. #ifndef DONT_FAIL_ON_CRC_ERROR
  3370. archive_set_error(&a->archive,
  3371. ARCHIVE_ERRNO_FILE_FORMAT,
  3372. "Checksum error: CRC32");
  3373. return ARCHIVE_FATAL;
  3374. #endif
  3375. } else {
  3376. DEBUG_CODE {
  3377. printf("Checksum OK: CRC32 "
  3378. "(%08" PRIx32 "/%08" PRIx32 ")\n",
  3379. rar->file.stored_crc32,
  3380. rar->file.calculated_crc32);
  3381. }
  3382. }
  3383. }
  3384. if(rar->file.has_blake2 > 0) {
  3385. /* BLAKE2sp is an optional checksum algorithm that is
  3386. * added to RARv5 archives when using the `-htb` switch
  3387. * during creation of archive.
  3388. *
  3389. * We now finalize the hash calculation by calling the
  3390. * `final` function. This will generate the final hash
  3391. * value we can use to compare it with the BLAKE2sp
  3392. * checksum that is stored in the archive.
  3393. *
  3394. * The return value of this `final` function is not
  3395. * very helpful, as it guards only against improper use.
  3396. * This is why we're explicitly ignoring it. */
  3397. uint8_t b2_buf[32];
  3398. (void) blake2sp_final(&rar->file.b2state, b2_buf, 32);
  3399. if(memcmp(&rar->file.blake2sp, b2_buf, 32) != 0) {
  3400. #ifndef DONT_FAIL_ON_CRC_ERROR
  3401. archive_set_error(&a->archive,
  3402. ARCHIVE_ERRNO_FILE_FORMAT,
  3403. "Checksum error: BLAKE2");
  3404. return ARCHIVE_FATAL;
  3405. #endif
  3406. }
  3407. }
  3408. }
  3409. /* Finalization for this file has been successfully completed. */
  3410. return ARCHIVE_OK;
  3411. }
  /* Thin wrapper: forwards to verify_checksums() and returns its result. */
  static int verify_global_checksums(struct archive_read* a) {
      const int ret = verify_checksums(a);

      return ret;
  }
  3415. /*
  3416. * Decryption function for the magic signature pattern. Check the comment near
  3417. * the `rar5_signature_xor` symbol to read the rationale behind this.
  3418. */
  3419. static void rar5_signature(char *buf) {
  3420. size_t i;
  3421. for(i = 0; i < sizeof(rar5_signature_xor); i++) {
  3422. buf[i] = rar5_signature_xor[i] ^ 0xA1;
  3423. }
  3424. }
  3425. static int rar5_read_data(struct archive_read *a, const void **buff,
  3426. size_t *size, int64_t *offset) {
  3427. int ret;
  3428. struct rar5* rar = get_context(a);
  3429. if (size)
  3430. *size = 0;
  3431. if (rar->has_encrypted_entries == ARCHIVE_READ_FORMAT_ENCRYPTION_DONT_KNOW) {
  3432. rar->has_encrypted_entries = 0;
  3433. }
  3434. if (rar->headers_are_encrypted || rar->cstate.data_encrypted) {
  3435. archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
  3436. "Reading encrypted data is not currently supported");
  3437. return ARCHIVE_FATAL;
  3438. }
  3439. if(rar->file.dir > 0) {
  3440. /* Don't process any data if this file entry was declared
  3441. * as a directory. This is needed, because entries marked as
  3442. * directory doesn't have any dictionary buffer allocated, so
  3443. * it's impossible to perform any decompression. */
  3444. archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
  3445. "Can't decompress an entry marked as a directory");
  3446. return ARCHIVE_FAILED;
  3447. }
  3448. if(!rar->skip_mode && (rar->cstate.last_write_ptr > rar->file.unpacked_size)) {
  3449. archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
  3450. "Unpacker has written too many bytes");
  3451. return ARCHIVE_FATAL;
  3452. }
  3453. ret = use_data(rar, buff, size, offset);
  3454. if(ret == ARCHIVE_OK) {
  3455. return ret;
  3456. }
  3457. if(rar->file.eof == 1) {
  3458. return ARCHIVE_EOF;
  3459. }
  3460. ret = do_unpack(a, rar, buff, size, offset);
  3461. if(ret != ARCHIVE_OK) {
  3462. return ret;
  3463. }
  3464. if(rar->file.bytes_remaining == 0 &&
  3465. rar->cstate.last_write_ptr == rar->file.unpacked_size)
  3466. {
  3467. /* If all bytes of current file were processed, run
  3468. * finalization.
  3469. *
  3470. * Finalization will check checksum against proper values. If
  3471. * some of the checksums will not match, we'll return an error
  3472. * value in the last `archive_read_data` call to signal an error
  3473. * to the user. */
  3474. rar->file.eof = 1;
  3475. return verify_global_checksums(a);
  3476. }
  3477. return ARCHIVE_OK;
  3478. }
  3479. static int rar5_read_data_skip(struct archive_read *a) {
  3480. struct rar5* rar = get_context(a);
  3481. if(rar->main.solid && (rar->cstate.data_encrypted == 0)) {
  3482. /* In solid archives, instead of skipping the data, we need to
  3483. * extract it, and dispose the result. The side effect of this
  3484. * operation will be setting up the initial window buffer state
  3485. * needed to be able to extract the selected file. Note that
  3486. * this is only possible when data withing this solid block is
  3487. * not encrypted, in which case we'll skip and fail if the user
  3488. * tries to read data. */
  3489. int ret;
  3490. /* Make sure to process all blocks in the compressed stream. */
  3491. while(rar->file.bytes_remaining > 0) {
  3492. /* Setting the "skip mode" will allow us to skip
  3493. * checksum checks during data skipping. Checking the
  3494. * checksum of skipped data isn't really necessary and
  3495. * it's only slowing things down.
  3496. *
  3497. * This is incremented instead of setting to 1 because
  3498. * this data skipping function can be called
  3499. * recursively. */
  3500. rar->skip_mode++;
  3501. /* We're disposing 1 block of data, so we use triple
  3502. * NULLs in arguments. */
  3503. ret = rar5_read_data(a, NULL, NULL, NULL);
  3504. /* Turn off "skip mode". */
  3505. rar->skip_mode--;
  3506. if(ret < 0 || ret == ARCHIVE_EOF) {
  3507. /* Propagate any potential error conditions
  3508. * to the caller. */
  3509. return ret;
  3510. }
  3511. }
  3512. } else {
  3513. /* In standard archives, we can just jump over the compressed
  3514. * stream. Each file in non-solid archives starts from an empty
  3515. * window buffer. */
  3516. if(ARCHIVE_OK != consume(a, rar->file.bytes_remaining)) {
  3517. return ARCHIVE_FATAL;
  3518. }
  3519. rar->file.bytes_remaining = 0;
  3520. }
  3521. return ARCHIVE_OK;
  3522. }
  3523. static int64_t rar5_seek_data(struct archive_read *a, int64_t offset,
  3524. int whence)
  3525. {
  3526. (void) a;
  3527. (void) offset;
  3528. (void) whence;
  3529. /* We're a streaming unpacker, and we don't support seeking. */
  3530. return ARCHIVE_FATAL;
  3531. }
  3532. static int rar5_cleanup(struct archive_read *a) {
  3533. struct rar5* rar = get_context(a);
  3534. free(rar->cstate.window_buf);
  3535. free(rar->cstate.filtered_buf);
  3536. clear_data_ready_stack(rar);
  3537. free(rar->vol.push_buf);
  3538. free_filters(rar);
  3539. cdeque_free(&rar->cstate.filters);
  3540. free(rar);
  3541. a->format->data = NULL;
  3542. return ARCHIVE_OK;
  3543. }
  3544. static int rar5_capabilities(struct archive_read * a) {
  3545. (void) a;
  3546. return (ARCHIVE_READ_FORMAT_CAPS_ENCRYPT_DATA
  3547. | ARCHIVE_READ_FORMAT_CAPS_ENCRYPT_METADATA);
  3548. }
  3549. static int rar5_has_encrypted_entries(struct archive_read *_a) {
  3550. if (_a && _a->format) {
  3551. struct rar5 *rar = (struct rar5 *)_a->format->data;
  3552. if (rar) {
  3553. return rar->has_encrypted_entries;
  3554. }
  3555. }
  3556. return ARCHIVE_READ_FORMAT_ENCRYPTION_DONT_KNOW;
  3557. }
  3558. static int rar5_init(struct rar5* rar) {
  3559. memset(rar, 0, sizeof(struct rar5));
  3560. if(CDE_OK != cdeque_init(&rar->cstate.filters, 8192))
  3561. return ARCHIVE_FATAL;
  3562. /*
  3563. * Until enough data has been read, we cannot tell about
  3564. * any encrypted entries yet.
  3565. */
  3566. rar->has_encrypted_entries = ARCHIVE_READ_FORMAT_ENCRYPTION_DONT_KNOW;
  3567. return ARCHIVE_OK;
  3568. }
  3569. int archive_read_support_format_rar5(struct archive *_a) {
  3570. struct archive_read* ar;
  3571. int ret;
  3572. struct rar5* rar;
  3573. if(ARCHIVE_OK != (ret = get_archive_read(_a, &ar)))
  3574. return ret;
  3575. rar = malloc(sizeof(*rar));
  3576. if(rar == NULL) {
  3577. archive_set_error(&ar->archive, ENOMEM,
  3578. "Can't allocate rar5 data");
  3579. return ARCHIVE_FATAL;
  3580. }
  3581. if(ARCHIVE_OK != rar5_init(rar)) {
  3582. archive_set_error(&ar->archive, ENOMEM,
  3583. "Can't allocate rar5 filter buffer");
  3584. free(rar);
  3585. return ARCHIVE_FATAL;
  3586. }
  3587. ret = __archive_read_register_format(ar,
  3588. rar,
  3589. "rar5",
  3590. rar5_bid,
  3591. rar5_options,
  3592. rar5_read_header,
  3593. rar5_read_data,
  3594. rar5_read_data_skip,
  3595. rar5_seek_data,
  3596. rar5_cleanup,
  3597. rar5_capabilities,
  3598. rar5_has_encrypted_entries);
  3599. if(ret != ARCHIVE_OK) {
  3600. (void) rar5_cleanup(ar);
  3601. }
  3602. return ret;
  3603. }