| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
77717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762
27722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762
77727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763
27732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588358935903591359235933594359535963597359835993600360136023603360436053606360736083609361036113612361336143615361636173618361936203621362236233624362536263627362836293630363136323633363436353636363736383639364036413642364336443645364636473648364936503651365236533654365536563657365836593660366136623663366436653666366736683669367036713672367336743675367636773678367936803681368236833684368536863687368836893690369136923693369436953696369736983699370037013702370337043705370637073708370937103711371237133714371537163717371837193720372137223723372437253726372737283729373037313732373337343735373637373738373937403741374237433744374537463747374837493750375137523753375437553756375737583759376037613762376337643765376637673768376937703771377237733774377537763
777377837793780378137823783 |
- <?xml version="1.0" encoding="utf-8"?>
- <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
- <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
- <head>
- <!--
- __ __ _
- ___\ \/ /_ __ __ _| |_
- / _ \\ /| '_ \ / _` | __|
- | __// \| |_) | (_| | |_
- \___/_/\_\ .__/ \__,_|\__|
- |_| XML parser
- Copyright (c) 2000 Clark Cooper <[email protected]>
- Copyright (c) 2000-2004 Fred L. Drake, Jr. <[email protected]>
- Copyright (c) 2002-2012 Karl Waclawek <[email protected]>
- Copyright (c) 2017-2026 Sebastian Pipping <[email protected]>
- Copyright (c) 2017 Jakub Wilk <[email protected]>
- Copyright (c) 2021 Tomas Korbar <[email protected]>
- Copyright (c) 2021 Nicolas Cavallari <[email protected]>
- Copyright (c) 2022 Thijs Schreijer <[email protected]>
- Copyright (c) 2023-2025 Hanno Böck <[email protected]>
- Copyright (c) 2023 Sony Corporation / Snild Dolkow <[email protected]>
- Licensed under the MIT license:
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to permit
- persons to whom the Software is furnished to do so, subject to the
- following conditions:
- The above copyright notice and this permission notice shall be included
- in all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
- NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
- DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- USE OR OTHER DEALINGS IN THE SOFTWARE.
- -->
- <title>
- Expat XML Parser
- </title>
- <meta name="author" content="Clark Cooper, [email protected]" />
- <link href="ok.min.css" rel="stylesheet" />
- <link href="style.css" rel="stylesheet" />
- </head>
- <body>
- <div>
- <h1>
- The Expat XML Parser <small>Release 2.7.5</small>
- </h1>
- </div>
- <div class="content">
- <p>
- Expat is a library, written in C, for parsing XML documents. It's the underlying
- XML parser for the open-source Mozilla project, Perl's <code>XML::Parser</code>,
- Python's <code>xml.parsers.expat</code>, and other open-source XML parsers.
- </p>
- <p>
- This library is the creation of James Clark, who's also given us groff (an nroff
- look-alike), Jade (an implementation of ISO's DSSSL stylesheet language for
- SGML), XP (a Java XML parser package), and XT (a Java XSL engine). James was also the
- technical lead on the XML Working Group at W3C that produced the XML
- specification.
- </p>
- <p>
- This is free software, licensed under the <a href="../COPYING">MIT/X Consortium
- license</a>. You may download it from <a href="https://libexpat.github.io/">the
- Expat home page</a>.
- </p>
- <p>
- The bulk of this document was originally commissioned as an article by <a href=
- "https://www.xml.com/">XML.com</a>. They graciously allowed Clark Cooper to
- retain copyright and to distribute it with Expat. This version has been
- substantially extended to include documentation on features which have been added
- since the original article was published, and additional information on using the
- original interface.
- </p>
- <hr />
- <h2>
- Table of Contents
- </h2>
- <ul>
- <li>
- <a href="#overview">Overview</a>
- </li>
- <li>
- <a href="#building">Building and Installing</a>
- </li>
- <li>
- <a href="#using">Using Expat</a>
- </li>
- <li>
- <a href="#reference">Reference</a>
- <ul>
- <li>
- <a href="#creation">Parser Creation Functions</a>
- <ul>
- <li>
- <a href="#XML_ParserCreate">XML_ParserCreate</a>
- </li>
- <li>
- <a href="#XML_ParserCreateNS">XML_ParserCreateNS</a>
- </li>
- <li>
- <a href="#XML_ParserCreate_MM">XML_ParserCreate_MM</a>
- </li>
- <li>
- <a href=
- "#XML_ExternalEntityParserCreate">XML_ExternalEntityParserCreate</a>
- </li>
- <li>
- <a href="#XML_ParserFree">XML_ParserFree</a>
- </li>
- <li>
- <a href="#XML_ParserReset">XML_ParserReset</a>
- </li>
- </ul>
- </li>
- <li>
- <a href="#parsing">Parsing Functions</a>
- <ul>
- <li>
- <a href="#XML_Parse">XML_Parse</a>
- </li>
- <li>
- <a href="#XML_ParseBuffer">XML_ParseBuffer</a>
- </li>
- <li>
- <a href="#XML_GetBuffer">XML_GetBuffer</a>
- </li>
- <li>
- <a href="#XML_StopParser">XML_StopParser</a>
- </li>
- <li>
- <a href="#XML_ResumeParser">XML_ResumeParser</a>
- </li>
- <li>
- <a href="#XML_GetParsingStatus">XML_GetParsingStatus</a>
- </li>
- </ul>
- </li>
- <li>
- <a href="#setting">Handler Setting Functions</a>
- <ul>
- <li>
- <a href="#XML_SetStartElementHandler">XML_SetStartElementHandler</a>
- </li>
- <li>
- <a href="#XML_SetEndElementHandler">XML_SetEndElementHandler</a>
- </li>
- <li>
- <a href="#XML_SetElementHandler">XML_SetElementHandler</a>
- </li>
- <li>
- <a href="#XML_SetCharacterDataHandler">XML_SetCharacterDataHandler</a>
- </li>
- <li>
- <a href=
- "#XML_SetProcessingInstructionHandler">XML_SetProcessingInstructionHandler</a>
- </li>
- <li>
- <a href="#XML_SetCommentHandler">XML_SetCommentHandler</a>
- </li>
- <li>
- <a href=
- "#XML_SetStartCdataSectionHandler">XML_SetStartCdataSectionHandler</a>
- </li>
- <li>
- <a href=
- "#XML_SetEndCdataSectionHandler">XML_SetEndCdataSectionHandler</a>
- </li>
- <li>
- <a href="#XML_SetCdataSectionHandler">XML_SetCdataSectionHandler</a>
- </li>
- <li>
- <a href="#XML_SetDefaultHandler">XML_SetDefaultHandler</a>
- </li>
- <li>
- <a href="#XML_SetDefaultHandlerExpand">XML_SetDefaultHandlerExpand</a>
- </li>
- <li>
- <a href=
- "#XML_SetExternalEntityRefHandler">XML_SetExternalEntityRefHandler</a>
- </li>
- <li>
- <a href=
- "#XML_SetExternalEntityRefHandlerArg">XML_SetExternalEntityRefHandlerArg</a>
- </li>
- <li>
- <a href="#XML_SetSkippedEntityHandler">XML_SetSkippedEntityHandler</a>
- </li>
- <li>
- <a href=
- "#XML_SetUnknownEncodingHandler">XML_SetUnknownEncodingHandler</a>
- </li>
- <li>
- <a href=
- "#XML_SetStartNamespaceDeclHandler">XML_SetStartNamespaceDeclHandler</a>
- </li>
- <li>
- <a href=
- "#XML_SetEndNamespaceDeclHandler">XML_SetEndNamespaceDeclHandler</a>
- </li>
- <li>
- <a href="#XML_SetNamespaceDeclHandler">XML_SetNamespaceDeclHandler</a>
- </li>
- <li>
- <a href="#XML_SetXmlDeclHandler">XML_SetXmlDeclHandler</a>
- </li>
- <li>
- <a href=
- "#XML_SetStartDoctypeDeclHandler">XML_SetStartDoctypeDeclHandler</a>
- </li>
- <li>
- <a href=
- "#XML_SetEndDoctypeDeclHandler">XML_SetEndDoctypeDeclHandler</a>
- </li>
- <li>
- <a href="#XML_SetDoctypeDeclHandler">XML_SetDoctypeDeclHandler</a>
- </li>
- <li>
- <a href="#XML_SetElementDeclHandler">XML_SetElementDeclHandler</a>
- </li>
- <li>
- <a href="#XML_SetAttlistDeclHandler">XML_SetAttlistDeclHandler</a>
- </li>
- <li>
- <a href="#XML_SetEntityDeclHandler">XML_SetEntityDeclHandler</a>
- </li>
- <li>
- <a href=
- "#XML_SetUnparsedEntityDeclHandler">XML_SetUnparsedEntityDeclHandler</a>
- </li>
- <li>
- <a href="#XML_SetNotationDeclHandler">XML_SetNotationDeclHandler</a>
- </li>
- <li>
- <a href="#XML_SetNotStandaloneHandler">XML_SetNotStandaloneHandler</a>
- </li>
- </ul>
- </li>
- <li>
- <a href="#position">Parse Position and Error Reporting Functions</a>
- <ul>
- <li>
- <a href="#XML_GetErrorCode">XML_GetErrorCode</a>
- </li>
- <li>
- <a href="#XML_ErrorString">XML_ErrorString</a>
- </li>
- <li>
- <a href="#XML_GetCurrentByteIndex">XML_GetCurrentByteIndex</a>
- </li>
- <li>
- <a href="#XML_GetCurrentLineNumber">XML_GetCurrentLineNumber</a>
- </li>
- <li>
- <a href="#XML_GetCurrentColumnNumber">XML_GetCurrentColumnNumber</a>
- </li>
- <li>
- <a href="#XML_GetCurrentByteCount">XML_GetCurrentByteCount</a>
- </li>
- <li>
- <a href="#XML_GetInputContext">XML_GetInputContext</a>
- </li>
- </ul>
- </li>
- <li>
- <a href="#attack-protection">Attack Protection</a>
- <ul>
- <li>
- <a href=
- "#XML_SetBillionLaughsAttackProtectionMaximumAmplification">XML_SetBillionLaughsAttackProtectionMaximumAmplification</a>
- </li>
- <li>
- <a href=
- "#XML_SetBillionLaughsAttackProtectionActivationThreshold">XML_SetBillionLaughsAttackProtectionActivationThreshold</a>
- </li>
- <li>
- <a href=
- "#XML_SetAllocTrackerMaximumAmplification">XML_SetAllocTrackerMaximumAmplification</a>
- </li>
- <li>
- <a href=
- "#XML_SetAllocTrackerActivationThreshold">XML_SetAllocTrackerActivationThreshold</a>
- </li>
- <li>
- <a href=
- "#XML_SetReparseDeferralEnabled">XML_SetReparseDeferralEnabled</a>
- </li>
- </ul>
- </li>
- <li>
- <a href="#miscellaneous">Miscellaneous Functions</a>
- <ul>
- <li>
- <a href="#XML_SetUserData">XML_SetUserData</a>
- </li>
- <li>
- <a href="#XML_GetUserData">XML_GetUserData</a>
- </li>
- <li>
- <a href="#XML_UseParserAsHandlerArg">XML_UseParserAsHandlerArg</a>
- </li>
- <li>
- <a href="#XML_SetBase">XML_SetBase</a>
- </li>
- <li>
- <a href="#XML_GetBase">XML_GetBase</a>
- </li>
- <li>
- <a href=
- "#XML_GetSpecifiedAttributeCount">XML_GetSpecifiedAttributeCount</a>
- </li>
- <li>
- <a href="#XML_GetIdAttributeIndex">XML_GetIdAttributeIndex</a>
- </li>
- <li>
- <a href="#XML_GetAttributeInfo">XML_GetAttributeInfo</a>
- </li>
- <li>
- <a href="#XML_SetEncoding">XML_SetEncoding</a>
- </li>
- <li>
- <a href="#XML_SetParamEntityParsing">XML_SetParamEntityParsing</a>
- </li>
- <li>
- <a href="#XML_SetHashSalt">XML_SetHashSalt</a>
- </li>
- <li>
- <a href="#XML_UseForeignDTD">XML_UseForeignDTD</a>
- </li>
- <li>
- <a href="#XML_SetReturnNSTriplet">XML_SetReturnNSTriplet</a>
- </li>
- <li>
- <a href="#XML_DefaultCurrent">XML_DefaultCurrent</a>
- </li>
- <li>
- <a href="#XML_ExpatVersion">XML_ExpatVersion</a>
- </li>
- <li>
- <a href="#XML_ExpatVersionInfo">XML_ExpatVersionInfo</a>
- </li>
- <li>
- <a href="#XML_GetFeatureList">XML_GetFeatureList</a>
- </li>
- <li>
- <a href="#XML_FreeContentModel">XML_FreeContentModel</a>
- </li>
- <li>
- <a href="#XML_MemMalloc">XML_MemMalloc</a>
- </li>
- <li>
- <a href="#XML_MemRealloc">XML_MemRealloc</a>
- </li>
- <li>
- <a href="#XML_MemFree">XML_MemFree</a>
- </li>
- </ul>
- </li>
- </ul>
- </li>
- </ul>
- <hr />
- <h2>
- <a id="overview" name="overview">Overview</a>
- </h2>
- <p>
- Expat is a stream-oriented parser. You register callback (or handler) functions
- with the parser and then start feeding it the document. As the parser recognizes
- parts of the document, it will call the appropriate handler for that part (if
- you've registered one). The document is fed to the parser in pieces, so you can
- start parsing before you have the whole document. This also allows you to parse
- really huge documents that won't fit into memory.
- </p>
- <p>
- Expat can be intimidating due to the many kinds of handlers and options you can
- set. But you only need to learn four functions in order to do 90% of what you'll
- want to do with it:
- </p>
- <dl>
- <dt>
- <code><a href="#XML_ParserCreate">XML_ParserCreate</a></code>
- </dt>
- <dd>
- Create a new parser object.
- </dd>
- <dt>
- <code><a href="#XML_SetElementHandler">XML_SetElementHandler</a></code>
- </dt>
- <dd>
- Set handlers for start and end tags.
- </dd>
- <dt>
- <code><a href=
- "#XML_SetCharacterDataHandler">XML_SetCharacterDataHandler</a></code>
- </dt>
- <dd>
- Set handler for text.
- </dd>
- <dt>
- <code><a href="#XML_Parse">XML_Parse</a></code>
- </dt>
- <dd>
- Pass a buffer full of document to the parser.
- </dd>
- </dl>
- <p>
- These functions and others are described in the <a href=
- "#reference">reference</a> part of this document. The reference section also
- describes in detail the parameters passed to the different types of handlers.
- </p>
- <p>
- Let's look at a very simple example program that only uses 3 of the above
- functions (it doesn't need to set a character handler). The program <a href=
- "../examples/outline.c">outline.c</a> prints an element outline, indenting child
- elements to distinguish them from the parent element that contains them. The
- start handler does all the work. It prints two indenting spaces for every level
- of ancestor elements, then it prints the element and attribute information.
- Finally it increments the global <code>Depth</code> variable.
- </p>
- <pre class="eg">
- int Depth;
- void XMLCALL
- start(void *data, const char *el, const char **attr) {
- int i;
- for (i = 0; i &lt; Depth; i++)
- printf(" ");
- printf("%s", el);
- for (i = 0; attr[i]; i += 2) {
- printf(" %s='%s'", attr[i], attr[i + 1]);
- }
- printf("\n");
- Depth++;
- } /* End of start handler */
- </pre>
- <p>
- The end handler simply does the bookkeeping work of decrementing <code>Depth</code>.
- </p>
- <pre class="eg">
- void XMLCALL
- end(void *data, const char *el) {
- Depth--;
- } /* End of end handler */
- </pre>
- <p>
- Note the <code>XMLCALL</code> annotation used for the callbacks. This is used to
- ensure that Expat and the callbacks are using the same calling convention in
- case the compiler options used for Expat itself and the client code are
- different. Expat tries not to care what the default calling convention is, though
- it may require that it be compiled with a default convention of "cdecl" on some
- platforms. For code which uses Expat, however, the calling convention is
- specified by the <code>XMLCALL</code> annotation on most platforms; callbacks
- should be defined using this annotation.
- </p>
- <p>
- The <code>XMLCALL</code> annotation was added in Expat 1.95.7, but existing
- working Expat applications don't need to add it (since they are already using the
- "cdecl" calling convention, or they wouldn't be working). The annotation is only
- needed if the default calling convention may be something other than "cdecl". To
- use the annotation safely with older versions of Expat, you can conditionally
- define it <em>after</em> including Expat's header file:
- </p>
- <pre class="eg">
- #include &lt;expat.h&gt;
- #ifndef XMLCALL
- #if defined(_MSC_VER) &amp;&amp; !defined(__BEOS__) &amp;&amp; !defined(__CYGWIN__)
- #define XMLCALL __cdecl
- #elif defined(__GNUC__)
- #define XMLCALL __attribute__((cdecl))
- #else
- #define XMLCALL
- #endif
- #endif
- </pre>
- <p>
- After creating the parser, the main program just has the job of shoveling the
- document to the parser so that it can do its work.
- </p>
- <hr />
- <h2>
- <a id="building" name="building">Building and Installing Expat</a>
- </h2>
- <p>
- The Expat distribution comes as a compressed (with GNU gzip) tar file. You may
- download the latest version from <a href=
- "https://sourceforge.net/projects/expat/">SourceForge</a>. After unpacking this,
- cd into the directory. Then follow either the Win32 directions or Unix directions
- below.
- </p>
- <h3>
- Building under Win32
- </h3>
- <p>
- If you're using the GNU compiler under cygwin, follow the Unix directions in the
- next section. Otherwise if you have Microsoft's Developer Studio installed, you
- can use CMake to generate a <code>.sln</code> file, e.g. <code>cmake -G"Visual
- Studio 17 2022" -DCMAKE_BUILD_TYPE=RelWithDebInfo .</code>, and then build Expat
- using <code>msbuild /m expat.sln</code>.
- </p>
- <p>
- Alternatively, you may download the Win32 binary package that contains the
- "expat.h" include file and a pre-built DLL.
- </p>
- <h3>
- Building under Unix (or GNU)
- </h3>
- <p>
- First you'll need to run the configure shell script in order to configure the
- Makefiles and headers for your system.
- </p>
- <p>
- If you're happy with all the defaults that configure picks for you, and you have
- permission on your system to install into /usr/local, you can install Expat with
- this sequence of commands:
- </p>
- <pre class="eg">
- ./configure
- make
- make install
- </pre>
- <p>
- There are some options that you can provide to this script, but the only one
- we'll mention here is the <code>--prefix</code> option. You can find out all the
- options available by running configure with just the <code>--help</code> option.
- </p>
- <p>
- By default, the configure script sets things up so that the library gets
- installed in <code>/usr/local/lib</code> and the associated header file in
- <code>/usr/local/include</code>. But if you were to give the option,
- <code>--prefix=/home/me/mystuff</code>, then the library and header would get
- installed in <code>/home/me/mystuff/lib</code> and
- <code>/home/me/mystuff/include</code> respectively.
- </p>
- <h3>
- Configuring Expat Using the Pre-Processor
- </h3>
- <p>
- Expat's feature set can be configured using a small number of pre-processor
- definitions. The symbols are:
- </p>
- <dl class="cpp-symbols">
- <dt>
- <a id="XML_GE" name="XML_GE">XML_GE</a>
- </dt>
- <dd>
- Added in Expat 2.6.0. Include support for <a href=
- "https://www.w3.org/TR/2006/REC-xml-20060816/#sec-physical-struct">general
- entities</a> (syntax <code>&amp;e1;</code> to reference and syntax
- <code>&lt;!ENTITY e1 'value1'&gt;</code> (an internal general entity) or
- <code>&lt;!ENTITY e2 SYSTEM 'file2'&gt;</code> (an external general entity) to
- declare). With <code>XML_GE</code> enabled, general entities will be replaced
- by their declared replacement text; for this to work for <em>external</em>
- general entities, in addition an <code><a href=
- "#XML_SetExternalEntityRefHandler">XML_ExternalEntityRefHandler</a></code> must
- be set using <code><a href=
- "#XML_SetExternalEntityRefHandler">XML_SetExternalEntityRefHandler</a></code>.
- Also, enabling <code>XML_GE</code> makes the functions <code><a href=
- "#XML_SetBillionLaughsAttackProtectionMaximumAmplification">XML_SetBillionLaughsAttackProtectionMaximumAmplification</a></code>
- and <code><a href=
- "#XML_SetBillionLaughsAttackProtectionActivationThreshold">XML_SetBillionLaughsAttackProtectionActivationThreshold</a></code>
- available.<br />
- With <code>XML_GE</code> disabled, Expat has a smaller memory footprint and can
- be faster, but will not load external general entities and will replace all
- general entities (except the <a href=
- "https://www.w3.org/TR/2006/REC-xml-20060816/#sec-predefined-ent">predefined
- five</a>: <code>amp</code>, <code>apos</code>, <code>gt</code>,
- <code>lt</code>, <code>quot</code>) with a self-reference: for example,
- referencing an entity <code>e1</code> via <code>&amp;e1;</code> will be
- replaced by text <code>&amp;e1;</code>.
- </dd>
- <dt>
- <a id="XML_DTD" name="XML_DTD">XML_DTD</a>
- </dt>
- <dd>
- Include support for using and reporting DTD-based content. If this is defined,
- default attribute values from an external DTD subset are reported and attribute
- value normalization occurs based on the type of attributes defined in the
- external subset. Without this, Expat has a smaller memory footprint and can be
- faster, but will not load external parameter entities or process conditional
- sections. If defined, makes the functions <code><a href=
- "#XML_SetBillionLaughsAttackProtectionMaximumAmplification">XML_SetBillionLaughsAttackProtectionMaximumAmplification</a></code>
- and <code><a href=
- "#XML_SetBillionLaughsAttackProtectionActivationThreshold">XML_SetBillionLaughsAttackProtectionActivationThreshold</a></code>
- available.
- </dd>
- <dt>
- <a id="XML_NS" name="XML_NS">XML_NS</a>
- </dt>
- <dd>
- When defined, support for the <cite><a href=
- "https://www.w3.org/TR/REC-xml-names/">Namespaces in XML</a></cite>
- specification is included.
- </dd>
- <dt>
- <a id="XML_UNICODE" name="XML_UNICODE">XML_UNICODE</a>
- </dt>
- <dd>
- When defined, character data reported to the application is encoded in UTF-16
- using wide characters of the type <code>XML_Char</code>. This is implied if
- <code>XML_UNICODE_WCHAR_T</code> is defined.
- </dd>
- <dt>
- <a id="XML_UNICODE_WCHAR_T" name="XML_UNICODE_WCHAR_T">XML_UNICODE_WCHAR_T</a>
- </dt>
- <dd>
- If defined, causes the <code>XML_Char</code> character type to be defined using
- the <code>wchar_t</code> type; otherwise, <code>unsigned short</code> is used.
- Defining this implies <code>XML_UNICODE</code>.
- </dd>
- <dt>
- <a id="XML_LARGE_SIZE" name="XML_LARGE_SIZE">XML_LARGE_SIZE</a>
- </dt>
- <dd>
- If defined, causes the <code>XML_Size</code> and <code>XML_Index</code> integer
- types to be at least 64 bits in size. This is intended to support processing of
- very large input streams, where the return values of <code><a href=
- "#XML_GetCurrentByteIndex">XML_GetCurrentByteIndex</a></code>, <code><a href=
- "#XML_GetCurrentLineNumber">XML_GetCurrentLineNumber</a></code> and
- <code><a href=
- "#XML_GetCurrentColumnNumber">XML_GetCurrentColumnNumber</a></code> could
- overflow. It may not be supported by all compilers, and is turned off by
- default.
- </dd>
- <dt>
- <a id="XML_CONTEXT_BYTES" name="XML_CONTEXT_BYTES">XML_CONTEXT_BYTES</a>
- </dt>
- <dd>
- The number of input bytes of markup context which the parser will ensure are
- available for reporting via <code><a href=
- "#XML_GetInputContext">XML_GetInputContext</a></code>. This is normally set to
- 1024, and must be set to a positive integer to enable. If this is set to zero,
- the input context will not be available and <code><a href=
- "#XML_GetInputContext">XML_GetInputContext</a></code> will always report
- <code>NULL</code>. Without this, Expat has a smaller memory footprint and can
- be faster.
- </dd>
- <dt>
- <a id="XML_STATIC" name="XML_STATIC">XML_STATIC</a>
- </dt>
- <dd>
- On Windows, this should be set if Expat is going to be linked statically with
- the code that calls it; this is required to get all the right MSVC magic
- annotations correct. This is ignored on other platforms.
- </dd>
- <dt>
- <a id="XML_ATTR_INFO" name="XML_ATTR_INFO">XML_ATTR_INFO</a>
- </dt>
- <dd>
- If defined, makes the additional function <code><a href=
- "#XML_GetAttributeInfo">XML_GetAttributeInfo</a></code> available for reporting
- attribute byte offsets.
- </dd>
- </dl>
- <hr />
- <h2>
- <a id="using" name="using">Using Expat</a>
- </h2>
- <h3>
- Compiling and Linking Against Expat
- </h3>
- <p>
- Unless you installed Expat in a location not expected by your compiler and
- linker, all you have to do to use Expat in your programs is to include the Expat
- header (<code>#include &lt;expat.h&gt;</code>) in your files that make calls to
- it and to tell the linker that it needs to link against the Expat library. On
- Unix systems, this would usually be done with the <code>-lexpat</code> argument.
- Otherwise, you'll need to tell the compiler where to look for the Expat header
- and the linker where to find the Expat library. You may also need to take steps
- to tell the operating system where to find this library at run time.
- </p>
- <p>
- On a Unix-based system, here's what a Makefile might look like when Expat is
- installed in a standard location:
- </p>
- <pre class="eg">
- CC=cc
- LDFLAGS=
- LIBS= -lexpat
- xmlapp: xmlapp.o
- $(CC) $(LDFLAGS) -o xmlapp xmlapp.o $(LIBS)
- </pre>
- <p>
- If you installed Expat in, say, <code>/home/me/mystuff</code>, then the Makefile
- would look like this:
- </p>
- <pre class="eg">
- CC=cc
- CFLAGS= -I/home/me/mystuff/include
- LDFLAGS=
- LIBS= -L/home/me/mystuff/lib -lexpat
- xmlapp: xmlapp.o
- $(CC) $(LDFLAGS) -o xmlapp xmlapp.o $(LIBS)
- </pre>
- <p>
- You'd also have to set the environment variable <code>LD_LIBRARY_PATH</code> to
- <code>/home/me/mystuff/lib</code> (or to
- <code>${LD_LIBRARY_PATH}:/home/me/mystuff/lib</code> if LD_LIBRARY_PATH already
- has some directories in it) in order to run your application.
- </p>
- <h3>
- Expat Basics
- </h3>
- <p>
- As we saw in the example in the overview, the first step in parsing an XML
- document with Expat is to create a parser object. There are <a href=
- "#creation">three functions</a> in the Expat API for creating a parser object.
- However, only two of these (<code><a href=
- "#XML_ParserCreate">XML_ParserCreate</a></code> and <code><a href=
- "#XML_ParserCreateNS">XML_ParserCreateNS</a></code>) can be used for constructing
- a parser for a top-level document. The object returned by these functions is an
- opaque pointer (i.e. "expat.h" declares it as void *) to data with further
- internal structure. In order to free the memory associated with this object you
- must call <code><a href="#XML_ParserFree">XML_ParserFree</a></code>. Note that if
- you have provided any <a href="#userdata">user data</a> that gets stored in the
- parser, then your application is responsible for freeing it prior to calling
- <code>XML_ParserFree</code>.
- </p>
- <p>
- The objects returned by the parser creation functions are good for parsing only
- one XML document or external parsed entity. If your application needs to parse
- many XML documents, then it needs to create a parser object for each one. The
- best way to deal with this is to create a higher level object that contains all
- the default initialization you want for your parser objects.
- </p>
- <p>
- Walking through a document hierarchy with a stream oriented parser will require a
- good stack mechanism in order to keep track of current context. For instance, to
- answer the simple question, "What element does this text belong to?" requires a
- stack, since the parser may have descended into other elements that are children
- of the current one and has encountered this text on the way out.
- </p>
- <p>
- The things you're likely to want to keep on a stack are the currently opened
- element and its attributes. You push this information onto the stack in the
- start handler and you pop it off in the end handler.
- </p>
- <p>
- For some tasks, it is sufficient to just keep information on what the depth of
- the stack is (or would be if you had one.) The outline program shown above
- presents one example. Another such task would be skipping over a complete
- element. When you see the start tag for the element you want to skip, you set a
- skip flag and record the depth at which the element started. When the end tag
- handler encounters the same depth, the skipped element has ended and the flag may
- be cleared. If you follow the convention that the root element starts at 1, then
- you can use the same variable for skip flag and skip depth.
- </p>
- <pre class="eg">
- void
- init_info(Parseinfo *info) {
- info->skip = 0;
- info->depth = 1;
- /* Other initializations here */
- } /* End of init_info */
- void XMLCALL
- rawstart(void *data, const char *el, const char **attr) {
- Parseinfo *inf = (Parseinfo *) data;
- if (! inf->skip) {
- if (should_skip(inf, el, attr)) {
- inf->skip = inf->depth;
- }
- else
- start(inf, el, attr); /* This does rest of start handling */
- }
- inf->depth++;
- } /* End of rawstart */
- void XMLCALL
- rawend(void *data, const char *el) {
- Parseinfo *inf = (Parseinfo *) data;
- inf->depth--;
- if (! inf->skip)
- end(inf, el); /* This does rest of end handling */
- if (inf->skip == inf->depth)
- inf->skip = 0;
- } /* End rawend */
- </pre>
- <p>
- Notice in the above example the difference in how depth is manipulated in the
- start and end handlers. The end tag handler should be the mirror image of the
- start tag handler. This is necessary to properly model containment. Since, in the
- start tag handler, we incremented depth <em>after</em> the main body of start tag
- code, then in the end handler, we need to manipulate it <em>before</em> the main
- body. If we'd decided to increment it first thing in the start handler, then we'd
- have had to decrement it last thing in the end handler.
- </p>
- <h3 id="userdata">
- Communicating between handlers
- </h3>
- <p>
- In order to be able to pass information between different handlers without using
- globals, you'll need to define a data structure to hold the shared variables. You
- can then tell Expat (with the <code><a href=
- "#XML_SetUserData">XML_SetUserData</a></code> function) to pass a pointer to this
- structure to the handlers. This is the first argument received by most handlers.
- In the <a href="#reference">reference section</a>, an argument to a callback
- function is named <code>userData</code> and has type <code>void *</code> if the
- user data is passed; it will have the type <code>XML_Parser</code> if the parser
- itself is passed. When the parser is passed, the user data may be retrieved using
- <code><a href="#XML_GetUserData">XML_GetUserData</a></code>.
- </p>
- <p>
- One common case where multiple calls to a single handler may need to communicate
- using an application data structure is the case when content passed to the
- character data handler (set by <code><a href=
- "#XML_SetCharacterDataHandler">XML_SetCharacterDataHandler</a></code>) needs to
- be accumulated. A common first-time mistake with any of the event-oriented
- interfaces to an XML parser is to expect all the text contained in an element to
- be reported by a single call to the character data handler. Expat, like many
- other XML parsers, reports such data as a sequence of calls; there's no way to
- know when the end of the sequence is reached until a different callback is made.
- A buffer referenced by the user data structure proves both an effective and
- convenient place to accumulate character data.
- </p>
- <!-- XXX example needed here -->
- <h3>
- XML Version
- </h3>
- <p>
- Expat is an XML 1.0 parser, and as such never complains based on the value of the
- <code>version</code> pseudo-attribute in the XML declaration, if present.
- </p>
- <p>
- If an application needs to check the version number (to support alternate
- processing), it should use the <code><a href=
- "#XML_SetXmlDeclHandler">XML_SetXmlDeclHandler</a></code> function to set a
- handler that uses the information in the XML declaration to determine what to do.
- This example shows how to check that only a version number of <code>"1.0"</code>
- is accepted:
- </p>
- <pre class="eg">
- static int wrong_version;
- static XML_Parser parser;
- static void XMLCALL
- xmldecl_handler(void *userData,
- const XML_Char *version,
- const XML_Char *encoding,
- int standalone)
- {
- static const XML_Char Version_1_0[] = {'1', '.', '0', 0};
- int i;
- for (i = 0; i < (sizeof(Version_1_0) / sizeof(Version_1_0[0])); ++i) {
- if (version[i] != Version_1_0[i]) {
- wrong_version = 1;
- /* also clear all other handlers: */
- XML_SetCharacterDataHandler(parser, NULL);
- ...
- return;
- }
- }
- ...
- }
- </pre>
- <h3>
- Namespace Processing
- </h3>
- <p>
- When the parser is created using the <code><a href=
- "#XML_ParserCreateNS">XML_ParserCreateNS</a></code> function, Expat performs
- namespace processing. Under namespace processing, Expat consumes
- <code>xmlns</code> and <code>xmlns:...</code> attributes, which declare
- namespaces for the scope of the element in which they occur. This means that your
- start handler will not see these attributes. Your application can still be
- informed of these declarations by setting namespace declaration handlers with
- <a href=
- "#XML_SetNamespaceDeclHandler"><code>XML_SetNamespaceDeclHandler</code></a>.
- </p>
- <p>
- Element type and attribute names that belong to a given namespace are passed to
- the appropriate handler in expanded form. By default this expanded form is a
- concatenation of the namespace URI, the separator character (which is the 2nd
- argument to <code><a href="#XML_ParserCreateNS">XML_ParserCreateNS</a></code>),
- and the local name (i.e. the part after the colon). Names with undeclared
- prefixes are not well-formed when namespace processing is enabled, and will
- trigger an error. Unprefixed attribute names are never expanded, and unprefixed
- element names are only expanded when they are in the scope of a default
- namespace.
- </p>
- <p>
- However if <code><a href=
- "#XML_SetReturnNSTriplet">XML_SetReturnNSTriplet</a></code> has been called with
- a non-zero <code>do_nst</code> parameter, then the expanded form for names with
- an explicit prefix is a concatenation of: URI, separator, local name, separator,
- prefix.
- </p>
- <p>
- You can set handlers for the start of a namespace declaration and for the end of
- a scope of a declaration with the <code><a href=
- "#XML_SetNamespaceDeclHandler">XML_SetNamespaceDeclHandler</a></code> function.
- The StartNamespaceDeclHandler is called prior to the start tag handler and the
- EndNamespaceDeclHandler is called after the corresponding end tag that ends the
- namespace's scope. The namespace start handler gets passed the prefix and URI for
- the namespace. For a default namespace declaration (xmlns='...'), the prefix will
- be <code>NULL</code>. The URI will be <code>NULL</code> for the case where the
- default namespace is being unset. The namespace end handler just gets the prefix
- for the closing scope.
- </p>
- <p>
- These handlers are called for each declaration. So if, for instance, a start tag
- had three namespace declarations, then the StartNamespaceDeclHandler would be
- called three times before the start tag handler is called, once for each
- declaration.
- </p>
- <h3>
- Character Encodings
- </h3>
- <p>
- While XML is based on Unicode, and every XML processor is required to recognize
- UTF-8 and UTF-16 (1 and 2 byte encodings of Unicode), other encodings may be
- declared in XML documents or entities. For the main document, an XML declaration
- may contain an encoding declaration:
- </p>
- <pre>
- &lt;?xml version="1.0" encoding="ISO-8859-2"?&gt;
- </pre>
- <p>
- External parsed entities may begin with a text declaration, which looks like an
- XML declaration with just an encoding declaration:
- </p>
- <pre>
- &lt;?xml encoding="Big5"?&gt;
- </pre>
- <p>
- With Expat, you may also specify an encoding at the time of creating a parser.
- This is useful when the encoding information may come from a source outside the
- document itself (like a higher level protocol.)
- </p>
- <p>
- <a id="builtin_encodings" name="builtin_encodings"></a>There are four built-in
- encodings in Expat:
- </p>
- <ul>
- <li>UTF-8
- </li>
- <li>UTF-16
- </li>
- <li>ISO-8859-1
- </li>
- <li>US-ASCII
- </li>
- </ul>
- <p>
- Anything else discovered in an encoding declaration or in the protocol encoding
- specified in the parser constructor, triggers a call to the
- <code>UnknownEncodingHandler</code>. This handler gets passed the encoding name
- and a pointer to an <code>XML_Encoding</code> data structure. Your handler must
- fill in this structure and return <code>XML_STATUS_OK</code> if it knows how to
- deal with the encoding. Otherwise the handler should return
- <code>XML_STATUS_ERROR</code>. The handler also gets passed a pointer to an
- optional application data structure that you may indicate when you set the
- handler.
- </p>
- <p>
- Expat places restrictions on character encodings that it can support by filling
- in the <code>XML_Encoding</code> structure:
- </p>
- <ol>
- <li>Every ASCII character that can appear in a well-formed XML document must be
- represented by a single byte, and that byte must correspond to its ASCII
- encoding (except for the characters $@\^'{}~)
- </li>
- <li>Characters must be encoded in 4 bytes or less.
- </li>
- <li>All characters encoded must have Unicode scalar values less than or equal to
- 65535 (0xFFFF). <em>This does not apply to the built-in support for UTF-16 and
- UTF-8.</em>
- </li>
- <li>No character may be encoded by more than one distinct sequence of bytes
- </li>
- </ol>
- <p>
- <code>XML_Encoding</code> contains an array of integers that correspond to the
- 1st byte of an encoding sequence. If the value in the array for a byte is zero or
- positive, then the byte is a single byte encoding that encodes the Unicode scalar
- value contained in the array. A -1 in this array indicates a malformed byte. If
- the value is -2, -3, or -4, then the byte is the beginning of a 2, 3, or 4 byte
- sequence respectively. Multi-byte sequences are sent to the convert function
- pointed at in the <code>XML_Encoding</code> structure. This function should
- return the Unicode scalar value for the sequence or -1 if the sequence is
- malformed.
- </p>
- <p>
- One pitfall that novice Expat users are likely to fall into is that although
- Expat may accept input in various encodings, the strings that it passes to the
- handlers are always encoded in UTF-8 or UTF-16 (depending on how Expat was
- compiled). Your application is responsible for any translation of these strings
- into other encodings.
- </p>
- <h3>
- Handling External Entity References
- </h3>
- <p>
- Expat does not read or parse external entities directly. Note that any external
- DTD is a special case of an external entity. If you've set no
- <code>ExternalEntityRefHandler</code>, then external entity references are
- silently ignored. Otherwise, it calls your handler with the information needed to
- read and parse the external entity.
- </p>
- <p>
- Your handler isn't actually responsible for parsing the entity, but it is
- responsible for creating a subsidiary parser with <code><a href=
- "#XML_ExternalEntityParserCreate">XML_ExternalEntityParserCreate</a></code> that
- will do the job. This returns an instance of <code>XML_Parser</code> that has
- handlers and other data structures initialized from the parent parser. You may
- then use <code><a href="#XML_Parse">XML_Parse</a></code> or <code><a href=
- "#XML_ParseBuffer">XML_ParseBuffer</a></code> calls against this parser. Since
- external entities may refer to other external entities, your handler should be
- prepared to be called recursively.
- </p>
- <h3>
- Parsing DTDs
- </h3>
- <p>
- In order to parse parameter entities, before starting the parse, you must call
- <code><a href="#XML_SetParamEntityParsing">XML_SetParamEntityParsing</a></code>
- with one of the following arguments:
- </p>
- <dl>
- <dt>
- <code>XML_PARAM_ENTITY_PARSING_NEVER</code>
- </dt>
- <dd>
- Don't parse parameter entities or the external subset
- </dd>
- <dt>
- <code>XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE</code>
- </dt>
- <dd>
- Parse parameter entities and the external subset unless <code>standalone</code>
- was set to "yes" in the XML declaration.
- </dd>
- <dt>
- <code>XML_PARAM_ENTITY_PARSING_ALWAYS</code>
- </dt>
- <dd>
- Always parse parameter entities and the external subset
- </dd>
- </dl>
- <p>
- In order to read an external DTD, you also have to set an external entity
- reference handler as described above.
- </p>
- <h3 id="stop-resume">
- Temporarily Stopping Parsing
- </h3>
- <p>
- Expat 1.95.8 introduces a new feature: it's now possible to stop parsing
- temporarily from within a handler function, even if more data has already been
- passed into the parser. Applications for this include
- </p>
- <ul>
- <li>Supporting the <a href="https://www.w3.org/TR/xinclude/">XInclude</a>
- specification.
- </li>
- <li>Delaying further processing until additional information is available from
- some other source.
- </li>
- <li>Adjusting processor load as task priorities shift within an application.
- </li>
- <li>Stopping parsing completely (simply free or reset the parser instead of
- resuming in the outer parsing loop). This can be useful if an application-domain
- error is found in the XML being parsed or if the result of the parse is
- determined not to be useful after all.
- </li>
- </ul>
- <p>
- To take advantage of this feature, the main parsing loop of an application needs
- to support this specifically. It cannot be supported with a parsing loop
- compatible with Expat 1.95.7 or earlier (though existing loops will continue to
- work without supporting the stop/resume feature).
- </p>
- <p>
- An application that uses this feature for a single parser will have the rough
- structure (in pseudo-code):
- </p>
- <pre class="pseudocode">
- fd = open_input()
- p = create_parser()
- if parse_xml(p, fd) {
- /* suspended */
- int suspended = 1;
- while (suspended) {
- do_something_else()
- if ready_to_resume() {
- suspended = continue_parsing(p, fd);
- }
- }
- }
- </pre>
- <p>
- An application that may resume any of several parsers based on input (either from
- the XML being parsed or some other source) will certainly have more interesting
- control structures.
- </p>
- <p>
- This C function could be used for the <code>parse_xml</code> function mentioned
- in the pseudo-code above:
- </p>
- <pre class="eg">
- #define BUFF_SIZE 10240
- /* Parse a document from the open file descriptor 'fd' until the parse
- is complete (the document has been completely parsed, or there's
- been an error), or the parse is stopped. Return non-zero when
- the parse is merely suspended.
- */
- int
- parse_xml(XML_Parser p, int fd)
- {
- for (;;) {
- int last_chunk;
- int bytes_read;
- enum XML_Status status;
- void *buff = XML_GetBuffer(p, BUFF_SIZE);
- if (buff == NULL) {
- /* handle error... */
- return 0;
- }
- bytes_read = read(fd, buff, BUFF_SIZE);
- if (bytes_read < 0) {
- /* handle error... */
- return 0;
- }
- status = XML_ParseBuffer(p, bytes_read, bytes_read == 0);
- switch (status) {
- case XML_STATUS_ERROR:
- /* handle error... */
- return 0;
- case XML_STATUS_SUSPENDED:
- return 1;
- }
- if (bytes_read == 0)
- return 0;
- }
- }
- </pre>
- <p>
- The corresponding <code>continue_parsing</code> function is somewhat simpler,
- since it only need deal with the return code from <code><a href=
- "#XML_ResumeParser">XML_ResumeParser</a></code>; it can delegate the input
- handling to the <code>parse_xml</code> function:
- </p>
- <pre class="eg">
- /* Continue parsing a document which had been suspended. The 'p' and
- 'fd' arguments are the same as passed to parse_xml(). Return
- non-zero when the parse is suspended.
- */
- int
- continue_parsing(XML_Parser p, int fd)
- {
- enum XML_Status status = XML_ResumeParser(p);
- switch (status) {
- case XML_STATUS_ERROR:
- /* handle error... */
- return 0;
- case XML_ERROR_NOT_SUSPENDED:
- /* handle error... */
- return 0;
- case XML_STATUS_SUSPENDED:
- return 1;
- }
- return parse_xml(p, fd);
- }
- </pre>
- <p>
- Now that we've seen what a mess the top-level parsing loop can become, what have
- we gained? Very simply, we can now use the <code><a href=
- "#XML_StopParser">XML_StopParser</a></code> function to stop parsing, without
- having to go to great lengths to avoid additional processing that we're expecting
- to ignore. As a bonus, we get to stop parsing <em>temporarily</em>, and come back
- to it when we're ready.
- </p>
- <p>
- To stop parsing from a handler function, use the <code><a href=
- "#XML_StopParser">XML_StopParser</a></code> function. This function takes two
- arguments; the parser being stopped and a flag indicating whether the parse can
- be resumed in the future.
- </p>
- <!-- XXX really need more here -->
- <hr />
- <!-- ================================================================ -->
- <h2>
- <a id="reference" name="reference">Expat Reference</a>
- </h2>
- <h3>
- <a id="creation" name="creation">Parser Creation</a>
- </h3>
- <h4 id="XML_ParserCreate">
- XML_ParserCreate
- </h4>
- <pre class="fcndec">
- XML_Parser XMLCALL
- XML_ParserCreate(const XML_Char *encoding);
- </pre>
- <div class="fcndef">
- <p>
- Construct a new parser. If encoding is non-<code>NULL</code>, it specifies a
- character encoding to use for the document. This overrides the document
- encoding declaration. There are four built-in encodings:
- </p>
- <ul>
- <li>US-ASCII
- </li>
- <li>UTF-8
- </li>
- <li>UTF-16
- </li>
- <li>ISO-8859-1
- </li>
- </ul>
- <p>
- Any other value will invoke a call to the UnknownEncodingHandler.
- </p>
- </div>
- <h4 id="XML_ParserCreateNS">
- XML_ParserCreateNS
- </h4>
- <pre class="fcndec">
- XML_Parser XMLCALL
- XML_ParserCreateNS(const XML_Char *encoding,
- XML_Char sep);
- </pre>
- <div class="fcndef">
- Constructs a new parser that has namespace processing in effect. Namespace
- expanded element names and attribute names are returned as a concatenation of the
- namespace URI, <em>sep</em>, and the local part of the name. This means that you
- should pick a character for <em>sep</em> that can't be part of a URI. Since
- Expat does not check namespace URIs for conformance, the only safe choice for a
- namespace separator is a character that is illegal in XML. For instance,
- <code>'\xFF'</code> is not legal in UTF-8, and <code>'\xFFFF'</code> is not legal
- in UTF-16. There is a special case when <em>sep</em> is the null character
- <code>'\0'</code>: the namespace URI and the local part will be concatenated
- without any separator - this is intended to support RDF processors. It is a
- programming error to use the null separator with <a href=
- "#XML_SetReturnNSTriplet">namespace triplets</a>.
- </div>
- <p>
- <strong>Note:</strong> Expat does not validate namespace URIs (beyond encoding)
- against RFC 3986 today (and is not required to do so with regard to the XML 1.0
- namespaces specification) but it may start doing that in future releases. Before
- that, an application using Expat must be ready to receive namespace URIs
- containing non-URI characters.
- </p>
- <h4 id="XML_ParserCreate_MM">
- XML_ParserCreate_MM
- </h4>
- <pre class="fcndec">
- XML_Parser XMLCALL
- XML_ParserCreate_MM(const XML_Char *encoding,
- const XML_Memory_Handling_Suite *ms,
- const XML_Char *sep);
- </pre>
- <pre class="signature">
- typedef struct {
- void *(XMLCALL *malloc_fcn)(size_t size);
- void *(XMLCALL *realloc_fcn)(void *ptr, size_t size);
- void (XMLCALL *free_fcn)(void *ptr);
- } XML_Memory_Handling_Suite;
- </pre>
- <div class="fcndef">
- <p>
- Construct a new parser using the suite of memory handling functions specified
- in <code>ms</code>. If <code>ms</code> is <code>NULL</code>, then use the
- standard set of memory management functions. If <code>sep</code> is
- non-<code>NULL</code>, then namespace processing is enabled in the created
- parser and the character pointed at by sep is used as the separator between the
- namespace URI and the local part of the name.
- </p>
- </div>
- <h4 id="XML_ExternalEntityParserCreate">
- XML_ExternalEntityParserCreate
- </h4>
- <pre class="fcndec">
- XML_Parser XMLCALL
- XML_ExternalEntityParserCreate(XML_Parser p,
- const XML_Char *context,
- const XML_Char *encoding);
- </pre>
- <div class="fcndef">
- <p>
- Construct a new <code>XML_Parser</code> object for parsing an external general
- entity. Context is the context argument passed in a call to an
- ExternalEntityRefHandler. Other state information such as handlers, user data,
- namespace processing is inherited from the parser passed as the 1st argument.
- So you shouldn't need to call any of the behavior changing functions on this
- parser (unless you want it to act differently than the parent parser).
- </p>
- <p>
- <strong>Note:</strong> Please be sure to free subparsers created by
- <code><a href=
- "#XML_ExternalEntityParserCreate">XML_ExternalEntityParserCreate</a></code>
- <em>prior to</em> freeing their related parent parser, as subparsers reference
- and use parts of their respective parent parser, internally. Parent parsers
- must outlive subparsers.
- </p>
- </div>
- <h4 id="XML_ParserFree">
- XML_ParserFree
- </h4>
- <pre class="fcndec">
- void XMLCALL
- XML_ParserFree(XML_Parser p);
- </pre>
- <div class="fcndef">
- <p>
- Free memory used by the parser.
- </p>
- <p>
- <strong>Note:</strong> Your application is responsible for freeing any memory
- associated with <a href="#userdata">user data</a>.
- </p>
- <p>
- <strong>Note:</strong> Please be sure to free subparsers created by
- <code><a href=
- "#XML_ExternalEntityParserCreate">XML_ExternalEntityParserCreate</a></code>
- <em>prior to</em> freeing their related parent parser, as subparsers reference
- and use parts of their respective parent parser, internally. Parent parsers
- must outlive subparsers.
- </p>
- </div>
- <h4 id="XML_ParserReset">
- XML_ParserReset
- </h4>
- <pre class="fcndec">
- XML_Bool XMLCALL
- XML_ParserReset(XML_Parser p,
- const XML_Char *encoding);
- </pre>
- <div class="fcndef">
- Clean up the memory structures maintained by the parser so that it may be used
- again. After this has been called, <code>parser</code> is ready to start parsing
- a new document. All handlers are cleared from the parser, except for the
- unknownEncodingHandler. The parser's external state is re-initialized except for
- the values of ns and ns_triplets. This function may not be used on a parser
- created using <code><a href=
- "#XML_ExternalEntityParserCreate">XML_ExternalEntityParserCreate</a></code>; it
- will return <code>XML_FALSE</code> in that case. Returns <code>XML_TRUE</code> on
- success. Your application is responsible for dealing with any memory associated
- with <a href="#userdata">user data</a>.
- </div>
- <h3>
- <a id="parsing" name="parsing">Parsing</a>
- </h3>
- <p>
- To state the obvious: the three parsing functions <code><a href=
- "#XML_Parse">XML_Parse</a></code>, <code><a href=
- "#XML_ParseBuffer">XML_ParseBuffer</a></code> and <code><a href=
- "#XML_GetBuffer">XML_GetBuffer</a></code> must not be called from within a
- handler unless they operate on a separate parser instance, that is, one that did
- not call the handler. For example, it is OK to call the parsing functions from
- within an <code>XML_ExternalEntityRefHandler</code>, if they apply to the parser
- created by <code><a href=
- "#XML_ExternalEntityParserCreate">XML_ExternalEntityParserCreate</a></code>.
- </p>
- <p>
- Note: The <code>len</code> argument passed to these functions should be
- considerably less than the maximum value for an integer, as it could create an
- integer overflow situation if the added lengths of a buffer and the unprocessed
- portion of the previous buffer exceed the maximum integer value. Input data at
- the end of a buffer will remain unprocessed if it is part of an XML token for
- which the end is not part of that buffer.
- </p>
- <p>
- <a id="isFinal" name="isFinal"></a>The application <em>must</em> make a
- concluding <code><a href="#XML_Parse">XML_Parse</a></code> or <code><a href=
- "#XML_ParseBuffer">XML_ParseBuffer</a></code> call with <code>isFinal</code> set
- to <code>XML_TRUE</code>.
- </p>
- <h4 id="XML_Parse">
- XML_Parse
- </h4>
- <pre class="fcndec">
- enum XML_Status XMLCALL
- XML_Parse(XML_Parser p,
- const char *s,
- int len,
- int isFinal);
- </pre>
- <pre class="signature">
- enum XML_Status {
- XML_STATUS_ERROR = 0,
- XML_STATUS_OK = 1
- };
- </pre>
- <div class="fcndef">
- <p>
- Parse some more of the document. The string <code>s</code> is a buffer
- containing part (or perhaps all) of the document. The number of bytes of <code>s</code> that
- are part of the document is indicated by <code>len</code>. This means that
- <code>s</code> doesn't have to be null-terminated. It also means that if
- <code>len</code> is larger than the number of bytes in the block of memory that
- <code>s</code> points at, then a memory fault is likely. Negative values for
- <code>len</code> are rejected since Expat 2.2.1. The <code>isFinal</code>
- parameter informs the parser that this is the last piece of the document.
- Frequently, the last piece is empty (i.e. <code>len</code> is zero.)
- </p>
- <p>
- If a parse error occurred, it returns <code>XML_STATUS_ERROR</code>. Otherwise
- it returns <code>XML_STATUS_OK</code>. Note that regardless of the return
- value, there is no guarantee that all provided input has been parsed; only
- after <a href="#isFinal">the concluding call</a> will all handler callbacks and
- parsing errors have happened.
- </p>
- <p>
- Simplified, <code>XML_Parse</code> can be considered a convenience wrapper that
- pairs calls to <code><a href="#XML_GetBuffer">XML_GetBuffer</a></code> and
- <code><a href="#XML_ParseBuffer">XML_ParseBuffer</a></code> (when Expat is
- built with macro <code>XML_CONTEXT_BYTES</code> defined to a positive value,
- which is both common and default). <code>XML_Parse</code> is then functionally
- equivalent to calling <code><a href="#XML_GetBuffer">XML_GetBuffer</a></code>,
- <code>memcpy</code>, and <code><a href=
- "#XML_ParseBuffer">XML_ParseBuffer</a></code>.
- </p>
- <p>
- To avoid double copying of the input, direct use of functions <code><a href=
- "#XML_GetBuffer">XML_GetBuffer</a></code> and <code><a href=
- "#XML_ParseBuffer">XML_ParseBuffer</a></code> is advised for most production
- use, e.g. if you're using <code>read</code> or similar functionality to fill
- your buffers, fill directly into the buffer from <code><a href=
- "#XML_GetBuffer">XML_GetBuffer</a></code>, then parse with <code><a href=
- "#XML_ParseBuffer">XML_ParseBuffer</a></code>.
- </p>
- </div>
- <h4 id="XML_ParseBuffer">
- XML_ParseBuffer
- </h4>
- <pre class="fcndec">
- enum XML_Status XMLCALL
- XML_ParseBuffer(XML_Parser p,
- int len,
- int isFinal);
- </pre>
- <div class="fcndef">
- <p>
- This is just like <code><a href="#XML_Parse">XML_Parse</a></code>, except in
- this case Expat provides the buffer. By obtaining the buffer from Expat with
- the <code><a href="#XML_GetBuffer">XML_GetBuffer</a></code> function, the
- application can avoid double copying of the input.
- </p>
- <p>
- Negative values for <code>len</code> are rejected since Expat 2.6.3.
- </p>
- </div>
- <h4 id="XML_GetBuffer">
- XML_GetBuffer
- </h4>
- <pre class="fcndec">
- void * XMLCALL
- XML_GetBuffer(XML_Parser p,
- int len);
- </pre>
- <div class="fcndef">
- Obtain a buffer of size <code>len</code> to read a piece of the document into. A
- <code>NULL</code> value is returned if Expat can't allocate enough memory for
- this buffer. A <code>NULL</code> value may also be returned if <code>len</code>
- is zero. This has to be called prior to every call to <code><a href=
- "#XML_ParseBuffer">XML_ParseBuffer</a></code>. A typical use would look like
- this:
- <pre class="eg">
- for (;;) {
- int bytes_read;
- void *buff = XML_GetBuffer(p, BUFF_SIZE);
- if (buff == NULL) {
- /* handle error */
- }
- bytes_read = read(docfd, buff, BUFF_SIZE);
- if (bytes_read &lt; 0) {
- /* handle error */
- }
- if (! XML_ParseBuffer(p, bytes_read, bytes_read == 0)) {
- /* handle parse error */
- }
- if (bytes_read == 0)
- break;
- }
- </pre>
- </div>
- <h4 id="XML_StopParser">
- XML_StopParser
- </h4>
- <pre class="fcndec">
- enum XML_Status XMLCALL
- XML_StopParser(XML_Parser p,
- XML_Bool resumable);
- </pre>
- <div class="fcndef">
- <p>
- Stops parsing, causing <code><a href="#XML_Parse">XML_Parse</a></code> or
- <code><a href="#XML_ParseBuffer">XML_ParseBuffer</a></code> to return. Must be
- called from within a call-back handler, except when aborting (when
- <code>resumable</code> is <code>XML_FALSE</code>) an already suspended parser.
- Some call-backs may still follow because they would otherwise get lost,
- including
- </p>
- <ul>
- <li>the end element handler for empty elements when stopped in the start
- element handler,
- </li>
- <li>the end namespace declaration handler when stopped in the end element
- handler,
- </li>
- <li>the character data handler when stopped in the character data handler while
- making multiple call-backs on a contiguous chunk of characters,
- </li>
- </ul>
- <p>
- and possibly others.
- </p>
- <p>
- This can be called from most handlers, including DTD related call-backs, except
- when parsing an external parameter entity and <code>resumable</code> is
- <code>XML_TRUE</code>. Returns <code>XML_STATUS_OK</code> when successful,
- <code>XML_STATUS_ERROR</code> otherwise. The possible error codes are:
- </p>
- <dl>
- <dt>
- <code>XML_ERROR_NOT_STARTED</code>
- </dt>
- <dd>
- when stopping or suspending a parser before it has started, added in Expat
- 2.6.4.
- </dd>
- <dt>
- <code>XML_ERROR_SUSPENDED</code>
- </dt>
- <dd>
- when suspending an already suspended parser.
- </dd>
- <dt>
- <code>XML_ERROR_FINISHED</code>
- </dt>
- <dd>
- when the parser has already finished.
- </dd>
- <dt>
- <code>XML_ERROR_SUSPEND_PE</code>
- </dt>
- <dd>
- when suspending while parsing an external PE.
- </dd>
- </dl>
- <p>
- Since the stop/resume feature requires application support in the outer parsing
- loop, it is an error to call this function for a parser not being handled
- appropriately; see <a href="#stop-resume">Temporarily Stopping Parsing</a> for
- more information.
- </p>
- <p>
- When <code>resumable</code> is <code>XML_TRUE</code> then parsing is
- <em>suspended</em>, that is, <code><a href="#XML_Parse">XML_Parse</a></code>
- and <code><a href="#XML_ParseBuffer">XML_ParseBuffer</a></code> return
- <code>XML_STATUS_SUSPENDED</code>. Otherwise, parsing is <em>aborted</em>, that
- is, <code><a href="#XML_Parse">XML_Parse</a></code> and <code><a href=
- "#XML_ParseBuffer">XML_ParseBuffer</a></code> return
- <code>XML_STATUS_ERROR</code> with error code <code>XML_ERROR_ABORTED</code>.
- </p>
- <p>
- <strong>Note:</strong> This will be applied to the current parser instance
- only, that is, if there is a parent parser then it will continue parsing when
- the external entity reference handler returns. It is up to the implementation
- of that handler to call <code><a href=
- "#XML_StopParser">XML_StopParser</a></code> on the parent parser (recursively),
- if one wants to stop parsing altogether.
- </p>
- <p>
- When suspended, parsing can be resumed by calling <code><a href=
- "#XML_ResumeParser">XML_ResumeParser</a></code>.
- </p>
- <p>
- New in Expat 1.95.8.
- </p>
- </div>
- <h4 id="XML_ResumeParser">
- XML_ResumeParser
- </h4>
- <pre class="fcndec">
- enum XML_Status XMLCALL
- XML_ResumeParser(XML_Parser p);
- </pre>
- <div class="fcndef">
- <p>
- Resumes parsing after it has been suspended with <code><a href=
- "#XML_StopParser">XML_StopParser</a></code>. Must not be called from within a
- handler call-back. Returns the same status codes as <code><a href=
- "#XML_Parse">XML_Parse</a></code> or <code><a href=
- "#XML_ParseBuffer">XML_ParseBuffer</a></code>. An additional error code,
- <code>XML_ERROR_NOT_SUSPENDED</code>, will be returned if the parser was not
- currently suspended.
- </p>
- <p>
- <strong>Note:</strong> This must be called on the most deeply nested child
- parser instance first, and on its parent parser only after the child parser has
- finished, to be applied recursively until the document entity's parser is
- restarted. That is, the parent parser will not resume by itself and it is up to
- the application to call <code><a href=
- "#XML_ResumeParser">XML_ResumeParser</a></code> on it at the appropriate
- moment.
- </p>
- <p>
- New in Expat 1.95.8.
- </p>
- </div>
- <h4 id="XML_GetParsingStatus">
- XML_GetParsingStatus
- </h4>
- <pre class="fcndec">
- void XMLCALL
- XML_GetParsingStatus(XML_Parser p,
- XML_ParsingStatus *status);
- </pre>
- <pre class="signature">
- enum XML_Parsing {
- XML_INITIALIZED,
- XML_PARSING,
- XML_FINISHED,
- XML_SUSPENDED
- };
- typedef struct {
- enum XML_Parsing parsing;
- XML_Bool finalBuffer;
- } XML_ParsingStatus;
- </pre>
- <div class="fcndef">
- <p>
- Returns status of parser with respect to being initialized, parsing, finished,
- or suspended, and whether the final buffer is being processed. The
- <code>status</code> parameter <em>must not</em> be <code>NULL</code>.
- </p>
- <p>
- New in Expat 1.95.8.
- </p>
- </div>
- <h3>
- <a id="setting" name="setting">Handler Setting</a>
- </h3>
- <p>
- Although handlers are typically set prior to parsing and left alone, an
- application may choose to set or change the handler for a parsing event while the
- parse is in progress. For instance, your application may choose to ignore all
- text not descended from a <code>para</code> element. One way it could do this is
- to set the character handler when a para start tag is seen, and unset it for the
- corresponding end tag.
- </p>
- <p>
- A handler may be <em>unset</em> by providing a <code>NULL</code> pointer to the
- appropriate handler setter. None of the handler setting functions have a return
- value.
- </p>
- <p>
- Your handlers will be receiving strings in arrays of type <code>XML_Char</code>.
- This type is conditionally defined in expat.h as either <code>char</code>,
- <code>wchar_t</code> or <code>unsigned short</code>. The first implies UTF-8
- encoding, the latter two imply UTF-16 encoding. Note that you'll receive them in
- this form independent of the original encoding of the document.
- </p>
- <div class="handler">
- <h4 id="XML_SetStartElementHandler">
- XML_SetStartElementHandler
- </h4>
- <pre class="setter">
- void XMLCALL
- XML_SetStartElementHandler(XML_Parser p,
- XML_StartElementHandler start);
- </pre>
- <pre class="signature">
- typedef void
- (XMLCALL *XML_StartElementHandler)(void *userData,
- const XML_Char *name,
- const XML_Char **atts);
- </pre>
- <p>
- Set handler for start (and empty) tags. Attributes are passed to the start
- handler as a pointer to a vector of char pointers. Each attribute seen in a
- start (or empty) tag occupies 2 consecutive places in this vector: the
- attribute name followed by the attribute value. These pairs are terminated by a
- <code>NULL</code> pointer.
- </p>
- <p>
- Note that an empty tag generates a call to both start and end handlers (in that
- order).
- </p>
- </div>
- <div class="handler">
- <h4 id="XML_SetEndElementHandler">
- XML_SetEndElementHandler
- </h4>
- <pre class="setter">
- void XMLCALL
- XML_SetEndElementHandler(XML_Parser p,
- XML_EndElementHandler end);
- </pre>
- <pre class="signature">
- typedef void
- (XMLCALL *XML_EndElementHandler)(void *userData,
- const XML_Char *name);
- </pre>
- <p>
- Set handler for end (and empty) tags. As noted above, an empty tag generates a
- call to both start and end handlers.
- </p>
- </div>
- <div class="handler">
- <h4 id="XML_SetElementHandler">
- XML_SetElementHandler
- </h4>
- <pre class="setter">
- void XMLCALL
- XML_SetElementHandler(XML_Parser p,
- XML_StartElementHandler start,
- XML_EndElementHandler end);
- </pre>
- <p>
- Set handlers for start and end tags with one call.
- </p>
- </div>
- <div class="handler">
- <h4 id="XML_SetCharacterDataHandler">
- XML_SetCharacterDataHandler
- </h4>
- <pre class="setter">
- void XMLCALL
- XML_SetCharacterDataHandler(XML_Parser p,
- XML_CharacterDataHandler charhndl)
- </pre>
- <pre class="signature">
- typedef void
- (XMLCALL *XML_CharacterDataHandler)(void *userData,
- const XML_Char *s,
- int len);
- </pre>
- <p>
- Set a text handler. The string your handler receives is <em>NOT
- null-terminated</em>. You have to use the length argument to deal with the end
- of the string. A single block of contiguous text free of markup may still
- result in a sequence of calls to this handler. In other words, if you're
- searching for a pattern in the text, it may be split across calls to this
- handler. Note: Setting this handler to <code>NULL</code> may <em>NOT
- immediately</em> terminate call-backs if the parser is currently processing
- such a single block of contiguous markup-free text, as the parser will continue
- calling back until the end of the block is reached.
- </p>
- </div>
- <div class="handler">
- <h4 id="XML_SetProcessingInstructionHandler">
- XML_SetProcessingInstructionHandler
- </h4>
- <pre class="setter">
- void XMLCALL
- XML_SetProcessingInstructionHandler(XML_Parser p,
- XML_ProcessingInstructionHandler proc)
- </pre>
- <pre class="signature">
- typedef void
- (XMLCALL *XML_ProcessingInstructionHandler)(void *userData,
- const XML_Char *target,
- const XML_Char *data);
- </pre>
- <p>
- Set a handler for processing instructions. The target is the first word in the
- processing instruction. The data is the rest of the characters in it after
- skipping all whitespace after the initial word.
- </p>
- </div>
- <div class="handler">
- <h4 id="XML_SetCommentHandler">
- XML_SetCommentHandler
- </h4>
- <pre class="setter">
- void XMLCALL
- XML_SetCommentHandler(XML_Parser p,
- XML_CommentHandler cmnt)
- </pre>
- <pre class="signature">
- typedef void
- (XMLCALL *XML_CommentHandler)(void *userData,
- const XML_Char *data);
- </pre>
- <p>
- Set a handler for comments. The data is all text inside the comment delimiters.
- </p>
- </div>
- <div class="handler">
- <h4 id="XML_SetStartCdataSectionHandler">
- XML_SetStartCdataSectionHandler
- </h4>
- <pre class="setter">
- void XMLCALL
- XML_SetStartCdataSectionHandler(XML_Parser p,
- XML_StartCdataSectionHandler start);
- </pre>
- <pre class="signature">
- typedef void
- (XMLCALL *XML_StartCdataSectionHandler)(void *userData);
- </pre>
- <p>
- Set a handler that gets called at the beginning of a CDATA section.
- </p>
- </div>
- <div class="handler">
- <h4 id="XML_SetEndCdataSectionHandler">
- XML_SetEndCdataSectionHandler
- </h4>
- <pre class="setter">
- void XMLCALL
- XML_SetEndCdataSectionHandler(XML_Parser p,
- XML_EndCdataSectionHandler end);
- </pre>
- <pre class="signature">
- typedef void
- (XMLCALL *XML_EndCdataSectionHandler)(void *userData);
- </pre>
- <p>
- Set a handler that gets called at the end of a CDATA section.
- </p>
- </div>
- <div class="handler">
- <h4 id="XML_SetCdataSectionHandler">
- XML_SetCdataSectionHandler
- </h4>
- <pre class="setter">
- void XMLCALL
- XML_SetCdataSectionHandler(XML_Parser p,
- XML_StartCdataSectionHandler start,
- XML_EndCdataSectionHandler end)
- </pre>
- <p>
- Sets both CDATA section handlers with one call.
- </p>
- </div>
- <div class="handler">
- <h4 id="XML_SetDefaultHandler">
- XML_SetDefaultHandler
- </h4>
- <pre class="setter">
- void XMLCALL
- XML_SetDefaultHandler(XML_Parser p,
- XML_DefaultHandler hndl)
- </pre>
- <pre class="signature">
- typedef void
- (XMLCALL *XML_DefaultHandler)(void *userData,
- const XML_Char *s,
- int len);
- </pre>
- <p>
- Sets a handler for any characters in the document which wouldn't otherwise be
- handled. This includes both data for which no handlers can be set (like some
- kinds of DTD declarations) and data which could be reported but which currently
- has no handler set. The characters are passed exactly as they were present in
- the XML document except that they will be encoded in UTF-8 or UTF-16. Line
- boundaries are not normalized. Note that a byte order mark character is not
- passed to the default handler. There are no guarantees about how characters are
- divided between calls to the default handler: for example, a comment might be
- split between multiple calls. Setting the handler with this call has the side
- effect of turning off expansion of references to internally defined general
- entities. Instead these references are passed to the default handler.
- </p>
- <p>
- See also <code><a href="#XML_DefaultCurrent">XML_DefaultCurrent</a></code>.
- </p>
- </div>
- <div class="handler">
- <h4 id="XML_SetDefaultHandlerExpand">
- XML_SetDefaultHandlerExpand
- </h4>
- <pre class="setter">
- void XMLCALL
- XML_SetDefaultHandlerExpand(XML_Parser p,
- XML_DefaultHandler hndl)
- </pre>
- <pre class="signature">
- typedef void
- (XMLCALL *XML_DefaultHandler)(void *userData,
- const XML_Char *s,
- int len);
- </pre>
- <p>
- This sets a default handler, but doesn't inhibit the expansion of internal
- entity references. The entity reference will not be passed to the default
- handler.
- </p>
- <p>
- See also <code><a href="#XML_DefaultCurrent">XML_DefaultCurrent</a></code>.
- </p>
- </div>
- <div class="handler">
- <h4 id="XML_SetExternalEntityRefHandler">
- XML_SetExternalEntityRefHandler
- </h4>
- <pre class="setter">
- void XMLCALL
- XML_SetExternalEntityRefHandler(XML_Parser p,
- XML_ExternalEntityRefHandler hndl)
- </pre>
- <pre class="signature">
- typedef int
- (XMLCALL *XML_ExternalEntityRefHandler)(XML_Parser p,
- const XML_Char *context,
- const XML_Char *base,
- const XML_Char *systemId,
- const XML_Char *publicId);
- </pre>
- <p>
- Set an external entity reference handler. This handler is also called for
- processing an external DTD subset if parameter entity parsing is in effect.
- (See <a href=
- "#XML_SetParamEntityParsing"><code>XML_SetParamEntityParsing</code></a>.)
- </p>
- <p>
- <strong>Warning:</strong> Using an external entity reference handler can lead
- to <a href="https://libexpat.github.io/doc/xml-security/#external-entities">XXE
- vulnerabilities</a>. It should only be used in applications that do not parse
- untrusted XML input.
- </p>
- <p>
- The <code>context</code> parameter specifies the parsing context in the format
- expected by the <code>context</code> argument to <code><a href=
- "#XML_ExternalEntityParserCreate">XML_ExternalEntityParserCreate</a></code>.
- <code>context</code> is valid only until the handler returns, so if the referenced
- entity is to be parsed later, it must be copied. <code>context</code> is
- <code>NULL</code> only when the entity is a parameter entity, which is how one
- can differentiate between general and parameter entities.
- </p>
- <p>
- The <code>base</code> parameter is the base to use for relative system
- identifiers. It is set by <code><a href="#XML_SetBase">XML_SetBase</a></code>
- and may be <code>NULL</code>. The <code>publicId</code> parameter is the public
- id given in the entity declaration and may be <code>NULL</code>.
- <code>systemId</code> is the system identifier specified in the entity
- declaration and is never <code>NULL</code>.
- </p>
- <p>
- There are a couple of ways in which this handler differs from others. First,
- this handler returns a status indicator (an integer).
- <code>XML_STATUS_OK</code> should be returned for successful handling of the
- external entity reference. Returning <code>XML_STATUS_ERROR</code> indicates
- failure, and causes the calling parser to return an
- <code>XML_ERROR_EXTERNAL_ENTITY_HANDLING</code> error.
- </p>
- <p>
- Second, instead of having the user data as its first argument, it receives the
- parser that encountered the entity reference. This, along with the context
- parameter, may be used as arguments to a call to <code><a href=
- "#XML_ExternalEntityParserCreate">XML_ExternalEntityParserCreate</a></code>.
- Using the returned parser, the body of the external entity can be recursively
- parsed.
- </p>
- <p>
- Since this handler may be called recursively, it should not be saving
- information into global or static variables.
- </p>
- </div>
- <h4 id="XML_SetExternalEntityRefHandlerArg">
- XML_SetExternalEntityRefHandlerArg
- </h4>
- <pre class="fcndec">
- void XMLCALL
- XML_SetExternalEntityRefHandlerArg(XML_Parser p,
- void *arg)
- </pre>
- <div class="fcndef">
- <p>
- Set the argument passed to the ExternalEntityRefHandler. If <code>arg</code> is
- not <code>NULL</code>, it is the new value passed to the handler set using
- <code><a href=
- "#XML_SetExternalEntityRefHandler">XML_SetExternalEntityRefHandler</a></code>;
- if <code>arg</code> is <code>NULL</code>, the argument passed to the handler
- function will be the parser object itself.
- </p>
- <p>
- <strong>Note:</strong> The type of <code>arg</code> and the type of the first
- argument to the ExternalEntityRefHandler do not match. This function takes a
- <code>void *</code> to be passed to the handler, while the handler accepts an
- <code>XML_Parser</code>. This is a historical accident, but will not be
- corrected before Expat 2.0 (at the earliest) to avoid causing compiler warnings
- for code that's known to work with this API. It is the responsibility of the
- application code to know the actual type of the argument passed to the handler
- and to manage it properly.
- </p>
- </div>
- <div class="handler">
- <h4 id="XML_SetSkippedEntityHandler">
- XML_SetSkippedEntityHandler
- </h4>
- <pre class="setter">
- void XMLCALL
- XML_SetSkippedEntityHandler(XML_Parser p,
- XML_SkippedEntityHandler handler)
- </pre>
- <pre class="signature">
- typedef void
- (XMLCALL *XML_SkippedEntityHandler)(void *userData,
- const XML_Char *entityName,
- int is_parameter_entity);
- </pre>
- <p>
- Set a skipped entity handler. This is called in two situations:
- </p>
- <ol>
- <li>An entity reference is encountered for which no declaration has been read
- <em>and</em> this is not an error.
- </li>
- <li>An internal entity reference is read, but not expanded, because <a href=
- "#XML_SetDefaultHandler"><code>XML_SetDefaultHandler</code></a> has been
- called.
- </li>
- </ol>
- <p>
- The <code>is_parameter_entity</code> argument will be non-zero for a parameter
- entity and zero for a general entity.
- </p>
- <p>
- Note: Skipped parameter entities in declarations and skipped general entities
- in attribute values cannot be reported, because the event would be out of sync
- with the reporting of the declarations or attribute values.
- </p>
- </div>
- <div class="handler">
- <h4 id="XML_SetUnknownEncodingHandler">
- XML_SetUnknownEncodingHandler
- </h4>
- <pre class="setter">
- void XMLCALL
- XML_SetUnknownEncodingHandler(XML_Parser p,
- XML_UnknownEncodingHandler enchandler,
- void *encodingHandlerData)
- </pre>
- <pre class="signature">
- typedef int
- (XMLCALL *XML_UnknownEncodingHandler)(void *encodingHandlerData,
- const XML_Char *name,
- XML_Encoding *info);
- typedef struct {
- int map[256];
- void *data;
- int (XMLCALL *convert)(void *data, const char *s);
- void (XMLCALL *release)(void *data);
- } XML_Encoding;
- </pre>
- <p>
- Set a handler to deal with encodings other than the <a href=
- "#builtin_encodings">built in set</a>. This should be done before
- <code><a href="#XML_Parse">XML_Parse</a></code> or <code><a href=
- "#XML_ParseBuffer">XML_ParseBuffer</a></code> has been called on the given
- parser.
- </p>
- <p>
- If the handler knows how to deal with an encoding with the given name, it
- should fill in the <code>info</code> data structure and return
- <code>XML_STATUS_OK</code>. Otherwise it should return
- <code>XML_STATUS_ERROR</code>. The handler will be called at most once per
- parsed (external) entity. The optional application data pointer
- <code>encodingHandlerData</code> will be passed back to the handler.
- </p>
- <p>
- The map array contains information for every possible leading byte in a byte
- sequence. If the corresponding value is >= 0, then it's a single byte
- sequence and the byte encodes that Unicode value. If the value is -1, then that
- byte is invalid as the initial byte in a sequence. If the value is -n, where n
- is an integer > 1, then n is the number of bytes in the sequence and the
- actual conversion is accomplished by a call to the function pointed at by
- convert. This function may return -1 if the sequence itself is invalid. The
- convert pointer may be <code>NULL</code> if there are only single byte codes.
- The data parameter passed to the convert function is the data pointer from
- <code>XML_Encoding</code>. The string s is <em>NOT</em> null-terminated and
- points at the sequence of bytes to be converted.
- </p>
- <p>
- The function pointed at by <code>release</code> is called by the parser when it
- is finished with the encoding. It may be <code>NULL</code>.
- </p>
- </div>
- <div class="handler">
- <h4 id="XML_SetStartNamespaceDeclHandler">
- XML_SetStartNamespaceDeclHandler
- </h4>
- <pre class="setter">
- void XMLCALL
- XML_SetStartNamespaceDeclHandler(XML_Parser p,
- XML_StartNamespaceDeclHandler start);
- </pre>
- <pre class="signature">
- typedef void
- (XMLCALL *XML_StartNamespaceDeclHandler)(void *userData,
- const XML_Char *prefix,
- const XML_Char *uri);
- </pre>
- <p>
- Set a handler to be called when a namespace is declared. Namespace declarations
- occur inside start tags. But the namespace declaration start handler is called
- before the start tag handler for each namespace declared in that start tag.
- </p>
- </div>
- <div class="handler">
- <h4 id="XML_SetEndNamespaceDeclHandler">
- XML_SetEndNamespaceDeclHandler
- </h4>
- <pre class="setter">
- void XMLCALL
- XML_SetEndNamespaceDeclHandler(XML_Parser p,
- XML_EndNamespaceDeclHandler end);
- </pre>
- <pre class="signature">
- typedef void
- (XMLCALL *XML_EndNamespaceDeclHandler)(void *userData,
- const XML_Char *prefix);
- </pre>
- <p>
- Set a handler to be called when leaving the scope of a namespace declaration.
- This will be called, for each namespace declaration, after the handler for the
- end tag of the element in which the namespace was declared.
- </p>
- </div>
- <div class="handler">
- <h4 id="XML_SetNamespaceDeclHandler">
- XML_SetNamespaceDeclHandler
- </h4>
- <pre class="setter">
- void XMLCALL
- XML_SetNamespaceDeclHandler(XML_Parser p,
- XML_StartNamespaceDeclHandler start,
- XML_EndNamespaceDeclHandler end)
- </pre>
- <p>
- Sets both namespace declaration handlers with a single call.
- </p>
- </div>
- <div class="handler">
- <h4 id="XML_SetXmlDeclHandler">
- XML_SetXmlDeclHandler
- </h4>
- <pre class="setter">
- void XMLCALL
- XML_SetXmlDeclHandler(XML_Parser p,
- XML_XmlDeclHandler xmldecl);
- </pre>
- <pre class="signature">
- typedef void
- (XMLCALL *XML_XmlDeclHandler)(void *userData,
- const XML_Char *version,
- const XML_Char *encoding,
- int standalone);
- </pre>
- <p>
- Sets a handler that is called for XML declarations and also for text
- declarations discovered in external entities. The way to distinguish is that
- the <code>version</code> parameter will be <code>NULL</code> for text
- declarations. The <code>encoding</code> parameter may be <code>NULL</code> for
- an XML declaration. The <code>standalone</code> argument will contain -1, 0, or
- 1 indicating respectively that there was no standalone parameter in the
- declaration, that it was given as no, or that it was given as yes.
- </p>
- </div>
- <div class="handler">
- <h4 id="XML_SetStartDoctypeDeclHandler">
- XML_SetStartDoctypeDeclHandler
- </h4>
- <pre class="setter">
- void XMLCALL
- XML_SetStartDoctypeDeclHandler(XML_Parser p,
- XML_StartDoctypeDeclHandler start);
- </pre>
- <pre class="signature">
- typedef void
- (XMLCALL *XML_StartDoctypeDeclHandler)(void *userData,
- const XML_Char *doctypeName,
- const XML_Char *sysid,
- const XML_Char *pubid,
- int has_internal_subset);
- </pre>
- <p>
- Set a handler that is called at the start of a DOCTYPE declaration, before any
- external or internal subset is parsed. Both <code>sysid</code> and
- <code>pubid</code> may be <code>NULL</code>. The
- <code>has_internal_subset</code> parameter will be non-zero if the DOCTYPE declaration
- has an internal subset.
- </p>
- </div>
- <div class="handler">
- <h4 id="XML_SetEndDoctypeDeclHandler">
- XML_SetEndDoctypeDeclHandler
- </h4>
- <pre class="setter">
- void XMLCALL
- XML_SetEndDoctypeDeclHandler(XML_Parser p,
- XML_EndDoctypeDeclHandler end);
- </pre>
- <pre class="signature">
- typedef void
- (XMLCALL *XML_EndDoctypeDeclHandler)(void *userData);
- </pre>
- <p>
- Set a handler that is called at the end of a DOCTYPE declaration, after parsing
- any external subset.
- </p>
- </div>
- <div class="handler">
- <h4 id="XML_SetDoctypeDeclHandler">
- XML_SetDoctypeDeclHandler
- </h4>
- <pre class="setter">
- void XMLCALL
- XML_SetDoctypeDeclHandler(XML_Parser p,
- XML_StartDoctypeDeclHandler start,
- XML_EndDoctypeDeclHandler end);
- </pre>
- <p>
- Set both doctype handlers with one call.
- </p>
- </div>
- <div class="handler">
- <h4 id="XML_SetElementDeclHandler">
- XML_SetElementDeclHandler
- </h4>
- <pre class="setter">
- void XMLCALL
- XML_SetElementDeclHandler(XML_Parser p,
- XML_ElementDeclHandler eldecl);
- </pre>
- <pre class="signature">
- typedef void
- (XMLCALL *XML_ElementDeclHandler)(void *userData,
- const XML_Char *name,
- XML_Content *model);
- </pre>
- <pre class="signature">
- enum XML_Content_Type {
- XML_CTYPE_EMPTY = 1,
- XML_CTYPE_ANY,
- XML_CTYPE_MIXED,
- XML_CTYPE_NAME,
- XML_CTYPE_CHOICE,
- XML_CTYPE_SEQ
- };
- enum XML_Content_Quant {
- XML_CQUANT_NONE,
- XML_CQUANT_OPT,
- XML_CQUANT_REP,
- XML_CQUANT_PLUS
- };
- typedef struct XML_cp XML_Content;
- struct XML_cp {
- enum XML_Content_Type type;
- enum XML_Content_Quant quant;
- const XML_Char * name;
- unsigned int numchildren;
- XML_Content * children;
- };
- </pre>
- <p>
- Sets a handler for element declarations in a DTD. The handler gets called with
- the name of the element in the declaration and a pointer to a structure that
- contains the element model. It's the user code's responsibility to free the model
- when finished with it via a call to <code><a href=
- "#XML_FreeContentModel">XML_FreeContentModel</a></code>. There is no need to
- free the model from within the handler; it can be kept around and freed at a later
- stage.
- </p>
- <p>
- The <code>model</code> argument is the root of a tree of
- <code>XML_Content</code> nodes. If <code>type</code> equals
- <code>XML_CTYPE_EMPTY</code> or <code>XML_CTYPE_ANY</code>, then
- <code>quant</code> will be <code>XML_CQUANT_NONE</code>, and the other fields
- will be zero or <code>NULL</code>. If <code>type</code> is
- <code>XML_CTYPE_MIXED</code>, then <code>quant</code> will be
- <code>XML_CQUANT_NONE</code> or <code>XML_CQUANT_REP</code> and
- <code>numchildren</code> will contain the number of elements that are allowed
- to be mixed in and <code>children</code> points to an array of
- <code>XML_Content</code> structures that will all have type <code>XML_CTYPE_NAME</code> with
- no quantification. Only the root node can be type <code>XML_CTYPE_EMPTY</code>,
- <code>XML_CTYPE_ANY</code>, or <code>XML_CTYPE_MIXED</code>.
- </p>
- <p>
- For type <code>XML_CTYPE_NAME</code>, the <code>name</code> field points to the
- name and the <code>numchildren</code> and <code>children</code> fields will be
- zero and <code>NULL</code>. The <code>quant</code> field will indicate any
- quantifiers placed on the name.
- </p>
- <p>
- Types <code>XML_CTYPE_CHOICE</code> and <code>XML_CTYPE_SEQ</code> indicate a
- choice or sequence respectively. The <code>numchildren</code> field indicates
- how many nodes are in the choice or sequence and <code>children</code> points to
- the nodes.
- </p>
- </div>
- <div class="handler">
- <h4 id="XML_SetAttlistDeclHandler">
- XML_SetAttlistDeclHandler
- </h4>
- <pre class="setter">
- void XMLCALL
- XML_SetAttlistDeclHandler(XML_Parser p,
- XML_AttlistDeclHandler attdecl);
- </pre>
- <pre class="signature">
- typedef void
- (XMLCALL *XML_AttlistDeclHandler)(void *userData,
- const XML_Char *elname,
- const XML_Char *attname,
- const XML_Char *att_type,
- const XML_Char *dflt,
- int isrequired);
- </pre>
- <p>
- Set a handler for attlist declarations in the DTD. This handler is called for
- <em>each</em> attribute. So a single attlist declaration with multiple
- attributes declared will generate multiple calls to this handler. The
- <code>elname</code> parameter returns the name of the element for which the
- attribute is being declared. The attribute name is in the <code>attname</code>
- parameter. The attribute type is in the <code>att_type</code> parameter. It is
- the string representing the type in the declaration with whitespace removed.
- </p>
- <p>
- The <code>dflt</code> parameter holds the default value. It will be
- <code>NULL</code> in the case of "#IMPLIED" or "#REQUIRED" attributes. You can
- distinguish these two cases by checking the <code>isrequired</code> parameter,
- which will be true in the case of "#REQUIRED" attributes. Attributes which are
- "#FIXED" will also have a true <code>isrequired</code>, but they will have
- the non-<code>NULL</code> fixed value in the <code>dflt</code> parameter.
- </p>
- </div>
- <div class="handler">
- <h4 id="XML_SetEntityDeclHandler">
- XML_SetEntityDeclHandler
- </h4>
- <pre class="setter">
- void XMLCALL
- XML_SetEntityDeclHandler(XML_Parser p,
- XML_EntityDeclHandler handler);
- </pre>
- <pre class="signature">
- typedef void
- (XMLCALL *XML_EntityDeclHandler)(void *userData,
- const XML_Char *entityName,
- int is_parameter_entity,
- const XML_Char *value,
- int value_length,
- const XML_Char *base,
- const XML_Char *systemId,
- const XML_Char *publicId,
- const XML_Char *notationName);
- </pre>
- <p>
- Sets a handler that will be called for all entity declarations. The
- <code>is_parameter_entity</code> argument will be non-zero in the case of
- parameter entities and zero otherwise.
- </p>
- <p>
- For internal entities (<code>&lt;!ENTITY foo "bar"&gt;</code>),
- <code>value</code> will be non-<code>NULL</code> and <code>systemId</code>,
- <code>publicId</code>, and <code>notationName</code> will all be
- <code>NULL</code>. The value string is <em>not</em> null-terminated; the length
- is provided in the <code>value_length</code> parameter. Do not use
- <code>value_length</code> to test for internal entities, since it is legal to
- have zero-length values. Instead check for whether or not <code>value</code> is
- <code>NULL</code>.
- </p>
- <p>
- The <code>notationName</code> argument will have a non-<code>NULL</code> value
- only for unparsed entity declarations.
- </p>
- </div>
- <div class="handler">
- <h4 id="XML_SetUnparsedEntityDeclHandler">
- XML_SetUnparsedEntityDeclHandler
- </h4>
- <pre class="setter">
- void XMLCALL
- XML_SetUnparsedEntityDeclHandler(XML_Parser p,
- XML_UnparsedEntityDeclHandler h);
- </pre>
- <pre class="signature">
- typedef void
- (XMLCALL *XML_UnparsedEntityDeclHandler)(void *userData,
- const XML_Char *entityName,
- const XML_Char *base,
- const XML_Char *systemId,
- const XML_Char *publicId,
- const XML_Char *notationName);
- </pre>
- <p>
- Set a handler that receives declarations of unparsed entities. These are entity
- declarations that have a notation (NDATA) field:
- </p>
- <div id="eg">
- <pre>
- &lt;!ENTITY logo SYSTEM "images/logo.gif" NDATA gif&gt;
- </pre>
- </div>
- <p>
- This handler is obsolete and is provided for backwards compatibility. Use
- instead <a href="#XML_SetEntityDeclHandler">XML_SetEntityDeclHandler</a>.
- </p>
- </div>
- <div class="handler">
- <h4 id="XML_SetNotationDeclHandler">
- XML_SetNotationDeclHandler
- </h4>
- <pre class="setter">
- void XMLCALL
- XML_SetNotationDeclHandler(XML_Parser p,
- XML_NotationDeclHandler h);
- </pre>
- <pre class="signature">
- typedef void
- (XMLCALL *XML_NotationDeclHandler)(void *userData,
- const XML_Char *notationName,
- const XML_Char *base,
- const XML_Char *systemId,
- const XML_Char *publicId);
- </pre>
- <p>
- Set a handler that receives notation declarations.
- </p>
- </div>
- <div class="handler">
- <h4 id="XML_SetNotStandaloneHandler">
- XML_SetNotStandaloneHandler
- </h4>
- <pre class="setter">
- void XMLCALL
- XML_SetNotStandaloneHandler(XML_Parser p,
- XML_NotStandaloneHandler h);
- </pre>
- <pre class="signature">
- typedef int
- (XMLCALL *XML_NotStandaloneHandler)(void *userData);
- </pre>
- <p>
- Set a handler that is called if the document is not "standalone". This happens
- when there is an external subset or a reference to a parameter entity, but the
- document does not have standalone set to "yes" in an XML declaration. If this handler returns
- <code>XML_STATUS_ERROR</code>, then the parser will throw an
- <code>XML_ERROR_NOT_STANDALONE</code> error.
- </p>
- </div>
- <h3>
- <a id="position" name="position">Parse position and error reporting functions</a>
- </h3>
- <p>
- These are the functions you'll want to call when the parse functions return
- <code>XML_STATUS_ERROR</code> (a parse error has occurred), although the position
- reporting functions are useful outside of errors. The position reported is the
- byte position (in the original document or entity encoding) of the first of the
- sequence of characters that generated the current event (or the error that caused
- the parse functions to return <code>XML_STATUS_ERROR</code>.) The exceptions are
- callbacks triggered by declarations in the document prologue, in which case the
- exact position reported is somewhere in the relevant markup, but not necessarily
- as meaningful as for other events.
- </p>
- <p>
- The position reporting functions are accurate only outside of the DTD. In other
- words, they usually return bogus information when called from within a DTD
- declaration handler.
- </p>
- <h4 id="XML_GetErrorCode">
- XML_GetErrorCode
- </h4>
- <pre class="fcndec">
- enum XML_Error XMLCALL
- XML_GetErrorCode(XML_Parser p);
- </pre>
- <div class="fcndef">
- Return what type of error has occurred.
- </div>
- <h4 id="XML_ErrorString">
- XML_ErrorString
- </h4>
- <pre class="fcndec">
- const XML_LChar * XMLCALL
- XML_ErrorString(enum XML_Error code);
- </pre>
- <div class="fcndef">
- Return a string describing the error corresponding to code. The code should be
- one of the enums that can be returned from <code><a href=
- "#XML_GetErrorCode">XML_GetErrorCode</a></code>.
- </div>
- <h4 id="XML_GetCurrentByteIndex">
- XML_GetCurrentByteIndex
- </h4>
- <pre class="fcndec">
- XML_Index XMLCALL
- XML_GetCurrentByteIndex(XML_Parser p);
- </pre>
- <div class="fcndef">
- Return the byte offset of the position. This always corresponds to the values
- returned by <code><a href=
- "#XML_GetCurrentLineNumber">XML_GetCurrentLineNumber</a></code> and
- <code><a href="#XML_GetCurrentColumnNumber">XML_GetCurrentColumnNumber</a></code>.
- </div>
- <h4 id="XML_GetCurrentLineNumber">
- XML_GetCurrentLineNumber
- </h4>
- <pre class="fcndec">
- XML_Size XMLCALL
- XML_GetCurrentLineNumber(XML_Parser p);
- </pre>
- <div class="fcndef">
- Return the line number of the position. The first line is reported as
- <code>1</code>.
- </div>
- <h4 id="XML_GetCurrentColumnNumber">
- XML_GetCurrentColumnNumber
- </h4>
- <pre class="fcndec">
- XML_Size XMLCALL
- XML_GetCurrentColumnNumber(XML_Parser p);
- </pre>
- <div class="fcndef">
- Return the <em>offset</em>, from the beginning of the current line, of the
- position. The first column is reported as <code>0</code>.
- </div>
- <h4 id="XML_GetCurrentByteCount">
- XML_GetCurrentByteCount
- </h4>
- <pre class="fcndec">
- int XMLCALL
- XML_GetCurrentByteCount(XML_Parser p);
- </pre>
- <div class="fcndef">
- Return the number of bytes in the current event. Returns <code>0</code> if the
- event is inside a reference to an internal entity and for the end-tag event for
- empty element tags (the latter can be used to distinguish empty-element tags from
- empty elements using separate start and end tags).
- </div>
- <h4 id="XML_GetInputContext">
- XML_GetInputContext
- </h4>
- <pre class="fcndec">
- const char * XMLCALL
- XML_GetInputContext(XML_Parser p,
- int *offset,
- int *size);
- </pre>
- <div class="fcndef">
- <p>
- Returns the parser's input buffer, sets the integer pointed at by
- <code>offset</code> to the offset within this buffer of the current parse
- position, and sets the integer pointed at by <code>size</code> to the size of
- the returned buffer.
- </p>
- <p>
- This should only be called from within a handler during an active parse and the
- returned buffer should only be referred to from within the handler that made
- the call. This input buffer contains the untranslated bytes of the input.
- </p>
- <p>
- Only a limited amount of context is kept, so if the event triggering a call
- spans over a very large amount of input, the actual parse position may be
- before the beginning of the buffer.
- </p>
- <p>
- If <code>XML_CONTEXT_BYTES</code> is zero, this will always return
- <code>NULL</code>.
- </p>
- </div>
- <h3>
- <a id="attack-protection" name="attack-protection">Attack Protection</a><a id=
- "billion-laughs" name="billion-laughs"></a>
- </h3>
- <h4 id="XML_SetBillionLaughsAttackProtectionMaximumAmplification">
- XML_SetBillionLaughsAttackProtectionMaximumAmplification
- </h4>
- <pre class="fcndec">
- /* Added in Expat 2.4.0. */
- XML_Bool XMLCALL
- XML_SetBillionLaughsAttackProtectionMaximumAmplification(XML_Parser p,
- float maximumAmplificationFactor);
- </pre>
- <div class="fcndef">
- <p>
- Sets the maximum tolerated amplification factor for protection against <a href=
- "https://en.wikipedia.org/wiki/Billion_laughs_attack">billion laughs
- attacks</a> (default: <code>100.0</code>) of parser <code>p</code> to
- <code>maximumAmplificationFactor</code>, and returns <code>XML_TRUE</code> upon
- success and <code>XML_FALSE</code> upon error.
- </p>
- <p>
- Once the <a href=
- "#XML_SetBillionLaughsAttackProtectionActivationThreshold">threshold for
- activation</a> is reached, the amplification factor is calculated as ..
- </p>
- <pre>amplification := (direct + indirect) / direct</pre>
- <p>
- .. while parsing, where <code>direct</code> is the number of bytes read from
- the primary document in parsing and <code>indirect</code> is the number of
- bytes added by expanding entities and reading of external DTD files, combined.
- </p>
- <p>
- For a call to
- <code>XML_SetBillionLaughsAttackProtectionMaximumAmplification</code> to
- succeed:
- </p>
- <ul>
- <li>parser <code>p</code> must be a non-<code>NULL</code> root parser (without
- any parent parsers) and
- </li>
- <li>
- <code>maximumAmplificationFactor</code> must be non-<code>NaN</code> and
- greater than or equal to <code>1.0</code>.
- </li>
- </ul>
- <p>
- <strong>Note:</strong> If you ever need to increase this value for non-attack
- payload, please <a href="https://github.com/libexpat/libexpat/issues">file a
- bug report</a>.
- </p>
- <p>
- <strong>Note:</strong> Peak amplifications of factor 15,000 for the entire
- payload and of factor 30,000 in the middle of parsing have been observed with
- small benign files in practice. So if you do reduce the maximum allowed
- amplification, please make sure that the activation threshold is still big
- enough to not end up with undesired false positives (i.e. benign files being
- rejected).
- </p>
- </div>
- <h4 id="XML_SetBillionLaughsAttackProtectionActivationThreshold">
- XML_SetBillionLaughsAttackProtectionActivationThreshold
- </h4>
- <pre class="fcndec">
- /* Added in Expat 2.4.0. */
- XML_Bool XMLCALL
- XML_SetBillionLaughsAttackProtectionActivationThreshold(XML_Parser p,
- unsigned long long activationThresholdBytes);
- </pre>
- <div class="fcndef">
- <p>
- Sets the number of output bytes (including amplification from entity expansion and
- reading DTD files) needed to activate protection against <a href=
- "https://en.wikipedia.org/wiki/Billion_laughs_attack">billion laughs
- attacks</a> (default: <code>8 MiB</code>) of parser <code>p</code> to
- <code>activationThresholdBytes</code>, and returns <code>XML_TRUE</code> upon
- success and <code>XML_FALSE</code> upon error.
- </p>
- <p>
- For a call to
- <code>XML_SetBillionLaughsAttackProtectionActivationThreshold</code> to
- succeed:
- </p>
- <ul>
- <li>parser <code>p</code> must be a non-<code>NULL</code> root parser (without
- any parent parsers).
- </li>
- </ul>
- <p>
- <strong>Note:</strong> If you ever need to increase this value for non-attack
- payload, please <a href="https://github.com/libexpat/libexpat/issues">file a
- bug report</a>.
- </p>
- <p>
- <strong>Note:</strong> Activation thresholds below 4 MiB are known to break
- support for <a href=
- "https://en.wikipedia.org/wiki/Darwin_Information_Typing_Architecture">DITA</a>
- 1.3 payload and are hence not recommended.
- </p>
- </div>
- <h4 id="XML_SetAllocTrackerMaximumAmplification">
- XML_SetAllocTrackerMaximumAmplification
- </h4>
- <pre class="fcndec">
- /* Added in Expat 2.7.2. */
- XML_Bool
- XML_SetAllocTrackerMaximumAmplification(XML_Parser p,
- float maximumAmplificationFactor);
- </pre>
- <div class="fcndef">
- <p>
- Sets the maximum tolerated amplification factor between direct input and bytes
- of dynamic memory allocated (default: <code>100.0</code>) of parser
- <code>p</code> to <code>maximumAmplificationFactor</code>, and returns
- <code>XML_TRUE</code> upon success and <code>XML_FALSE</code> upon error.
- </p>
- <p>
- <strong>Note:</strong> There are three types of allocations that intentionally
- bypass tracking and limiting:
- </p>
- <ul>
- <li>application calls to functions <code><a href=
- "#XML_MemMalloc">XML_MemMalloc</a></code> and <code><a href="#XML_MemRealloc">
- XML_MemRealloc</a></code> — <em>healthy</em> use of these two functions
- continues to be a responsibility of the application using Expat —,
- </li>
- <li>the main character buffer used by functions <code><a href="#XML_GetBuffer">
- XML_GetBuffer</a></code> and <code><a href=
- "#XML_ParseBuffer">XML_ParseBuffer</a></code> (and thus also by plain
- <code><a href="#XML_Parse">XML_Parse</a></code>), and
- </li>
- <li>the <a href="#XML_SetElementDeclHandler">content model memory</a> (that is
- passed to the <a href="#XML_SetElementDeclHandler">element declaration
- handler</a> and freed by a call to <code><a href=
- "#XML_FreeContentModel">XML_FreeContentModel</a></code>).
- </li>
- </ul>
- <p>
- Once the <a href="#XML_SetAllocTrackerActivationThreshold">threshold for
- activation</a> is reached, the amplification factor is calculated as ..
- </p>
- <pre>amplification := allocated / direct</pre>
- <p>
- .. while parsing, where <code>direct</code> is the number of bytes read from
- the primary document in parsing and <code>allocated</code> is the number of
- bytes of dynamic memory allocated in the parser hierarchy.
- </p>
- <p>
- For a call to <code>XML_SetAllocTrackerMaximumAmplification</code> to succeed:
- </p>
- <ul>
- <li>parser <code>p</code> must be a non-<code>NULL</code> root parser (without
- any parent parsers) and
- </li>
- <li>
- <code>maximumAmplificationFactor</code> must be non-<code>NaN</code> and
- greater than or equal to <code>1.0</code>.
- </li>
- </ul>
- <p>
- <strong>Note:</strong> If you ever need to increase this value for non-attack
- payload, please <a href="https://github.com/libexpat/libexpat/issues">file a
- bug report</a>.
- </p>
- <p>
- <strong>Note:</strong> Amplification factors greater than <code>100.0</code>
- can be observed near the start of parsing even with benign files in practice.
- So if you do reduce the maximum allowed amplification, please make sure that
- the activation threshold is still big enough to not end up with undesired false
- positives (i.e. benign files being rejected).
- </p>
- </div>
- <h4 id="XML_SetAllocTrackerActivationThreshold">
- XML_SetAllocTrackerActivationThreshold
- </h4>
- <pre class="fcndec">
- /* Added in Expat 2.7.2. */
- XML_Bool
- XML_SetAllocTrackerActivationThreshold(XML_Parser p,
- unsigned long long activationThresholdBytes);
- </pre>
- <div class="fcndef">
- <p>
- Sets the number of allocated bytes of dynamic memory needed to activate protection
- against disproportionate use of RAM (default: <code>64 MiB</code>) of parser
- <code>p</code> to <code>activationThresholdBytes</code>, and returns
- <code>XML_TRUE</code> upon success and <code>XML_FALSE</code> upon error.
- </p>
- <p>
- <strong>Note:</strong> For types of allocations that intentionally bypass
- tracking and limiting, please see <code><a href=
- "#XML_SetAllocTrackerMaximumAmplification">XML_SetAllocTrackerMaximumAmplification</a></code>
- above.
- </p>
- <p>
- For a call to <code>XML_SetAllocTrackerActivationThreshold</code> to succeed:
- </p>
- <ul>
- <li>parser <code>p</code> must be a non-<code>NULL</code> root parser (without
- any parent parsers).
- </li>
- </ul>
- <p>
- <strong>Note:</strong> If you ever need to increase this value for non-attack
- payload, please <a href="https://github.com/libexpat/libexpat/issues">file a
- bug report</a>.
- </p>
- </div>
- <h4 id="XML_SetReparseDeferralEnabled">
- XML_SetReparseDeferralEnabled
- </h4>
- <pre class="fcndec">
- /* Added in Expat 2.6.0. */
- XML_Bool XMLCALL
- XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled);
- </pre>
- <div class="fcndef">
- <p>
- Large tokens may require many parse calls before enough data is available for
- Expat to parse it in full. If Expat retried parsing the token on every parse
- call, parsing could take quadratic time. To avoid this, Expat only retries once
- a significant amount of new data is available. This function allows disabling
- this behavior.
- </p>
- <p>
- The <code>enabled</code> argument should be <code>XML_TRUE</code> or
- <code>XML_FALSE</code>.
- </p>
- <p>
- Returns <code>XML_TRUE</code> on success, and <code>XML_FALSE</code> on error.
- </p>
- </div>
- <h3>
- <a id="miscellaneous" name="miscellaneous">Miscellaneous functions</a>
- </h3>
- <p>
- The functions in this section either obtain state information from the parser or
- can be used to dynamically set parser options.
- </p>
- <h4 id="XML_SetUserData">
- XML_SetUserData
- </h4>
- <pre class="fcndec">
- void XMLCALL
- XML_SetUserData(XML_Parser p,
- void *userData);
- </pre>
- <div class="fcndef">
- This sets the user data pointer that gets passed to handlers. It overwrites any
- previous value for this pointer. Note that the application is responsible for
- freeing the memory associated with <code>userData</code> when it is finished with
- the parser. So if you call this when there's already a pointer there, and you
- haven't freed the memory associated with it, then you've probably just leaked
- memory.
- </div>
- <h4 id="XML_GetUserData">
- XML_GetUserData
- </h4>
- <pre class="fcndec">
- void * XMLCALL
- XML_GetUserData(XML_Parser p);
- </pre>
- <div class="fcndef">
- This returns the user data pointer that gets passed to handlers. It is actually
- implemented as a macro.
- </div>
- <h4 id="XML_UseParserAsHandlerArg">
- XML_UseParserAsHandlerArg
- </h4>
- <pre class="fcndec">
- void XMLCALL
- XML_UseParserAsHandlerArg(XML_Parser p);
- </pre>
- <div class="fcndef">
- After this is called, handlers receive the parser in their <code>userData</code>
- arguments. The user data can still be obtained using the <code><a href=
- "#XML_GetUserData">XML_GetUserData</a></code> function.
- </div>
- <h4 id="XML_SetBase">
- XML_SetBase
- </h4>
- <pre class="fcndec">
- enum XML_Status XMLCALL
- XML_SetBase(XML_Parser p,
- const XML_Char *base);
- </pre>
- <div class="fcndef">
- Set the base to be used for resolving relative URIs in system identifiers. The
- return value is <code>XML_STATUS_ERROR</code> if there's no memory to store base,
- otherwise it's <code>XML_STATUS_OK</code>.
- </div>
- <h4 id="XML_GetBase">
- XML_GetBase
- </h4>
- <pre class="fcndec">
- const XML_Char * XMLCALL
- XML_GetBase(XML_Parser p);
- </pre>
- <div class="fcndef">
- Return the base for resolving relative URIs.
- </div>
- <h4 id="XML_GetSpecifiedAttributeCount">
- XML_GetSpecifiedAttributeCount
- </h4>
- <pre class="fcndec">
- int XMLCALL
- XML_GetSpecifiedAttributeCount(XML_Parser p);
- </pre>
- <div class="fcndef">
- When attributes are reported to the start handler in the atts vector, attributes
- that were explicitly set in the element occur before any attributes that receive
- their value from default information in an ATTLIST declaration. This function
- returns the number of attributes that were explicitly set times two, thus giving
- the offset in the <code>atts</code> array passed to the start tag handler of the
- first attribute set due to defaults. It supplies information for the last call to
- a start handler. If called inside a start handler, then that means the current
- call.
- </div>
- <h4 id="XML_GetIdAttributeIndex">
- XML_GetIdAttributeIndex
- </h4>
- <pre class="fcndec">
- int XMLCALL
- XML_GetIdAttributeIndex(XML_Parser p);
- </pre>
- <div class="fcndef">
- Returns the index of the ID attribute passed in the atts array in the last call
- to <code><a href="#XML_StartElementHandler">XML_StartElementHandler</a></code>,
- or -1 if there is no ID attribute. If called inside a start handler, then that
- means the current call.
- </div>
- <h4 id="XML_GetAttributeInfo">
- XML_GetAttributeInfo
- </h4>
- <pre class="fcndec">
- const XML_AttrInfo * XMLCALL
- XML_GetAttributeInfo(XML_Parser parser);
- </pre>
- <pre class="signature">
- typedef struct {
- XML_Index nameStart; /* Offset to beginning of the attribute name. */
- XML_Index nameEnd; /* Offset after the attribute name's last byte. */
- XML_Index valueStart; /* Offset to beginning of the attribute value. */
- XML_Index valueEnd; /* Offset after the attribute value's last byte. */
- } XML_AttrInfo;
- </pre>
- <div class="fcndef">
- Returns an array of <code>XML_AttrInfo</code> structures for the attribute/value
- pairs passed in the last call to the <code>XML_StartElementHandler</code> that
- were specified in the start-tag rather than defaulted. Each attribute/value pair
- counts as 1; thus the number of entries in the array is
- <code>XML_GetSpecifiedAttributeCount(parser) / 2</code>.
- </div>
- <h4 id="XML_SetEncoding">
- XML_SetEncoding
- </h4>
- <pre class="fcndec">
- enum XML_Status XMLCALL
- XML_SetEncoding(XML_Parser p,
- const XML_Char *encoding);
- </pre>
- <div class="fcndef">
- Set the encoding to be used by the parser. It is equivalent to passing a
- non-<code>NULL</code> encoding argument to the parser creation functions. It must
- not be called after <code><a href="#XML_Parse">XML_Parse</a></code> or
- <code><a href="#XML_ParseBuffer">XML_ParseBuffer</a></code> have been called on
- the given parser. Returns <code>XML_STATUS_OK</code> on success or
- <code>XML_STATUS_ERROR</code> on error.
- </div>
- <h4 id="XML_SetParamEntityParsing">
- XML_SetParamEntityParsing
- </h4>
- <pre class="fcndec">
- int XMLCALL
- XML_SetParamEntityParsing(XML_Parser p,
- enum XML_ParamEntityParsing code);
- </pre>
- <div class="fcndef">
- This enables parsing of parameter entities, including the external parameter
- entity that is the external DTD subset, according to <code>code</code>. The
- choices for <code>code</code> are:
- <ul>
- <li>
- <code>XML_PARAM_ENTITY_PARSING_NEVER</code>
- </li>
- <li>
- <code>XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE</code>
- </li>
- <li>
- <code>XML_PARAM_ENTITY_PARSING_ALWAYS</code>
- </li>
- </ul>
- <b>Note:</b> If <code>XML_SetParamEntityParsing</code> is called after
- <code>XML_Parse</code> or <code>XML_ParseBuffer</code>, then it has no effect and
- will always return 0.
- </div>
- <h4 id="XML_SetHashSalt">
- XML_SetHashSalt
- </h4>
- <pre class="fcndec">
- int XMLCALL
- XML_SetHashSalt(XML_Parser p,
- unsigned long hash_salt);
- </pre>
- <div class="fcndef">
- Sets the hash salt to use for internal hash calculations. Helps in preventing DoS
- attacks based on predicting hash function behavior. In order to have an effect
- this must be called before parsing has started. Returns 1 if successful, 0 when
- called after <code>XML_Parse</code> or <code>XML_ParseBuffer</code>.
- <p>
- <b>Note:</b> This call is optional, as the parser will auto-generate a new
- random salt value if no value has been set at the start of parsing.
- </p>
- <p>
- <b>Note:</b> One should not call <code>XML_SetHashSalt</code> with a hash salt
- value of 0, as this value is used as sentinel value to indicate that
- <code>XML_SetHashSalt</code> has <b>not</b> been called. Consequently such a
- call will have no effect, even if it returns 1.
- </p>
- </div>
- <h4 id="XML_UseForeignDTD">
- XML_UseForeignDTD
- </h4>
- <pre class="fcndec">
- enum XML_Error XMLCALL
- XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD);
- </pre>
- <div class="fcndef">
- <p>
- This function allows an application to provide an external subset for the
- document type declaration for documents which do not specify an external subset
- of their own. For documents which specify an external subset in their DOCTYPE
- declaration, the application-provided subset will be ignored. If the document
- does not contain a DOCTYPE declaration at all and <code>useDTD</code> is true,
- the application-provided subset will be parsed, but the
- <code>startDoctypeDeclHandler</code> and <code>endDoctypeDeclHandler</code>
- functions, if set, will not be called. The setting of parameter entity parsing,
- controlled using <code><a href=
- "#XML_SetParamEntityParsing">XML_SetParamEntityParsing</a></code>, will be
- honored.
- </p>
- <p>
- The application-provided external subset is read by calling the external entity
- reference handler set via <code><a href=
- "#XML_SetExternalEntityRefHandler">XML_SetExternalEntityRefHandler</a></code>
- with both <code>publicId</code> and <code>systemId</code> set to
- <code>NULL</code>.
- </p>
- <p>
- If this function is called after parsing has begun, it returns
- <code>XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING</code> and ignores
- <code>useDTD</code>. If called when Expat has been compiled without DTD
- support, it returns <code>XML_ERROR_FEATURE_REQUIRES_XML_DTD</code>. Otherwise,
- it returns <code>XML_ERROR_NONE</code>.
- </p>
- <p>
- <b>Note:</b> For the purpose of checking WFC: Entity Declared, passing
- <code>useDTD == XML_TRUE</code> will make the parser behave as if the document
- had a DTD with an external subset. This holds true even if the external entity
- reference handler returns without action.
- </p>
- </div>
- <h4 id="XML_SetReturnNSTriplet">
- XML_SetReturnNSTriplet
- </h4>
- <pre class="fcndec">
- void XMLCALL
- XML_SetReturnNSTriplet(XML_Parser parser,
- int do_nst);
- </pre>
- <div class="fcndef">
- <p>
- This function only has an effect when using a parser created with
- <code><a href="#XML_ParserCreateNS">XML_ParserCreateNS</a></code>, i.e. when
- namespace processing is in effect. The <code>do_nst</code> sets whether or not
- prefixes are returned with names qualified with a namespace prefix. If this
- function is called with <code>do_nst</code> non-zero, then afterwards namespace
- qualified names (that is qualified with a prefix as opposed to belonging to a
- default namespace) are returned as a triplet with the three parts separated by
- the namespace separator specified when the parser was created. The order of
- returned parts is URI, local name, and prefix.
- </p>
- <p>
- If <code>do_nst</code> is zero, then namespaces are reported in the default
- manner, URI then local_name separated by the namespace separator.
- </p>
- </div>
- <h4 id="XML_DefaultCurrent">
- XML_DefaultCurrent
- </h4>
- <pre class="fcndec">
- void XMLCALL
- XML_DefaultCurrent(XML_Parser parser);
- </pre>
- <div class="fcndef">
- This can be called within a handler for a start element, end element, processing
- instruction or character data. It causes the corresponding markup to be passed to
- the default handler set by <code><a href=
- "#XML_SetDefaultHandler">XML_SetDefaultHandler</a></code> or <code><a href=
- "#XML_SetDefaultHandlerExpand">XML_SetDefaultHandlerExpand</a></code>. It does
- nothing if there is not a default handler.
- </div>
- <h4 id="XML_ExpatVersion">
- XML_ExpatVersion
- </h4>
- <pre class="fcndec">
- XML_LChar * XMLCALL
- XML_ExpatVersion();
- </pre>
- <div class="fcndef">
- Return the library version as a string (e.g. <code>"expat_1.95.1"</code>).
- </div>
- <h4 id="XML_ExpatVersionInfo">
- XML_ExpatVersionInfo
- </h4>
- <pre class="fcndec">
- struct XML_Expat_Version XMLCALL
- XML_ExpatVersionInfo();
- </pre>
- <pre class="signature">
- typedef struct {
- int major;
- int minor;
- int micro;
- } XML_Expat_Version;
- </pre>
- <div class="fcndef">
- Return the library version information as a structure. Some macros are also
- defined that support compile-time tests of the library version:
- <ul>
- <li>
- <code>XML_MAJOR_VERSION</code>
- </li>
- <li>
- <code>XML_MINOR_VERSION</code>
- </li>
- <li>
- <code>XML_MICRO_VERSION</code>
- </li>
- </ul>
- Testing these constants is currently the best way to determine if particular
- parts of the Expat API are available.
- </div>
- <h4 id="XML_GetFeatureList">
- XML_GetFeatureList
- </h4>
- <pre class="fcndec">
- const XML_Feature * XMLCALL
- XML_GetFeatureList();
- </pre>
- <pre class="signature">
- enum XML_FeatureEnum {
- XML_FEATURE_END = 0,
- XML_FEATURE_UNICODE,
- XML_FEATURE_UNICODE_WCHAR_T,
- XML_FEATURE_DTD,
- XML_FEATURE_CONTEXT_BYTES,
- XML_FEATURE_MIN_SIZE,
- XML_FEATURE_SIZEOF_XML_CHAR,
- XML_FEATURE_SIZEOF_XML_LCHAR,
- XML_FEATURE_NS,
- XML_FEATURE_LARGE_SIZE
- };
- typedef struct {
- enum XML_FeatureEnum feature;
- XML_LChar *name;
- long int value;
- } XML_Feature;
- </pre>
- <div class="fcndef">
- <p>
- Returns a list of "feature" records, providing details on how Expat was
- configured at compile time. Most applications should not need to worry about
- this, but this information is otherwise not available from Expat. This function
- allows code that does need to check these features to do so at runtime.
- </p>
- <p>
- The return value is an array of <code>XML_Feature</code>, terminated by a
- record with a <code>feature</code> of <code>XML_FEATURE_END</code> and
- <code>name</code> of <code>NULL</code>, identifying the feature-test macros
- Expat was compiled with. Since an application that requires this kind of
- information needs to determine the type of character the <code>name</code>
- points to, records for the <code>XML_FEATURE_SIZEOF_XML_CHAR</code> and
- <code>XML_FEATURE_SIZEOF_XML_LCHAR</code> will be located at the beginning of
- the list, followed by <code>XML_FEATURE_UNICODE</code> and
- <code>XML_FEATURE_UNICODE_WCHAR_T</code>, if they are present at all.
- </p>
- <p>
- Some features have an associated value. If there isn't an associated value, the
- <code>value</code> field is set to 0. At this time, the following features have
- been defined to have values:
- </p>
- <dl>
- <dt>
- <code>XML_FEATURE_SIZEOF_XML_CHAR</code>
- </dt>
- <dd>
- The number of bytes occupied by one <code>XML_Char</code> character.
- </dd>
- <dt>
- <code>XML_FEATURE_SIZEOF_XML_LCHAR</code>
- </dt>
- <dd>
- The number of bytes occupied by one <code>XML_LChar</code> character.
- </dd>
- <dt>
- <code>XML_FEATURE_CONTEXT_BYTES</code>
- </dt>
- <dd>
- The maximum number of characters of context which can be reported by
- <code><a href="#XML_GetInputContext">XML_GetInputContext</a></code>.
- </dd>
- </dl>
- </div>
- <h4 id="XML_FreeContentModel">
- XML_FreeContentModel
- </h4>
- <pre class="fcndec">
- void XMLCALL
- XML_FreeContentModel(XML_Parser parser, XML_Content *model);
- </pre>
- <div class="fcndef">
- Function to deallocate the <code>model</code> argument passed to the
- <code>XML_ElementDeclHandler</code> callback set using <code><a href=
- "#XML_SetElementDeclHandler">XML_SetElementDeclHandler</a></code>. This function
- should not be used for any other purpose.
- </div>
- <p>
- The following functions allow external code to share the memory allocator an
- <code>XML_Parser</code> has been configured to use. This is especially useful for
- third-party libraries that interact with a parser object created by application
- code, or heavily layered applications. This can be essential when using
- dynamically loaded libraries which use different C standard libraries (this can
- happen on Windows, at least).
- </p>
- <h4 id="XML_MemMalloc">
- XML_MemMalloc
- </h4>
- <pre class="fcndec">
- void * XMLCALL
- XML_MemMalloc(XML_Parser parser, size_t size);
- </pre>
- <div class="fcndef">
- Allocate <code>size</code> bytes of memory using the allocator the
- <code>parser</code> object has been configured to use. Returns a pointer to the
- memory or <code>NULL</code> on failure. Memory allocated in this way must be
- freed using <code><a href="#XML_MemFree">XML_MemFree</a></code>.
- </div>
- <h4 id="XML_MemRealloc">
- XML_MemRealloc
- </h4>
- <pre class="fcndec">
- void * XMLCALL
- XML_MemRealloc(XML_Parser parser, void *ptr, size_t size);
- </pre>
- <div class="fcndef">
- Allocate <code>size</code> bytes of memory using the allocator the
- <code>parser</code> object has been configured to use. <code>ptr</code> must
- point to a block of memory allocated by <code><a href=
- "#XML_MemMalloc">XML_MemMalloc</a></code> or <code>XML_MemRealloc</code>, or be
- <code>NULL</code>. This function tries to expand the block pointed to by
- <code>ptr</code> if possible. Returns a pointer to the memory or
- <code>NULL</code> on failure. On success, the original block has either been
- expanded or freed. On failure, the original block has not been freed; the caller
- is responsible for freeing the original block. Memory allocated in this way must
- be freed using <code><a href="#XML_MemFree">XML_MemFree</a></code>.
- </div>
- <h4 id="XML_MemFree">
- XML_MemFree
- </h4>
- <pre class="fcndec">
- void XMLCALL
- XML_MemFree(XML_Parser parser, void *ptr);
- </pre>
- <div class="fcndef">
- Free a block of memory pointed to by <code>ptr</code>. The block must have been
- allocated by <code><a href="#XML_MemMalloc">XML_MemMalloc</a></code> or
- <code>XML_MemRealloc</code>, or be <code>NULL</code>.
- </div>
- <hr />
- <div class="footer">
- Found a bug in the documentation? <a href=
- "https://github.com/libexpat/libexpat/issues">Please file a bug report.</a>
- </div>
- </div>
- </body>
- </html>
|