| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
77717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762
27722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762
77727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763
27732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588358935903591359235933594359535963597359835993600360136023603360436053606360736083609361036113612361336143615361636173618361936203621362236233624362536263627362836293630363136323633363436353636363736383639364036413642364336443645364636473648364936503651365236533654365536563657365836593660366136623663366436653666366736683669367036713672367336743675367636773678367936803681368236833684368536863687368836893690369136923693369436953696369736983699370037013702370337043705370637073708370937103711371237133714371537163717371837193720372137223723372437253726372737283729373037313732373337343735373637373738373937403741374237433744374537463747374837493750375137523753375437553756375737583759376037613762376337643765376637673768376937703771377237733774377537763
77737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887388838893890389138923893389438953896389738983899390039013902390339043905390639073908390939103911391239133914391539163917391839193920392139223923392439253926392739283929393039313932393339343935393639373938393939403941394239433944394539463947394839493950395139523953395439553956395739583959396039613962396339643965396639673968396939703971397239733974397539763977397839793980398139823983398439853986398739883989399039913992399339943995399639973998399940004001400240034004400540064007400840094010401140124013401440154016401740184019402040214022402340244025402640274028402940304031403240334034403540364037403840394040404140424043404440454046404740484049405040514052405340544055405640574058405940604061406240634064406540664067406840694070407140724073407440754076407740784079408040814082408340844085408640874088408940904091409240934094409540964097409840994100410141024103410441054106410741084109411041114112411341144115411641174118411941204121412241234124412541264127412841294130413141324133413441354136413741384139414041414142414341444145414641474148414941504151415241534154415541564157415841594160416141624163416441654166416741684169417041714172417341744175417641774178417941804181418241834184418541864187418841894190419141924193419441954196419741984199420042014202420342044205420642074208420942104211421242134214421542164217421842194220422142224223422442254226422742284229423042314232423342344235423642374238423942404241424242434244424542464247424842494250425142524253425442554256425742584259426042614262426342644265426642674268426942704271427242734274427542764
27742784279428042814282428342844285428642874288428942904291429242934294429542964297429842994300430143024303430443054306430743084309431043114312431343144315431643174318431943204321432243234324432543264327432843294330433143324333433443354336433743384339434043414342434343444345434643474348434943504351435243534354435543564357435843594360436143624363436443654366436743684369437043714372437343744375437643774378437943804381438243834384438543864387438843894390439143924393439443954396439743984399440044014402440344044405440644074408440944104411441244134414441544164417441844194420442144224423442444254426442744284429443044314432443344344435443644374438443944404441444244434444444544464447444844494450445144524453445444554456445744584459446044614462446344644465446644674468446944704471447244734474447544764477447844794480448144824483448444854486448744884489449044914492449344944495449644974498449945004501450245034504450545064507450845094510451145124513451445154516451745184519452045214522452345244525452645274528452945304531453245334534453545364537453845394540454145424543454445454546454745484549455045514552455345544555455645574558455945604561456245634564456545664567456845694570457145724573457445754576457745784579458045814582458345844585458645874588458945904591459245934594459545964597459845994600460146024603460446054606460746084609461046114612461346144615461646174618461946204621462246234624462546264627462846294630463146324633463446354636463746384639464046414642464346444645464646474648464946504651465246534654465546564657465846594660466146624663466446654666466746684669467046714672467346744675467646774678467946804681468246834684468546864687468846894690469146924693469446954696469746984699470047014702470347044705470647074708470947104711471247134714471547164717471847194720472147224723472447254726472747284729473047314732473347344735473647374738473947404741474247434744474547464747474847494750475147524753475447554756475747584759476047614762476347644765476647674768476947704771477247734774477547764
777477847794780478147824783478447854786478747884789479047914792479347944795479647974798479948004801480248034804480548064807480848094810481148124813481448154816481748184819482048214822482348244825482648274828482948304831483248334834483548364837483848394840484148424843484448454846484748484849485048514852485348544855485648574858485948604861486248634864486548664867486848694870487148724873487448754876487748784879488048814882488348844885488648874888488948904891489248934894489548964897489848994900490149024903490449054906490749084909491049114912491349144915491649174918491949204921492249234924492549264927492849294930493149324933493449354936493749384939494049414942494349444945494649474948494949504951495249534954495549564957495849594960496149624963496449654966496749684969497049714972497349744975497649774978497949804981498249834984498549864987498849894990499149924993499449954996499749984999500050015002500350045005500650075008500950105011501250135014501550165017501850195020502150225023502450255026502750285029503050315032503350345035503650375038503950405041504250435044504550465047504850495050505150525053505450555056505750585059506050615062506350645065506650675068506950705071507250735074507550765077507850795080508150825083508450855086508750885089509050915092509350945095509650975098509951005101 |
- /* *****************************************************************************
- * ******************* C++ wrapper for PCRE2 Library ****************************
- * *****************************************************************************
- * Copyright (c) 2015-2018 Md. Jahidul Hamid
- *
- * -----------------------------------------------------------------------------
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * * The names of its contributors may not be used to endorse or promote
- * products derived from this software without specific prior written
- * permission.
- *
- * Disclaimer:
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- * */
- /** @file jpcre2.hpp
- * @brief Main header file for the JPCRE2 library, to be included by programs that use its functionality.
- * It includes the `pcre2.h` header, therefore you shouldn't include `pcre2.h`, neither should you define `PCRE2_CODE_UNIT_WIDTH` before including
- * `jpcre2.hpp`.
- * If your `pcre2.h` header is not in standard include paths, you may include `pcre2.h` with correct path before including `jpcre2.hpp`
- * manually. In this case you will have to define `PCRE2_CODE_UNIT_WIDTH` before including `pcre2.h`.
- * Make sure to link required PCRE2 libraries when compiling.
- *
- * @author [Md Jahidul Hamid](https://github.com/neurobin)
- */
- #ifndef JPCRE2_HPP
- #define JPCRE2_HPP
- #ifndef PCRE2_CODE_UNIT_WIDTH
- ///@def PCRE2_CODE_UNIT_WIDTH
- ///This macro does not have any significance in JPCRE2 context.
- ///It is defined as 0 by default. Defining it before including jpcre2.hpp
- ///will override the default (discouraged as it will make it harder for you to detect problems),
- ///but still it will have no effect in a JPCRE2 perspective.
- ///Defining it with an invalid value will result in a compile error.
- #define PCRE2_CODE_UNIT_WIDTH 0
- #endif
- //previous inclusion of pcre2.h will be respected and we won't try to include it twice.
- //Thus one can pre-include pcre2.h from an arbitrary/non-standard path.
- #ifndef PCRE2_MAJOR
- #include <pcre2.h> // pcre2 header
- #endif
- #include <string> // std::string, std::wstring
- #include <vector> // std::vector
- #include <map> // std::map
- #include <cstdio> // std::fprintf
- #include <climits> // CHAR_BIT
- #include <cstdlib> // std::abort()
- #if __cplusplus >= 201103L
- #include <utility>
- #ifndef JPCRE2_USE_FUNCTION_POINTER_CALLBACK
- #include <functional> // std::function
- #endif
- #endif
///Evaluates its argument and discards the result (casts it to void).
#define JPCRE2_UNUSED(x) ((void)(x))
//Assertion macros.
//They call into jpcre2::jassert / jpcre2::_jvassert with the current file and line,
//unless NDEBUG or JPCRE2_NDEBUG is defined, in which case they expand to a no-op.
#if !defined(NDEBUG) && !defined(JPCRE2_NDEBUG)
    #define JPCRE2_ASSERT(cond, msg) jpcre2::jassert(cond, msg, __FILE__, __LINE__)
    #define JPCRE2_VECTOR_DATA_ASSERT(cond, name) jpcre2::_jvassert(cond, name, __FILE__, __LINE__)
#else
    #define JPCRE2_ASSERT(cond, msg) ((void)0)
    #define JPCRE2_VECTOR_DATA_ASSERT(cond, name) ((void)0)
#endif
- /** @namespace jpcre2
- * Top level namespace of JPCRE2.
- *
- * All functions, classes/structs, constants, enums that are provided by JPCRE2 belong to this namespace while
- * **PCRE2** structs, functions, constants remain outside of its scope.
- *
- * If you want to use any PCRE2 functions or constants,
- * remember that they are in the global scope and should be used as such.
- */
- namespace jpcre2 {
- ///Define for JPCRE2 version.
- ///It can be used to support changes in different versions of the lib.
- #define JPCRE2_VERSION 103103L
/** @namespace jpcre2::INFO
 *  Information about the JPCRE2 library itself.
 *  Every member is a constant C string; the version is available both
 *  as individual components and as one full string.
 */
namespace INFO {
    static char const NAME[] = "JPCRE2";            ///< Name of the project
    static char const VERSION_GENRE[] = "10";       ///< Generation, depends on original PCRE2 version
    static char const VERSION_MAJOR[] = "31";       ///< Major version, updated when API change is made
    static char const VERSION_MINOR[] = "03";       ///< Minor version, includes bug fix or minor feature upgrade
    static char const VERSION_PRE_RELEASE[] = "";   ///< Alpha or beta (testing) release version
    static char const FULL_VERSION[] = "10.31.03";  ///< Full version string
}
- typedef PCRE2_SIZE SIZE_T; ///< Used for match count and vector size
- typedef uint32_t Uint; ///< Used for options (bitwise operation)
- typedef uint8_t Ush; ///< 8 bit unsigned integer.
- typedef std::vector<SIZE_T> VecOff; ///< vector of size_t.
- typedef std::vector<Uint> VecOpt; ///< vector for Uint option values.
/** @namespace jpcre2::ERROR
 *  Namespace holding the error codes that JPCRE2 defines itself.
 */
namespace ERROR {
    /// JPCRE2 error numbers.
    /// They are positive integers, whereas PCRE2 error numbers are negative integers.
    enum {
        /// Invalid modifier was detected
        INVALID_MODIFIER = 2,
        /// Ovector was not big enough during a match
        INSUFFICIENT_OVECTOR = 3
    };
}
/// JPCRE2 option constants.
/// Apart from NONE, each option occupies a single bit.
enum {
    NONE        = 0u,       ///< Option 0 (zero)
    FIND_ALL    = 1u << 1,  ///< Find all during match (global match)
    JIT_COMPILE = 1u << 2   ///< Perform JIT compilation for optimization
};
//Local enable-if helper.
//The primary template has no members, so EnableIf<false, ...>::Type does not exist.
template<bool Condition, typename Result = void>
struct EnableIf {
};
//Specialization for a true condition: exposes the second argument as Type.
template<typename Result>
struct EnableIf<true, Result> {
    typedef Result Type;
};
//Local is-same helper: value is false for two different types...
template<typename Lhs, typename Rhs>
struct IsSame {
    static const bool value = false;
};
//...and true when both template arguments are the same type.
template<typename Same>
struct IsSame<Same, Same> {
    static const bool value = true;
};
///JPCRE2 assert function.
///Returns silently when the condition holds; otherwise prints a diagnostic
///to stderr and terminates the program with std::abort().
///@param cond condition expected to be true
///@param msg message printed as part of the diagnostic (C string)
///@param f name of the file where jassert was called
///@param line line number where jassert was called (printed as an unsigned value)
static inline void jassert(bool cond, const char* msg, const char* f, size_t line){
    if(cond) return;
    const unsigned line_no = static_cast<unsigned>(line);
    std::fprintf(stderr,"\n\tE: AssertionFailure\n%s\nAssertion failed in file: %s\t at line: %u\n", msg, f, line_no);
    std::abort();
}
- static inline void _jvassert(bool cond, char const * name, const char* f, size_t line){
- jassert(cond, (std::string("ValueError: \n\
- Required data vector of type ")+std::string(name)+" is empty.\n\
- Your MatchEvaluator callback function is not\n\
- compatible with existing data!!\n\
- You are trying to use a vector that does not\n\
- have any match data. Either call nreplace() or replace()\n\
- with true or perform a match with appropriate\n\
- callback function. For more details, refer to\n\
- the doc in MatchEvaluator section.").c_str(), f, line);
- }
///Converts an unsigned integer to its decimal string representation.
///@param x value to convert
///@return the digits of x, or an empty string if std::sprintf() does not report a positive length
static inline std::string _tostdstring(unsigned x){
    char digits[128];
    const int length = std::sprintf(digits, "%u", x);
    if(length <= 0) return std::string();
    return std::string(digits, digits + length);
}
- ////////////////////////// The following are type and function mappings from PCRE2 interface to JPCRE2 interface /////////////////////////
- //forward declaration
- template<Ush BS> struct Pcre2Type;
- template<Ush BS> struct Pcre2Func;
- //PCRE2 types
- //These templated types will be used in place of actual types
- template<Ush BS> struct Pcre2Type {};
- template<> struct Pcre2Type<8>{
- //typedefs used
- typedef PCRE2_UCHAR8 Pcre2Uchar;
- typedef PCRE2_SPTR8 Pcre2Sptr;
- typedef pcre2_code_8 Pcre2Code;
- typedef pcre2_compile_context_8 CompileContext;
- typedef pcre2_match_data_8 MatchData;
- typedef pcre2_general_context_8 GeneralContext;
- typedef pcre2_match_context_8 MatchContext;
- typedef pcre2_jit_callback_8 JitCallback;
- typedef pcre2_jit_stack_8 JitStack;
- };
- template<> struct Pcre2Type<16>{
- //typedefs used
- typedef PCRE2_UCHAR16 Pcre2Uchar;
- typedef PCRE2_SPTR16 Pcre2Sptr;
- typedef pcre2_code_16 Pcre2Code;
- typedef pcre2_compile_context_16 CompileContext;
- typedef pcre2_match_data_16 MatchData;
- typedef pcre2_general_context_16 GeneralContext;
- typedef pcre2_match_context_16 MatchContext;
- typedef pcre2_jit_callback_16 JitCallback;
- typedef pcre2_jit_stack_16 JitStack;
- };
- template<> struct Pcre2Type<32>{
- //typedefs used
- typedef PCRE2_UCHAR32 Pcre2Uchar;
- typedef PCRE2_SPTR32 Pcre2Sptr;
- typedef pcre2_code_32 Pcre2Code;
- typedef pcre2_compile_context_32 CompileContext;
- typedef pcre2_match_data_32 MatchData;
- typedef pcre2_general_context_32 GeneralContext;
- typedef pcre2_match_context_32 MatchContext;
- typedef pcre2_jit_callback_32 JitCallback;
- typedef pcre2_jit_stack_32 JitStack;
- };
- //wrappers for PCRE2 functions
- template<Ush BS> struct Pcre2Func{};
- //8-bit version
- template<> struct Pcre2Func<8> {
- static Pcre2Type<8>::CompileContext* compile_context_create(Pcre2Type<8>::GeneralContext *gcontext){
- return pcre2_compile_context_create_8(gcontext);
- }
- static void compile_context_free(Pcre2Type<8>::CompileContext *ccontext){
- pcre2_compile_context_free_8(ccontext);
- }
- static Pcre2Type<8>::CompileContext* compile_context_copy(Pcre2Type<8>::CompileContext* ccontext){
- return pcre2_compile_context_copy_8(ccontext);
- }
- static const unsigned char * maketables(Pcre2Type<8>::GeneralContext* gcontext){
- return pcre2_maketables_8(gcontext);
- }
- static int set_character_tables(Pcre2Type<8>::CompileContext * ccontext, const unsigned char * table){
- return pcre2_set_character_tables_8(ccontext, table);
- }
- static Pcre2Type<8>::Pcre2Code * compile(Pcre2Type<8>::Pcre2Sptr pattern,
- PCRE2_SIZE length,
- uint32_t options,
- int *errorcode,
- PCRE2_SIZE *erroroffset,
- Pcre2Type<8>::CompileContext *ccontext){
- return pcre2_compile_8(pattern, length, options, errorcode, erroroffset, ccontext);
- }
- static int jit_compile(Pcre2Type<8>::Pcre2Code *code, uint32_t options){
- return pcre2_jit_compile_8(code, options);
- }
- static int substitute( const Pcre2Type<8>::Pcre2Code *code,
- Pcre2Type<8>::Pcre2Sptr subject,
- PCRE2_SIZE length,
- PCRE2_SIZE startoffset,
- uint32_t options,
- Pcre2Type<8>::MatchData *match_data,
- Pcre2Type<8>::MatchContext *mcontext,
- Pcre2Type<8>::Pcre2Sptr replacement,
- PCRE2_SIZE rlength,
- Pcre2Type<8>::Pcre2Uchar *outputbuffer,
- PCRE2_SIZE *outlengthptr){
- return pcre2_substitute_8( code, subject, length, startoffset, options, match_data,
- mcontext, replacement, rlength, outputbuffer, outlengthptr);
- }
- //~ static int substring_get_bynumber(Pcre2Type<8>::MatchData *match_data,
- //~ uint32_t number,
- //~ Pcre2Type<8>::Pcre2Uchar **bufferptr,
- //~ PCRE2_SIZE *bufflen){
- //~ return pcre2_substring_get_bynumber_8(match_data, number, bufferptr, bufflen);
- //~ }
- //~ static int substring_get_byname(Pcre2Type<8>::MatchData *match_data,
- //~ Pcre2Type<8>::Pcre2Sptr name,
- //~ Pcre2Type<8>::Pcre2Uchar **bufferptr,
- //~ PCRE2_SIZE *bufflen){
- //~ return pcre2_substring_get_byname_8(match_data, name, bufferptr, bufflen);
- //~ }
- //~ static void substring_free(Pcre2Type<8>::Pcre2Uchar *buffer){
- //~ pcre2_substring_free_8(buffer);
- //~ }
- //~ static Pcre2Type<8>::Pcre2Code * code_copy(const Pcre2Type<8>::Pcre2Code *code){
- //~ return pcre2_code_copy_8(code);
- //~ }
- static void code_free(Pcre2Type<8>::Pcre2Code *code){
- pcre2_code_free_8(code);
- }
- static int get_error_message( int errorcode,
- Pcre2Type<8>::Pcre2Uchar *buffer,
- PCRE2_SIZE bufflen){
- return pcre2_get_error_message_8(errorcode, buffer, bufflen);
- }
- static Pcre2Type<8>::MatchData * match_data_create_from_pattern(
- const Pcre2Type<8>::Pcre2Code *code,
- Pcre2Type<8>::GeneralContext *gcontext){
- return pcre2_match_data_create_from_pattern_8(code, gcontext);
-
- }
- static int match( const Pcre2Type<8>::Pcre2Code *code,
- Pcre2Type<8>::Pcre2Sptr subject,
- PCRE2_SIZE length,
- PCRE2_SIZE startoffset,
- uint32_t options,
- Pcre2Type<8>::MatchData *match_data,
- Pcre2Type<8>::MatchContext *mcontext){
- return pcre2_match_8(code, subject, length, startoffset, options, match_data, mcontext);
- }
- static void match_data_free(Pcre2Type<8>::MatchData *match_data){
- pcre2_match_data_free_8(match_data);
- }
- static PCRE2_SIZE * get_ovector_pointer(Pcre2Type<8>::MatchData *match_data){
- return pcre2_get_ovector_pointer_8(match_data);
- }
- static int pattern_info(const Pcre2Type<8>::Pcre2Code *code, uint32_t what, void *where){
- return pcre2_pattern_info_8(code, what, where);
- }
- static int set_newline(Pcre2Type<8>::CompileContext *ccontext, uint32_t value){
- return pcre2_set_newline_8(ccontext, value);
- }
- //~ static void jit_stack_assign(Pcre2Type<8>::MatchContext *mcontext,
- //~ Pcre2Type<8>::JitCallback callback_function,
- //~ void *callback_data){
- //~ pcre2_jit_stack_assign_8(mcontext, callback_function, callback_data);
- //~ }
- //~ static Pcre2Type<8>::JitStack *jit_stack_create(PCRE2_SIZE startsize, PCRE2_SIZE maxsize,
- //~ Pcre2Type<8>::GeneralContext *gcontext){
- //~ return pcre2_jit_stack_create_8(startsize, maxsize, gcontext);
- //~ }
- //~ static void jit_stack_free(Pcre2Type<8>::JitStack *jit_stack){
- //~ pcre2_jit_stack_free_8(jit_stack);
- //~ }
- //~ static void jit_free_unused_memory(Pcre2Type<8>::GeneralContext *gcontext){
- //~ pcre2_jit_free_unused_memory_8(gcontext);
- //~ }
- //~ static Pcre2Type<8>::MatchContext *match_context_create(Pcre2Type<8>::GeneralContext *gcontext){
- //~ return pcre2_match_context_create_8(gcontext);
- //~ }
- //~ static Pcre2Type<8>::MatchContext *match_context_copy(Pcre2Type<8>::MatchContext *mcontext){
- //~ return pcre2_match_context_copy_8(mcontext);
- //~ }
- //~ static void match_context_free(Pcre2Type<8>::MatchContext *mcontext){
- //~ pcre2_match_context_free_8(mcontext);
- //~ }
- static uint32_t get_ovector_count(Pcre2Type<8>::MatchData *match_data){
- return pcre2_get_ovector_count_8(match_data);
- }
- };
- //16-bit version
- template<> struct Pcre2Func<16> {
- static Pcre2Type<16>::CompileContext* compile_context_create(Pcre2Type<16>::GeneralContext *gcontext){
- return pcre2_compile_context_create_16(gcontext);
- }
- static void compile_context_free(Pcre2Type<16>::CompileContext *ccontext){
- pcre2_compile_context_free_16(ccontext);
- }
- static Pcre2Type<16>::CompileContext* compile_context_copy(Pcre2Type<16>::CompileContext* ccontext){
- return pcre2_compile_context_copy_16(ccontext);
- }
- static const unsigned char * maketables(Pcre2Type<16>::GeneralContext* gcontext){
- return pcre2_maketables_16(gcontext);
- }
- static int set_character_tables(Pcre2Type<16>::CompileContext * ccontext, const unsigned char * table){
- return pcre2_set_character_tables_16(ccontext, table);
- }
- static Pcre2Type<16>::Pcre2Code * compile(Pcre2Type<16>::Pcre2Sptr pattern,
- PCRE2_SIZE length,
- uint32_t options,
- int *errorcode,
- PCRE2_SIZE *erroroffset,
- Pcre2Type<16>::CompileContext *ccontext){
- return pcre2_compile_16(pattern, length, options, errorcode, erroroffset, ccontext);
- }
- static int jit_compile(Pcre2Type<16>::Pcre2Code *code, uint32_t options){
- return pcre2_jit_compile_16(code, options);
- }
- static int substitute( const Pcre2Type<16>::Pcre2Code *code,
- Pcre2Type<16>::Pcre2Sptr subject,
- PCRE2_SIZE length,
- PCRE2_SIZE startoffset,
- uint32_t options,
- Pcre2Type<16>::MatchData *match_data,
- Pcre2Type<16>::MatchContext *mcontext,
- Pcre2Type<16>::Pcre2Sptr replacement,
- PCRE2_SIZE rlength,
- Pcre2Type<16>::Pcre2Uchar *outputbuffer,
- PCRE2_SIZE *outlengthptr){
- return pcre2_substitute_16( code, subject, length, startoffset, options, match_data,
- mcontext, replacement, rlength, outputbuffer, outlengthptr);
- }
- //~ static int substring_get_bynumber(Pcre2Type<16>::MatchData *match_data,
- //~ uint32_t number,
- //~ Pcre2Type<16>::Pcre2Uchar **bufferptr,
- //~ PCRE2_SIZE *bufflen){
- //~ return pcre2_substring_get_bynumber_16(match_data, number, bufferptr, bufflen);
- //~ }
- //~ static int substring_get_byname(Pcre2Type<16>::MatchData *match_data,
- //~ Pcre2Type<16>::Pcre2Sptr name,
- //~ Pcre2Type<16>::Pcre2Uchar **bufferptr,
- //~ PCRE2_SIZE *bufflen){
- //~ return pcre2_substring_get_byname_16(match_data, name, bufferptr, bufflen);
- //~ }
- //~ static void substring_free(Pcre2Type<16>::Pcre2Uchar *buffer){
- //~ pcre2_substring_free_16(buffer);
- //~ }
- //~ static Pcre2Type<16>::Pcre2Code * code_copy(const Pcre2Type<16>::Pcre2Code *code){
- //~ return pcre2_code_copy_16(code);
- //~ }
- static void code_free(Pcre2Type<16>::Pcre2Code *code){
- pcre2_code_free_16(code);
- }
- static int get_error_message( int errorcode,
- Pcre2Type<16>::Pcre2Uchar *buffer,
- PCRE2_SIZE bufflen){
- return pcre2_get_error_message_16(errorcode, buffer, bufflen);
- }
- static Pcre2Type<16>::MatchData * match_data_create_from_pattern(
- const Pcre2Type<16>::Pcre2Code *code,
- Pcre2Type<16>::GeneralContext *gcontext){
- return pcre2_match_data_create_from_pattern_16(code, gcontext);
-
- }
- static int match( const Pcre2Type<16>::Pcre2Code *code,
- Pcre2Type<16>::Pcre2Sptr subject,
- PCRE2_SIZE length,
- PCRE2_SIZE startoffset,
- uint32_t options,
- Pcre2Type<16>::MatchData *match_data,
- Pcre2Type<16>::MatchContext *mcontext){
- return pcre2_match_16(code, subject, length, startoffset, options, match_data, mcontext);
- }
- static void match_data_free(Pcre2Type<16>::MatchData *match_data){
- pcre2_match_data_free_16(match_data);
- }
- static PCRE2_SIZE * get_ovector_pointer(Pcre2Type<16>::MatchData *match_data){
- return pcre2_get_ovector_pointer_16(match_data);
- }
- static int pattern_info(const Pcre2Type<16>::Pcre2Code *code, uint32_t what, void *where){
- return pcre2_pattern_info_16(code, what, where);
- }
- static int set_newline(Pcre2Type<16>::CompileContext *ccontext, uint32_t value){
- return pcre2_set_newline_16(ccontext, value);
- }
- //~ static void jit_stack_assign(Pcre2Type<16>::MatchContext *mcontext,
- //~ Pcre2Type<16>::JitCallback callback_function,
- //~ void *callback_data){
- //~ pcre2_jit_stack_assign_16(mcontext, callback_function, callback_data);
- //~ }
- //~ static Pcre2Type<16>::JitStack *jit_stack_create(PCRE2_SIZE startsize, PCRE2_SIZE maxsize,
- //~ Pcre2Type<16>::GeneralContext *gcontext){
- //~ return pcre2_jit_stack_create_16(startsize, maxsize, gcontext);
- //~ }
- //~ static void jit_stack_free(Pcre2Type<16>::JitStack *jit_stack){
- //~ pcre2_jit_stack_free_16(jit_stack);
- //~ }
- //~ static void jit_free_unused_memory(Pcre2Type<16>::GeneralContext *gcontext){
- //~ pcre2_jit_free_unused_memory_16(gcontext);
- //~ }
- //~ static Pcre2Type<16>::MatchContext *match_context_create(Pcre2Type<16>::GeneralContext *gcontext){
- //~ return pcre2_match_context_create_16(gcontext);
- //~ }
- //~ static Pcre2Type<16>::MatchContext *match_context_copy(Pcre2Type<16>::MatchContext *mcontext){
- //~ return pcre2_match_context_copy_16(mcontext);
- //~ }
- //~ static void match_context_free(Pcre2Type<16>::MatchContext *mcontext){
- //~ pcre2_match_context_free_16(mcontext);
- //~ }
- static uint32_t get_ovector_count(Pcre2Type<16>::MatchData *match_data){
- return pcre2_get_ovector_count_16(match_data);
- }
- };
- //32-bit version
- template<> struct Pcre2Func<32> {
- static Pcre2Type<32>::CompileContext* compile_context_create(Pcre2Type<32>::GeneralContext *gcontext){
- return pcre2_compile_context_create_32(gcontext);
- }
- static void compile_context_free(Pcre2Type<32>::CompileContext *ccontext){
- pcre2_compile_context_free_32(ccontext);
- }
- static Pcre2Type<32>::CompileContext* compile_context_copy(Pcre2Type<32>::CompileContext* ccontext){
- return pcre2_compile_context_copy_32(ccontext);
- }
- static const unsigned char * maketables(Pcre2Type<32>::GeneralContext* gcontext){
- return pcre2_maketables_32(gcontext);
- }
- static int set_character_tables(Pcre2Type<32>::CompileContext * ccontext, const unsigned char * table){
- return pcre2_set_character_tables_32(ccontext, table);
- }
- static Pcre2Type<32>::Pcre2Code * compile(Pcre2Type<32>::Pcre2Sptr pattern,
- PCRE2_SIZE length,
- uint32_t options,
- int *errorcode,
- PCRE2_SIZE *erroroffset,
- Pcre2Type<32>::CompileContext *ccontext){
- return pcre2_compile_32(pattern, length, options, errorcode, erroroffset, ccontext);
- }
- static int jit_compile(Pcre2Type<32>::Pcre2Code *code, uint32_t options){
- return pcre2_jit_compile_32(code, options);
- }
- static int substitute( const Pcre2Type<32>::Pcre2Code *code,
- Pcre2Type<32>::Pcre2Sptr subject,
- PCRE2_SIZE length,
- PCRE2_SIZE startoffset,
- uint32_t options,
- Pcre2Type<32>::MatchData *match_data,
- Pcre2Type<32>::MatchContext *mcontext,
- Pcre2Type<32>::Pcre2Sptr replacement,
- PCRE2_SIZE rlength,
- Pcre2Type<32>::Pcre2Uchar *outputbuffer,
- PCRE2_SIZE *outlengthptr){
- return pcre2_substitute_32( code, subject, length, startoffset, options, match_data,
- mcontext, replacement, rlength, outputbuffer, outlengthptr);
- }
- //~ static int substring_get_bynumber(Pcre2Type<32>::MatchData *match_data,
- //~ uint32_t number,
- //~ Pcre2Type<32>::Pcre2Uchar **bufferptr,
- //~ PCRE2_SIZE *bufflen){
- //~ return pcre2_substring_get_bynumber_32(match_data, number, bufferptr, bufflen);
- //~ }
- //~ static int substring_get_byname(Pcre2Type<32>::MatchData *match_data,
- //~ Pcre2Type<32>::Pcre2Sptr name,
- //~ Pcre2Type<32>::Pcre2Uchar **bufferptr,
- //~ PCRE2_SIZE *bufflen){
- //~ return pcre2_substring_get_byname_32(match_data, name, bufferptr, bufflen);
- //~ }
- //~ static void substring_free(Pcre2Type<32>::Pcre2Uchar *buffer){
- //~ pcre2_substring_free_32(buffer);
- //~ }
- //~ static Pcre2Type<32>::Pcre2Code * code_copy(const Pcre2Type<32>::Pcre2Code *code){
- //~ return pcre2_code_copy_32(code);
- //~ }
- static void code_free(Pcre2Type<32>::Pcre2Code *code){
- pcre2_code_free_32(code);
- }
- static int get_error_message( int errorcode,
- Pcre2Type<32>::Pcre2Uchar *buffer,
- PCRE2_SIZE bufflen){
- return pcre2_get_error_message_32(errorcode, buffer, bufflen);
- }
- static Pcre2Type<32>::MatchData * match_data_create_from_pattern(
- const Pcre2Type<32>::Pcre2Code *code,
- Pcre2Type<32>::GeneralContext *gcontext){
- return pcre2_match_data_create_from_pattern_32(code, gcontext);
-
- }
- static int match( const Pcre2Type<32>::Pcre2Code *code,
- Pcre2Type<32>::Pcre2Sptr subject,
- PCRE2_SIZE length,
- PCRE2_SIZE startoffset,
- uint32_t options,
- Pcre2Type<32>::MatchData *match_data,
- Pcre2Type<32>::MatchContext *mcontext){
- return pcre2_match_32(code, subject, length, startoffset, options, match_data, mcontext);
- }
- static void match_data_free(Pcre2Type<32>::MatchData *match_data){
- pcre2_match_data_free_32(match_data);
- }
- static PCRE2_SIZE * get_ovector_pointer(Pcre2Type<32>::MatchData *match_data){
- return pcre2_get_ovector_pointer_32(match_data);
- }
- static int pattern_info(const Pcre2Type<32>::Pcre2Code *code, uint32_t what, void *where){
- return pcre2_pattern_info_32(code, what, where);
- }
- static int set_newline(Pcre2Type<32>::CompileContext *ccontext, uint32_t value){
- return pcre2_set_newline_32(ccontext, value);
- }
- //~ static void jit_stack_assign(Pcre2Type<32>::MatchContext *mcontext,
- //~ Pcre2Type<32>::JitCallback callback_function,
- //~ void *callback_data){
- //~ pcre2_jit_stack_assign_32(mcontext, callback_function, callback_data);
- //~ }
- //~ static Pcre2Type<32>::JitStack *jit_stack_create(PCRE2_SIZE startsize, PCRE2_SIZE maxsize,
- //~ Pcre2Type<32>::GeneralContext *gcontext){
- //~ return pcre2_jit_stack_create_32(startsize, maxsize, gcontext);
- //~ }
- //~ static void jit_stack_free(Pcre2Type<32>::JitStack *jit_stack){
- //~ pcre2_jit_stack_free_32(jit_stack);
- //~ }
- //~ static void jit_free_unused_memory(Pcre2Type<32>::GeneralContext *gcontext){
- //~ pcre2_jit_free_unused_memory_32(gcontext);
- //~ }
- //~ static Pcre2Type<32>::MatchContext *match_context_create(Pcre2Type<32>::GeneralContext *gcontext){
- //~ return pcre2_match_context_create_32(gcontext);
- //~ }
- //~ static Pcre2Type<32>::MatchContext *match_context_copy(Pcre2Type<32>::MatchContext *mcontext){
- //~ return pcre2_match_context_copy_32(mcontext);
- //~ }
- //~ static void match_context_free(Pcre2Type<32>::MatchContext *mcontext){
- //~ pcre2_match_context_free_32(mcontext);
- //~ }
- static uint32_t get_ovector_count(Pcre2Type<32>::MatchData *match_data){
- return pcre2_get_ovector_count_32(match_data);
- }
- };
- ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- ///Class to take a std::string modifier value with null safety.
- ///You don't need to make an instance of this class to pass modifier,
- ///just pass std::string or char const*, whatever seems feasible,
- ///implicit conversion will kick in and take care of things for you.
- class Modifier{
- std::string mod;
-
- public:
- ///Default constructor.
- Modifier(){}
-
- ///Constructor that takes a std::string.
- ///@param x std::string as a reference.
- Modifier(std::string const& x):mod(x){}
-
- ///Constructor that takes char const * (null safety is provided by this one)
- ///@param x char const *
- Modifier(char const *x):mod(x?x:""){}
-
- ///Returns the modifier string
- ///@return modifier string (std::string)
- std::string str() const { return mod; }
-
- ///Returns the c_str() of modifier string
- ///@return char const *
- char const * c_str() const { return mod.c_str(); }
-
- ///Returns the length of the modifier string
- ///@return length
- SIZE_T length() const{ return mod.length(); }
-
- ///operator[] overload to access character by index.
- ///@param i index
- ///@return character at index i.
- char operator[](SIZE_T i) const { return mod[i]; }
- };
- // Namespace for modifier constants.
- // For each modifier constant there is a jpcre2::Uint option value.
- // Some modifiers may have multiple values set together (ORed in bitwise operation) and
- // thus they may include other modifiers. Such an example is the 'n' modifier. It is combined together with 'u'.
- namespace MOD {
- // Define modifiers for compile
- // String of compile modifier characters for PCRE2 options
- static const char C_N[] = "eijmnsuxADJU";
- // Array of compile modifier values for PCRE2 options
- // Uint is being used in getModifier() in for loop to get the number of element in this array,
- // be sure to chnage there if you change here.
- static const jpcre2::Uint C_V[12] = { PCRE2_MATCH_UNSET_BACKREF, // Modifier e
- PCRE2_CASELESS, // Modifier i
- PCRE2_ALT_BSUX | PCRE2_MATCH_UNSET_BACKREF, // Modifier j
- PCRE2_MULTILINE, // Modifier m
- PCRE2_UTF | PCRE2_UCP, // Modifier n (includes u)
- PCRE2_DOTALL, // Modifier s
- PCRE2_UTF, // Modifier u
- PCRE2_EXTENDED, // Modifier x
- PCRE2_ANCHORED, // Modifier A
- PCRE2_DOLLAR_ENDONLY, // Modifier D
- PCRE2_DUPNAMES, // Modifier J
- PCRE2_UNGREEDY // Modifier U
- };
-
- // String of compile modifier characters for JPCRE2 options
- static const char CJ_N[] = "S";
- // Array of compile modifier values for JPCRE2 options
- static const jpcre2::Uint CJ_V[1] = { JIT_COMPILE, // Modifier S
- };
- // Define modifiers for replace
- // String of action (replace) modifier characters for PCRE2 options
- static const char R_N[] = "eEgx";
- // Array of action (replace) modifier values for PCRE2 options
- static const jpcre2::Uint R_V[4] = { PCRE2_SUBSTITUTE_UNSET_EMPTY, // Modifier e
- PCRE2_SUBSTITUTE_UNKNOWN_UNSET | PCRE2_SUBSTITUTE_UNSET_EMPTY, // Modifier E (includes e)
- PCRE2_SUBSTITUTE_GLOBAL, // Modifier g
- PCRE2_SUBSTITUTE_EXTENDED // Modifier x
- };
- // String of action (replace) modifier characters for JPCRE2 options
- static const char RJ_N[] = "";
- // Array of action (replace) modifier values for JPCRE2 options
- static const jpcre2::Uint RJ_V[1] = { NONE //placeholder
- };
- // Define modifiers for match
- // String of action (match) modifier characters for PCRE2 options
- static const char M_N[] = "A";
- // Array of action (match) modifier values for PCRE2 options
- static const jpcre2::Uint M_V[1] = { PCRE2_ANCHORED // Modifier A
- };
- // String of action (match) modifier characters for JPCRE2 options
- static const char MJ_N[] = "g";
- // Array of action (match) modifier values for JPCRE2 options
- static const jpcre2::Uint MJ_V[1] = { FIND_ALL, // Modifier g
- };
-
- static inline void toOption(Modifier const& mod, bool x,
- Uint const * J_V, char const * J_N, SIZE_T SJ,
- Uint const * V, char const * N, SIZE_T S,
- Uint* po, Uint* jo,
- int* en, SIZE_T* eo
- ){
- //loop through mod
- SIZE_T n = mod.length();
- for (SIZE_T i = 0; i < n; ++i) {
- //First check for JPCRE2 mods
- for(SIZE_T j = 0; j < SJ; ++j){
- if(J_N[j] == mod[i]) {
- if(x) *jo |= J_V[j];
- else *jo &= ~J_V[j];
- goto endfor;
- }
- }
-
- //Now check for PCRE2 mods
- for(SIZE_T j = 0; j< S; ++j){
- if(N[j] == mod[i]){
- if(x) *po |= V[j];
- else *po &= ~V[j];
- goto endfor;
- }
- }
-
- //Modifier didn't match, invalid modifier
- *en = (int)ERROR::INVALID_MODIFIER;
- *eo = (int)mod[i];
-
- endfor:;
- }
- }
-
- static inline void toMatchOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo){
- toOption(mod, x,
- MJ_V, MJ_N, sizeof(MJ_V)/sizeof(Uint),
- M_V, M_N, sizeof(M_V)/sizeof(Uint),
- po, jo, en, eo);
- }
-
- static inline void toReplaceOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo){
- toOption(mod, x,
- RJ_V, RJ_N, sizeof(RJ_V)/sizeof(Uint),
- R_V, R_N, sizeof(R_V)/sizeof(Uint),
- po, jo, en, eo);
- }
-
- static inline void toCompileOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo){
- toOption(mod, x,
- CJ_V, CJ_N, sizeof(CJ_V)/sizeof(Uint),
- C_V, C_N, sizeof(C_V)/sizeof(Uint),
- po, jo, en, eo);
- }
- static inline std::string fromOption(Uint const * J_V, char const * J_N, SIZE_T SJ,
- Uint const * V, char const * N, SIZE_T S,
- Uint po, Uint jo
- ){
- std::string mod;
- //Calculate PCRE2 mod
- for(SIZE_T i = 0; i < S; ++i){
- if( (V[i] & po) != 0 &&
- (V[i] & po) == V[i]) //One option can include other
- mod += N[i];
- }
- //Calculate JPCRE2 mod
- for(SIZE_T i = 0; i < SJ; ++i){
- if( (J_V[i] & jo) != 0 &&
- (J_V[i] & jo) == J_V[i]) //One option can include other
- mod += J_N[i];
- }
- return mod;
- }
-
- static inline std::string fromMatchOption(Uint po, Uint jo){
- return fromOption(MJ_V, MJ_N, sizeof(MJ_V)/sizeof(Uint),
- M_V, M_N, sizeof(M_V)/sizeof(Uint),
- po, jo);
- }
-
- static inline std::string fromReplaceOption(Uint po, Uint jo){
- return fromOption(RJ_V, RJ_N, sizeof(RJ_V)/sizeof(Uint),
- R_V, R_N, sizeof(R_V)/sizeof(Uint),
- po, jo);
- }
-
- static inline std::string fromCompileOption(Uint po, Uint jo){
- return fromOption(CJ_V, CJ_N, sizeof(CJ_V)/sizeof(Uint),
- C_V, C_N, sizeof(C_V)/sizeof(Uint),
- po, jo);
- }
-
- } //MOD namespace ends
- ///Lets you create custom modifier tables.
- ///An instance of this class can be passed to
- ///match, replace or compile related class objects.
- class ModifierTable{
-
- std::string tabjms;
- std::string tabms;
- std::string tabjrs;
- std::string tabrs;
- std::string tabjcs;
- std::string tabcs;
- VecOpt tabjmv;
- VecOpt tabmv;
- VecOpt tabjrv;
- VecOpt tabrv;
- VecOpt tabjcv;
- VecOpt tabcv;
-
- void toOption(Modifier const& mod, bool x,
- VecOpt const& J_V, std::string const& J_N,
- VecOpt const& V, std::string const& N,
- Uint* po, Uint* jo, int* en, SIZE_T* eo
- ) const{
- SIZE_T SJ = J_V.size();
- SIZE_T S = V.size();
- JPCRE2_ASSERT(SJ == J_N.length(), ("ValueError: Modifier character and value table must be of the same size (" + _tostdstring(SJ) + " == " + _tostdstring(J_N.length()) + ").").c_str());
- JPCRE2_ASSERT(S == N.length(), ("ValueError: Modifier character and value table must be of the same size (" + _tostdstring(S) + " == " + _tostdstring(N.length()) + ").").c_str());
- MOD::toOption(mod, x,
- J_V.empty()?0:&J_V[0], J_N.c_str(), SJ,
- V.empty()?0:&V[0], N.c_str(), S,
- po, jo, en, eo
- );
- }
-
- std::string fromOption(VecOpt const& J_V, std::string const& J_N,
- VecOpt const& V, std::string const& N,
- Uint po, Uint jo) const{
- SIZE_T SJ = J_V.size();
- SIZE_T S = V.size();
- JPCRE2_ASSERT(SJ == J_N.length(), ("ValueError: Modifier character and value table must be of the same size (" + _tostdstring(SJ) + " == " + _tostdstring(J_N.length()) + ").").c_str());
- JPCRE2_ASSERT(S == N.length(), ("ValueError: Modifier character and value table must be of the same size (" + _tostdstring(S) + " == " + _tostdstring(N.length()) + ").").c_str());
- return MOD::fromOption(J_V.empty()?0:&J_V[0], J_N.c_str(), SJ,
- V.empty()?0:&V[0], N.c_str(), S,
- po, jo);
- }
-
- void parseModifierTable(std::string& tabjs, VecOpt& tabjv,
- std::string& tab_s, VecOpt& tab_v,
- std::string const& tabs, VecOpt const& tabv);
- public:
-
- ///Default constructor that creates an empty modifier table.
- ModifierTable(){}
-
- ///@overload
- ///@param deflt Initialize with default table if true, otherwise keep empty.
- ModifierTable(bool deflt){
- if(deflt) setAllToDefault();
- }
-
- ///Reset the match modifier table to its initial (empty) state including memory.
- ///@return A reference to the calling ModifierTable object.
- ModifierTable& resetMatchModifierTable(){
- std::string().swap(tabjms);
- std::string().swap(tabms);
- VecOpt().swap(tabjmv);
- VecOpt().swap(tabmv);
- return *this;
- }
-
- ///Reset the replace modifier table to its initial (empty) state including memory.
- ///@return A reference to the calling ModifierTable object.
- ModifierTable& resetReplaceModifierTable(){
- std::string().swap(tabjrs);
- std::string().swap(tabrs);
- VecOpt().swap(tabjrv);
- VecOpt().swap(tabrv);
- return *this;
- }
-
- ///Reset the compile modifier table to its initial (empty) state including memory.
- ///@return A reference to the calling ModifierTable object.
- ModifierTable& resetCompileModifierTable(){
- std::string().swap(tabjcs);
- std::string().swap(tabcs);
- VecOpt().swap(tabjcv);
- VecOpt().swap(tabcv);
- return *this;
- }
-
- ///Reset the modifier tables to their initial (empty) state including memory.
- ///@return A reference to the calling ModifierTable object.
- ModifierTable& reset(){
- resetMatchModifierTable();
- resetReplaceModifierTable();
- resetCompileModifierTable();
- return *this;
- }
-
- ///Clear the match modifier table to its initial (empty) state.
- ///Memory may retain for further use.
- ///@return A reference to the calling ModifierTable object.
- ModifierTable& clearMatchModifierTable(){
- tabjms.clear();
- tabms.clear();
- tabjmv.clear();
- tabmv.clear();
- return *this;
- }
-
- ///Clear the replace modifier table to its initial (empty) state.
- ///Memory may retain for further use.
- ///@return A reference to the calling ModifierTable object.
- ModifierTable& clearReplaceModifierTable(){
- tabjrs.clear();
- tabrs.clear();
- tabjrv.clear();
- tabrv.clear();
- return *this;
- }
-
- ///Clear the compile modifier table to its initial (empty) state.
- ///Memory may retain for further use.
- ///@return A reference to the calling ModifierTable object.
- ModifierTable& clearCompileModifierTable(){
- tabjcs.clear();
- tabcs.clear();
- tabjcv.clear();
- tabcv.clear();
- return *this;
- }
-
- ///Clear the modifier tables to their initial (empty) state.
- ///Memory may retain for further use.
- ///@return A reference to the calling ModifierTable object.
- ModifierTable& clear(){
- clearMatchModifierTable();
- clearReplaceModifierTable();
- clearCompileModifierTable();
- return *this;
- }
-
- ///Modifier parser for match related options.
- ///@param mod modifier string
- ///@param x whether to add or remove the modifers.
- ///@param po pointer to PCRE2 match option that will be modified.
- ///@param jo pointer to JPCRE2 match option that will be modified.
- ///@param en where to put the error number.
- ///@param eo where to put the error offset.
- void toMatchOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo) const {
- toOption(mod, x,tabjmv,tabjms,tabmv, tabms,po,jo,en,eo);
- }
-
- ///Modifier parser for replace related options.
- ///@param mod modifier string
- ///@param x whether to add or remove the modifers.
- ///@param po pointer to PCRE2 replace option that will be modified.
- ///@param jo pointer to JPCRE2 replace option that will be modified.
- ///@param en where to put the error number.
- ///@param eo where to put the error offset.
- void toReplaceOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo) const {
- return toOption(mod, x,tabjrv,tabjrs,tabrv,tabrs,po,jo,en,eo);
- }
-
- ///Modifier parser for compile related options.
- ///@param mod modifier string
- ///@param x whether to add or remove the modifers.
- ///@param po pointer to PCRE2 compile option that will be modified.
- ///@param jo pointer to JPCRE2 compile option that will be modified.
- ///@param en where to put the error number.
- ///@param eo where to put the error offset.
- void toCompileOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo) const {
- return toOption(mod, x,tabjcv,tabjcs,tabcv,tabcs,po,jo,en,eo);
- }
-
- ///Take match related option value and convert to modifier string.
- ///@param po PCRE2 option.
- ///@param jo JPCRE2 option.
- ///@return modifier string (std::string)
- std::string fromMatchOption(Uint po, Uint jo) const {
- return fromOption(tabjmv,tabjms,tabmv,tabms,po,jo);
- }
-
- ///Take replace related option value and convert to modifier string.
- ///@param po PCRE2 option.
- ///@param jo JPCRE2 option.
- ///@return modifier string (std::string)
- std::string fromReplaceOption(Uint po, Uint jo) const {
- return fromOption(tabjrv,tabjrs,tabrv,tabrs,po,jo);
- }
-
- ///Take compile related option value and convert to modifier string.
- ///@param po PCRE2 option.
- ///@param jo JPCRE2 option.
- ///@return modifier string (std::string)
- std::string fromCompileOption(Uint po, Uint jo) const {
- return fromOption(tabjcv,tabjcs,tabcv,tabcs,po,jo);
- }
-
- ///Set modifier table for match.
- ///Takes a string and a vector of sequential options.
- ///@param tabs modifier string (list of modifiers)
- ///@param tabv vector of Uint (options).
- ///@return A reference to the calling ModifierTable object.
- ModifierTable& setMatchModifierTable(std::string const& tabs, VecOpt const& tabv){
- parseModifierTable(tabjms, tabjmv, tabms, tabmv, tabs, tabv);
- return *this;
- }
-
- ///Set modifier table for match.
- ///Takes a string and an array of sequential options.
- ///@param tabs modifier string (list of modifiers)
- ///@param tabvp array of Uint (options). If null, table is set to empty.
- ///@return A reference to the calling ModifierTable object.
- ModifierTable& setMatchModifierTable(std::string const& tabs, const Uint* tabvp){
- if(tabvp) {
- VecOpt tabv(tabvp, tabvp + tabs.length());
- setMatchModifierTable(tabs, tabv);
- } else clearMatchModifierTable();
- return *this;
- }
-
- ///@overload
- ///...
- ///This one takes modifier and value by array.
- ///If the arrays are not of the same length, the behavior is undefined.
- ///If any of the argument is null, the table is set empty.
- ///@param tabsp modifier string (list of modifiers).
- ///@param tabvp array of Uint (options).
- ///@return A reference to the calling ModifierTable object.
- ModifierTable& setMatchModifierTable(const char* tabsp, const Uint* tabvp){
- if(tabsp && tabvp) {
- std::string tabs(tabsp);
- VecOpt tabv(tabvp, tabvp + tabs.length());
- setMatchModifierTable(tabs, tabv);
- } else clearMatchModifierTable();
- return *this;
- }
-
- ///Set modifier table for replace.
- ///Takes a string and a vector of sequential options.
- ///@param tabs modifier string (list of modifiers)
- ///@param tabv vector of Uint (options).
- ///@return A reference to the calling ModifierTable object.
- ModifierTable& setReplaceModifierTable(std::string const& tabs, VecOpt const& tabv){
- parseModifierTable(tabjrs, tabjrv, tabrs, tabrv, tabs, tabv);
- return *this;
- }
-
- ///Set modifier table for replace.
- ///Takes a string and an array of sequential options.
- ///@param tabs modifier string (list of modifiers)
- ///@param tabvp array of Uint (options). If null, table is set to empty.
- ///@return A reference to the calling ModifierTable object.
- ModifierTable& setReplaceModifierTable(std::string const& tabs, const Uint* tabvp){
- if(tabvp) {
- VecOpt tabv(tabvp, tabvp + tabs.length());
- setReplaceModifierTable(tabs, tabv);
- } else clearReplaceModifierTable();
- return *this;
- }
-
- ///@overload
- ///...
- ///This one takes modifier and value by array.
- ///If the arrays are not of the same length, the behavior is undefined.
- ///If any of the argument is null, the table is set empty.
- ///@param tabsp modifier string (list of modifiers).
- ///@param tabvp array of Uint (options).
- ///@return A reference to the calling ModifierTable object.
- ModifierTable& setReplaceModifierTable(const char* tabsp, const Uint* tabvp){
- if(tabsp && tabvp) {
- std::string tabs(tabsp);
- VecOpt tabv(tabvp, tabvp + tabs.length());
- setReplaceModifierTable(tabs, tabv);
- } else clearReplaceModifierTable();
- return *this;
- }
-
- ///Set modifier table for compile.
- ///Takes a string and a vector of sequential options.
- ///@param tabs modifier string (list of modifiers)
- ///@param tabv vector of Uint (options).
- ///@return A reference to the calling ModifierTable object.
- ModifierTable& setCompileModifierTable(std::string const& tabs, VecOpt const& tabv){
- parseModifierTable(tabjcs, tabjcv, tabcs, tabcv, tabs, tabv);
- return *this;
- }
-
- ///Set modifier table for compile.
- ///Takes a string and an array of sequential options.
- ///@param tabs modifier string (list of modifiers)
- ///@param tabvp array of Uint (options). If null, table is set to empty.
- ///@return A reference to the calling ModifierTable object.
- ModifierTable& setCompileModifierTable(std::string const& tabs, const Uint* tabvp){
- if(tabvp) {
- VecOpt tabv(tabvp, tabvp + tabs.length());
- setCompileModifierTable(tabs, tabv);
- } else clearCompileModifierTable();
- return *this;
- }
-
- ///@overload
- ///...
- ///This one takes modifier and value by array.
- ///If the arrays are not of the same length, the behavior is undefined.
- ///If any of the argument is null, the table is set empty.
- ///@param tabsp modifier string (list of modifiers).
- ///@param tabvp array of Uint (options).
- ///@return A reference to the calling ModifierTable object.
- ModifierTable& setCompileModifierTable(const char* tabsp, const Uint* tabvp){
- if(tabsp && tabvp) {
- std::string tabs(tabsp);
- VecOpt tabv(tabvp, tabvp + tabs.length());
- setCompileModifierTable(tabs, tabv);
- } else clearCompileModifierTable();
- return *this;
- }
-
- ///Set match modifie table to default
- ///@return A reference to the calling ModifierTable object.
- ModifierTable& setMatchModifierTableToDefault(){
- tabjms = std::string(MOD::MJ_N, MOD::MJ_N + sizeof(MOD::MJ_V)/sizeof(Uint));
- tabms = std::string(MOD::M_N, MOD::M_N + sizeof(MOD::M_V)/sizeof(Uint));
- tabjmv = VecOpt(MOD::MJ_V, MOD::MJ_V + sizeof(MOD::MJ_V)/sizeof(Uint));
- tabmv = VecOpt(MOD::M_V, MOD::M_V + sizeof(MOD::M_V)/sizeof(Uint));
- return *this;
- }
-
- ///Set replace modifier table to default.
- ///@return A reference to the calling ModifierTable object.
- ModifierTable& setReplaceModifierTableToDefault(){
- tabjrs = std::string(MOD::RJ_N, MOD::RJ_N + sizeof(MOD::RJ_V)/sizeof(Uint));
- tabrs = std::string(MOD::R_N, MOD::R_N + sizeof(MOD::R_V)/sizeof(Uint));
- tabjrv = VecOpt(MOD::RJ_V, MOD::RJ_V + sizeof(MOD::RJ_V)/sizeof(Uint));
- tabrv = VecOpt(MOD::R_V, MOD::R_V + sizeof(MOD::R_V)/sizeof(Uint));
- return *this;
- }
-
- ///Set compile modifier table to default.
- ///@return A reference to the calling ModifierTable object.
- ModifierTable& setCompileModifierTableToDefault(){
- tabjcs = std::string(MOD::CJ_N, MOD::CJ_N + sizeof(MOD::CJ_V)/sizeof(Uint));
- tabcs = std::string(MOD::C_N, MOD::C_N + sizeof(MOD::C_V)/sizeof(Uint));
- tabjcv = VecOpt(MOD::CJ_V, MOD::CJ_V + sizeof(MOD::CJ_V)/sizeof(Uint));
- tabcv = VecOpt(MOD::C_V, MOD::C_V + sizeof(MOD::C_V)/sizeof(Uint));
- return *this;
- }
-
- ///Set all tables to default.
- ///@return A reference to the calling ModifierTable object.
- ModifierTable& setAllToDefault(){
- setMatchModifierTableToDefault();
- setReplaceModifierTableToDefault();
- setCompileModifierTableToDefault();
- return *this;
- }
- };
//Message strings used for error/warning message construction.
//The primary template only declares the accessors; every supported character
//type provides them through an explicit specialization. The specializations
//are marked inline so that including this header from several translation
//units does not lead to multiple definitions.
template<typename Char_T> struct MSG{
    static std::basic_string<Char_T> INVALID_MODIFIER();
    static std::basic_string<Char_T> INSUFFICIENT_OVECTOR();
};

//char
template<> inline std::string MSG<char>::INVALID_MODIFIER(){
    return std::string("Invalid modifier: ");
}
template<> inline std::string MSG<char>::INSUFFICIENT_OVECTOR(){
    return std::string("ovector wasn't big enough");
}

//wchar_t
template<> inline std::wstring MSG<wchar_t>::INVALID_MODIFIER(){
    return std::wstring(L"Invalid modifier: ");
}
template<> inline std::wstring MSG<wchar_t>::INSUFFICIENT_OVECTOR(){
    return std::wstring(L"ovector wasn't big enough");
}

#if __cplusplus >= 201103L
//char16_t (>= C++11)
template<> inline std::u16string MSG<char16_t>::INVALID_MODIFIER(){
    return std::u16string(u"Invalid modifier: ");
}
template<> inline std::u16string MSG<char16_t>::INSUFFICIENT_OVECTOR(){
    return std::u16string(u"ovector wasn't big enough");
}

//char32_t (>= C++11)
template<> inline std::u32string MSG<char32_t>::INVALID_MODIFIER(){
    return std::u32string(U"Invalid modifier: ");
}
template<> inline std::u32string MSG<char32_t>::INSUFFICIENT_OVECTOR(){
    return std::u32string(U"ovector wasn't big enough");
}
#endif
- ///struct to select the types.
- ///
- ///@tparam Char_T Character type (`char`, `wchar_t`, `char16_t`, `char32_t`)
- ///@tparam Map Optional parameter (Only `>= C++11`) to specify a map container (`std::map`, `std::unordered_map` etc..). Default is `std::map`.
- ///
- ///The character type (`Char_T`) must be in accordance with the PCRE2 library you are linking against.
- ///If not sure which library you need, link against all 3 PCRE2 libraries and they will be used as needed.
- ///
- ///If you want to be specific, then here's the rule:
- ///
- ///1. If `Char_T` is 8 bit, you need 8 bit PCRE2 library
- ///2. If `Char_T` is 16 bit, you need 16 bit PCRE2 library
- ///3. If `Char_T` is 32 bit, you need 32 bit PCRE2 library
- ///4. if `Char_T` is not 8 or 16 or 32 bit, you will get compile error.
- ///
- ///In `>= C++11` you get an additional optional template parameter to specify a map container.
- ///For example, you can use `std::unordered_map` instead of the default `std::map`:
- /// ```cpp
- /// #include <unordered_map>
- /// typedef jpcre2::select<char, std::unordered_map> jp;
- /// ```
- ///
- ///We will use the following typedef throughout this doc:
- ///```cpp
- ///typedef jpcre2::select<Char_T> jp;
- ///```
- #if __cplusplus >= 201103L
- template<typename Char_T, template<typename...> class Map=std::map>
- #else
- template<typename Char_T>
- #endif
- struct select{
- ///Typedef for character (`char`, `wchar_t`, `char16_t`, `char32_t`)
- typedef Char_T Char;
-
- //typedef Char_T Char;
- ///Typedef for string (`std::string`, `std::wstring`, `std::u16string`, `std::u32string`).
- ///Defined as `std::basic_string<Char_T>`.
- ///May be this list will make more sense:
- ///Character | String
- ///--------- | -------
- ///char | std::string
- ///wchar_t | std::wstring
- ///char16_t | std::u16string (>=C++11)
- ///char32_t | std::u32string (>=C++11)
- typedef typename std::basic_string<Char_T> String;
-
- #if __cplusplus >= 201103L
- ///Map for Named substrings.
- typedef class Map<String, String> MapNas;
- ///Substring name to Substring number map.
- typedef class Map<String, SIZE_T> MapNtN;
- #else
- ///Map for Named substrings.
- typedef typename std::map<String, String> MapNas;
- ///Substring name to Substring number map.
- typedef typename std::map<String, SIZE_T> MapNtN;
- #endif
-
- ///Allow spelling mistake of MapNtN as MapNtn.
- typedef MapNtN MapNtn;
-
- ///Vector for Numbered substrings (Sub container).
- typedef typename std::vector<String> NumSub;
- ///Vector of matches with named substrings.
- typedef typename std::vector<MapNas> VecNas;
- ///Vector of substring name to substring number map.
- typedef typename std::vector<MapNtN> VecNtN;
- ///Allow spelling mistake of VecNtN as VecNtn.
- typedef VecNtN VecNtn;
- ///Vector of matches with numbered substrings.
- typedef typename std::vector<NumSub> VecNum;
-
- //These are to shorten the code
- typedef typename Pcre2Type<sizeof( Char_T ) * CHAR_BIT>::Pcre2Uchar Pcre2Uchar;
- typedef typename Pcre2Type<sizeof( Char_T ) * CHAR_BIT>::Pcre2Sptr Pcre2Sptr;
- typedef typename Pcre2Type<sizeof( Char_T ) * CHAR_BIT>::Pcre2Code Pcre2Code;
- typedef typename Pcre2Type<sizeof( Char_T ) * CHAR_BIT>::CompileContext CompileContext;
- typedef typename Pcre2Type<sizeof( Char_T ) * CHAR_BIT>::MatchData MatchData;
- typedef typename Pcre2Type<sizeof( Char_T ) * CHAR_BIT>::GeneralContext GeneralContext;
- typedef typename Pcre2Type<sizeof( Char_T ) * CHAR_BIT>::MatchContext MatchContext;
- typedef typename Pcre2Type<sizeof( Char_T ) * CHAR_BIT>::JitCallback JitCallback;
- typedef typename Pcre2Type<sizeof( Char_T ) * CHAR_BIT>::JitStack JitStack;
-
- template<typename T>
- static String toString(T); //prevent implicit type conversion of T
-
- ///Converts a Char_T to jpcre2::select::String
- ///@param a Char_T
- ///@return jpcre2::select::String
- static String toString(Char a){
- return a?String(1, a):String();
- }
-
- ///@overload
- ///...
- ///Converts a Char_T const * to jpcre2::select::String
- ///@param a Char_T const *
- ///@return jpcre2::select::String
- static String toString(Char const *a){
- return a?String(a):String();
- }
-
- ///@overload
- ///...
- ///Converts a Char_T* to jpcre2::select::String
- ///@param a Char_T const *
- ///@return jpcre2::select::String
- static String toString(Char* a){
- return a?String(a):String();
- }
-
- ///@overload
- ///...
- ///Converts a PCRE2_UCHAR to String
- ///@param a PCRE2_UCHAR
- ///@return jpcre2::select::String
- static String toString(Pcre2Uchar* a) {
- return a?String((Char*) a):String();
- }
-
- ///Retruns error message from PCRE2 error number
- ///@param err_num error number (negative)
- ///@return message as jpcre2::select::String.
- static String getPcre2ErrorMessage(int err_num) {
- Pcre2Uchar buffer[sizeof(Char)*CHAR_BIT*1024];
- Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::get_error_message(err_num, buffer, sizeof(buffer));
- return toString((Pcre2Uchar*) buffer);
- }
-
- ///Returns error message (either JPCRE2 or PCRE2) from error number and error offset
- ///@param err_num error number (negative for PCRE2, positive for JPCRE2)
- ///@param err_off error offset
- ///@return message as jpcre2::select::String.
- static String getErrorMessage(int err_num, int err_off) {
- if(err_num == (int)ERROR::INVALID_MODIFIER){
- return MSG<Char>::INVALID_MODIFIER() + toString((Char)err_off);
- } else if(err_num == (int)ERROR::INSUFFICIENT_OVECTOR){
- return MSG<Char>::INSUFFICIENT_OVECTOR();
- } else if(err_num != 0) {
- return getPcre2ErrorMessage((int) err_num);
- } else return String();
- }
- //forward declaration
- class Regex;
- class RegexMatch;
- class RegexReplace;
- class MatchEvaluator;
-
- /** Provides public constructors to create RegexMatch objects.
- * Every RegexMatch object should be associated with a Regex object.
- * This class stores a pointer to its' associated Regex object, thus when
- * the content of the associated Regex object is changed, there will be no need to
- * set the pointer again.
- *
- * Examples:
- *
- * ```cpp
- * jp::Regex re;
- * jp::RegexMatch rm;
- * rm.setRegexObject(&re);
- * rm.match("subject", "g"); // 0 match
- * re.compile("\\w");
- * rm.match(); // 7 matches
- * ```
- */
- class RegexMatch {
- private:
- friend class MatchEvaluator;
- Regex const *re;
- String m_subject;
- String const *m_subject_ptr;
- Uint match_opts;
- Uint jpcre2_match_opts;
- MatchContext *mcontext;
- ModifierTable const * modtab;
- MatchData * mdata;
-
- PCRE2_SIZE _start_offset; //name collision, use _ at start
- VecNum* vec_num;
- VecNas* vec_nas;
- VecNtN* vec_ntn;
-
- VecOff* vec_soff;
- VecOff* vec_eoff;
- bool getNumberedSubstrings(int, Pcre2Sptr, PCRE2_SIZE*);
- bool getNamedSubstrings(int, int, Pcre2Sptr, Pcre2Sptr, PCRE2_SIZE*);
-
- void init_vars() {
- re = 0;
- vec_num = 0;
- vec_nas = 0;
- vec_ntn = 0;
- vec_soff = 0;
- vec_eoff = 0;
- match_opts = 0;
- jpcre2_match_opts = 0;
- error_number = 0;
- error_offset = 0;
- _start_offset = 0;
- m_subject_ptr = &m_subject;
- mcontext = 0;
- modtab = 0;
- mdata = 0;
- }
-
- void onlyCopy(RegexMatch const &rm){
- re = rm.re; //only pointer should be copied
-
- //pointer to subject may point to m_subject or other user data
- m_subject_ptr = (rm.m_subject_ptr == &rm.m_subject) ? &m_subject //not &rm.m_subject
- : rm.m_subject_ptr;
-
- //underlying data of vectors are not handled by RegexMatch
- //thus it's safe to just copy the pointers.
- vec_num = rm.vec_num;
- vec_nas = rm.vec_nas;
- vec_ntn = rm.vec_ntn;
- vec_soff = rm.vec_soff;
- vec_eoff = rm.vec_eoff;
-
- match_opts = rm.match_opts;
- jpcre2_match_opts = rm.jpcre2_match_opts;
- error_number = rm.error_number;
- error_offset = rm.error_offset;
- _start_offset = rm._start_offset;
- mcontext = rm.mcontext;
- modtab = rm.modtab;
- mdata = rm.mdata;
- }
-
- void deepCopy(RegexMatch const &rm){
- m_subject = rm.m_subject;
- onlyCopy(rm);
- }
-
- #if __cplusplus >= 201103L
- void deepMove(RegexMatch& rm){
- m_subject = std::move_if_noexcept(rm.m_subject);
- onlyCopy(rm);
- }
- #endif
- friend class Regex;
- protected:
-
- int error_number;
- PCRE2_SIZE error_offset;
- public:
-
- ///Default constructor.
- RegexMatch(){
- init_vars();
- }
-
- ///@overload
- ///...
- ///Creates a RegexMatch object associating a Regex object.
- ///Underlying data is not modified.
- ///@param r pointer to a Regex object
- RegexMatch(Regex const *r) {
- init_vars();
- re = r;
- }
-
- ///@overload
- ///...
- ///Copy constructor.
- ///@param rm Reference to RegexMatch object
- RegexMatch(RegexMatch const &rm){
- init_vars();
- deepCopy(rm);
- }
-
- ///Overloaded copy-assignment operator.
- ///@param rm RegexMatch object
- ///@return A reference to the calling RegexMatch object.
- virtual RegexMatch& operator=(RegexMatch const &rm){
- if(this == &rm) return *this;
- deepCopy(rm);
- return *this;
- }
-
- #if __cplusplus >= 201103L
- ///@overload
- ///...
- ///Move constructor.
- ///This constructor steals resources from the argument.
- ///It leaves the argument in a valid but indeterminate sate.
- ///The indeterminate state can be returned to normal by calling reset() on that object.
- ///@param rm rvalue reference to a RegexMatch object
- RegexMatch(RegexMatch&& rm){
- init_vars();
- deepMove(rm);
- }
-
- ///@overload
- ///...
- ///Overloaded move-assignment operator.
- ///This constructor steals resources from the argument.
- ///It leaves the argument in a valid but indeterminate sate.
- ///The indeterminate state can be returned to normal by calling reset() on that object.
- ///@param rm rvalue reference to a RegexMatch object
- ///@return A reference to the calling RegexMatch object.
- virtual RegexMatch& operator=(RegexMatch&& rm){
- if(this == &rm) return *this;
- deepMove(rm);
- return *this;
- }
- #endif
-
- ///Destructor
- ///Frees all internal memories that were used.
- virtual ~RegexMatch() {}
- ///Reset all class variables to its default (initial) state including memory.
- ///Data in the vectors will retain (as it's external)
- ///You will need to pass vector pointers again after calling this function to get match results.
- ///@return Reference to the calling RegexMatch object.
- virtual RegexMatch& reset() {
- String().swap(m_subject); //not ptr , external string won't be modified.
- init_vars();
- return *this;
- }
-
- ///Clear all class variables (may retain some memory for further use).
- ///Data in the vectors will retain (as it's external)
- ///You will need to pass vector pointers again after calling this function to get match results.
- ///@return Reference to the calling RegexMatch object.
- virtual RegexMatch& clear(){
- m_subject.clear(); //not ptr , external string won't be modified.
- init_vars();
- return *this;
- }
-
- ///reset match related errors to zero.
- ///If you want to examine the error status of a function call in the method chain,
- ///add this function just before your target function so that the error is set to zero
- ///before that target function is called, and leave everything out after the target
- ///function so that there will be no additional errors from other function calls.
- ///@return A reference to the RegexMatch object
- ///@see Regex::resetErrors()
- ///@see RegexReplace::resetErrors()
- virtual RegexMatch& resetErrors(){
- error_number = 0;
- error_offset = 0;
- return *this;
- }
-
- /// Returns the last error number
- ///@return Last error number
- virtual int getErrorNumber() const {
- return error_number;
- }
- /// Returns the last error offset
- ///@return Last error offset
- virtual int getErrorOffset() const {
- return (int)error_offset;
- }
-
- /// Returns the last error message
- ///@return Last error message
- virtual String getErrorMessage() const {
- #if __cplusplus >= 201103L
- return select<Char, Map>::getErrorMessage(error_number, error_offset);
- #else
- return select<Char>::getErrorMessage(error_number, error_offset);
- #endif
- }
-
- ///Get subject string (by value).
- ///@return subject string
- ///@see RegexReplace::getSubject()
- virtual String getSubject() const {
- return *m_subject_ptr;
- }
-
- ///Get pointer to subject string.
- ///Data can not be changed with this pointer.
- ///@return constant subject string pointer
- ///@see RegexReplace::getSubjectPointer()
- virtual String const * getSubjectPointer() const {
- return m_subject_ptr;
- }
- /// Calculate modifier string from PCRE2 and JPCRE2 options and return it.
- ///
- /// Do remember that modifiers (or PCRE2 and JPCRE2 options) do not change or get initialized
- /// as long as you don't do that explicitly. Calling RegexMatch::setModifier() will re-set them.
- ///
- /// **Mixed or combined modifier**.
- ///
- /// Some modifier may include other modifiers i.e they have the same meaning of some modifiers
- /// combined together. For example, the 'n' modifier includes the 'u' modifier and together they
- /// are equivalent to `PCRE2_UTF | PCRE2_UCP`. When you set a modifier like this, both options
- /// get set, and when you remove the 'n' modifier (with `RegexMatch::changeModifier()`), both will get removed.
- ///@return Calculated modifier string (std::string)
- ///@see Regex::getModifier()
- ///@see RegexReplace::getModifier()
- virtual std::string getModifier() const {
- return modtab ? modtab->fromMatchOption(match_opts, jpcre2_match_opts)
- : MOD::fromMatchOption(match_opts, jpcre2_match_opts);
- }
-
- ///Get the modifier table that is set,
- ///@return pointer to constant ModifierTable.
- virtual ModifierTable const* getModifierTable(){
- return modtab;
- }
-
-
- ///Get PCRE2 option
- ///@return PCRE2 option for match operation
- ///@see Regex::getPcre2Option()
- ///@see RegexReplace::getPcre2Option()
- virtual Uint getPcre2Option() const {
- return match_opts;
- }
- /// Get JPCRE2 option
- ///@return JPCRE2 options for math operation
- ///@see Regex::getJpcre2Option()
- ///@see RegexReplace::getJpcre2Option()
- virtual Uint getJpcre2Option() const {
- return jpcre2_match_opts;
- }
-
- /// Get offset from where match will start in the subject.
- /// @return Start offset
- virtual PCRE2_SIZE getStartOffset() const {
- return _start_offset;
- }
-
- ///Get pre-set match start offset vector pointer.
- ///The pointer must be set with RegexMatch::setMatchStartOffsetVector() beforehand
- ///for this to work i.e it is just a convenience method to get the pre-set vector pointer.
- ///@return pointer to the const match start offset vector
- virtual VecOff const* getMatchStartOffsetVector() const {
- return vec_soff;
- }
-
- ///Get pre-set match end offset vector pointer.
- ///The pointer must be set with RegexMatch::setMatchEndOffsetVector() beforehand
- ///for this to work i.e it is just a convenience method to get the pre-set vector pointer.
- ///@return pointer to the const end offset vector
- virtual VecOff const* getMatchEndOffsetVector() const {
- return vec_eoff;
- }
-
- ///Get a pointer to the associated Regex object.
- ///If no actual Regex object is associated, null is returned.
- ///@return A pointer to the associated constant Regex object or null.
- virtual Regex const * getRegexObject() const {
- return re;
- }
-
- ///Get pointer to numbered substring vector.
- ///@return Pointer to const numbered substring vector.
- virtual VecNum const* getNumberedSubstringVector() const {
- return vec_num;
- }
-
- ///Get pointer to named substring vector.
- ///@return Pointer to const named substring vector.
- virtual VecNas const* getNamedSubstringVector() const {
- return vec_nas;
- }
-
- ///Get pointer to name to number map vector.
- ///@return Pointer to const name to number map vector.
- virtual VecNtN const* getNameToNumberMapVector() const {
- return vec_ntn;
- }
-
- ///Set the associated regex object.
- ///Null pointer unsets it.
- ///Underlying data is not modified.
- ///@param r Pointer to a Regex object.
- ///@return Reference to the calling RegexMatch object.
- virtual RegexMatch& setRegexObject(Regex const *r){
- re = r;
- return *this;
- }
- /// Set a pointer to the numbered substring vector.
- /// Null pointer unsets it.
- ///
- /// This vector will be filled with numbered (indexed) captured groups.
- /// @param v pointer to the numbered substring vector
- /// @return Reference to the calling RegexMatch object
- virtual RegexMatch& setNumberedSubstringVector(VecNum* v) {
- vec_num = v;
- return *this;
- }
- /// Set a pointer to the named substring vector.
- /// Null pointer unsets it.
- ///
- /// This vector will be populated with named captured groups.
- /// @param v pointer to the named substring vector
- /// @return Reference to the calling RegexMatch object
- virtual RegexMatch& setNamedSubstringVector(VecNas* v) {
- vec_nas = v;
- return *this;
- }
- /// Set a pointer to the name to number map vector.
- /// Null pointer unsets it.
- ///
- /// This vector will be populated with name to number map for captured groups.
- /// @param v pointer to the name to number map vector
- /// @return Reference to the calling RegexMatch object
- virtual RegexMatch& setNameToNumberMapVector(VecNtN* v) {
- vec_ntn = v;
- return *this;
- }
-
- /// Set the pointer to a vector to store the offsets where matches
- /// start in the subject.
- /// Null pointer unsets it.
- /// @param v Pointer to a jpcre2::VecOff vector (std::vector<size_t>)
- /// @return Reference to the calling RegexMatch object
- virtual RegexMatch& setMatchStartOffsetVector(VecOff* v){
- vec_soff = v;
- return *this;
- }
-
- /// Set the pointer to a vector to store the offsets where matches
- /// end in the subject.
- /// Null pointer unsets it.
- /// @param v Pointer to a VecOff vector (std::vector<size_t>)
- /// @return Reference to the calling RegexMatch object
- virtual RegexMatch& setMatchEndOffsetVector(VecOff* v){
- vec_eoff = v;
- return *this;
- }
- ///Set the subject string for match.
- ///This makes a copy of the subject string.
- /// @param s Subject string
- /// @return Reference to the calling RegexMatch object
- /// @see RegexReplace::setSubject()
- virtual RegexMatch& setSubject(String const &s) {
- m_subject = s;
- m_subject_ptr = &m_subject; //must overwrite
- return *this;
- }
- ///@overload
- ///...
- /// Works with the original without modifying it. Null pointer unsets the subject.
- /// @param s Pointer to subject string
- /// @return Reference to the calling RegexMatch object
- /// @see RegexReplace::setSubject()
- virtual RegexMatch& setSubject(String const *s) {
- if(s) m_subject_ptr = s;
- else {
- m_subject.clear();
- m_subject_ptr = &m_subject;
- }
- return *this;
- }
- /// Set the modifier (resets all JPCRE2 and PCRE2 options) by calling RegexMatch::changeModifier().
- /// Re-initializes the option bits for PCRE2 and JPCRE2 options, then parses the modifier to set their equivalent options.
- /// @param s Modifier string.
- /// @return Reference to the calling RegexMatch object
- /// @see RegexReplace::setModifier()
- /// @see Regex::setModifier()
- virtual RegexMatch& setModifier(Modifier const& s) {
- match_opts = 0;
- jpcre2_match_opts = 0;
- changeModifier(s, true);
- return *this;
- }
-
- ///Set a custom modifier table to be used.
- ///@param mdt pointer to ModifierTable object.
- ///@return Reference to the calling RegexMatch object.
- virtual RegexMatch& setModifierTable(ModifierTable const * mdt){
- modtab = mdt;
- return *this;
- }
- /// Set JPCRE2 option for match (resets all)
- /// @param x Option value
- /// @return Reference to the calling RegexMatch object
- /// @see RegexReplace::setJpcre2Option()
- /// @see Regex::setJpcre2Option()
- virtual RegexMatch& setJpcre2Option(Uint x) {
- jpcre2_match_opts = x;
- return *this;
- }
- ///Set PCRE2 option match (overwrite existing option)
- /// @param x Option value
- /// @return Reference to the calling RegexMatch object
- /// @see RegexReplace::setPcre2Option()
- /// @see Regex::setPcre2Option()
- virtual RegexMatch& setPcre2Option(Uint x) {
- match_opts = x;
- return *this;
- }
-
- /// Set whether to perform global match
- /// @param x True or False
- /// @return Reference to the calling RegexMatch object
- virtual RegexMatch& setFindAll(bool x) {
- jpcre2_match_opts = x?jpcre2_match_opts | FIND_ALL:jpcre2_match_opts & ~FIND_ALL;
- return *this;
- }
- ///@overload
- ///...
- ///This function just calls RegexMatch::setFindAll(bool x) with `true` as the parameter
- ///@return Reference to the calling RegexMatch object
- virtual RegexMatch& setFindAll() {
- return setFindAll(true);
- }
- /// Set offset from where match starts.
- /// When FIND_ALL is set, a global match would not be performed on all positions on the subject,
- /// rather it will be performed from the start offset and onwards.
- /// @param offset Start offset
- /// @return Reference to the calling RegexMatch object
- virtual RegexMatch& setStartOffset(PCRE2_SIZE offset) {
- _start_offset = offset;
- return *this;
- }
-
- ///Set the match context.
- ///You can create match context using the native PCRE2 API.
- ///The memory is not handled by RegexMatch object and not freed.
- ///User will be responsible for freeing the memory of the match context.
- ///@param match_context Pointer to the match context.
- ///@return Reference to the calling RegexMatch object
- virtual RegexMatch& setMatchContext(MatchContext *match_context){
- mcontext = match_context;
- return *this;
- }
-
- ///Return pointer to the match context that was previously set with setMatchContext().
- ///Handling memory is the callers' responsibility.
- ///@return pointer to the match context (default: null).
- virtual MatchContext* getMatchContext(){
- return mcontext;
- }
-
- ///Set the match data block to be used.
- ///The memory is not handled by RegexMatch object and not freed.
- ///User will be responsible for freeing the memory of the match data block.
- ///@param madt Pointer to a match data block.
- ///@return Reference to the calling RegexMatch object
- virtual RegexMatch& setMatchDataBlock(MatchData* madt){
- mdata = madt;
- return *this;
- }
-
- ///Get the pointer to the match data block that was set previously with setMatchData()
- ///Handling memory is the callers' responsibility.
- ///@return pointer to the match data (default: null).
- virtual MatchData* getMatchDataBlock(){
- return mdata;
- }
-
- /// Parse modifier and add/remove equivalent PCRE2 and JPCRE2 options.
- /// This function does not initialize or re-initialize options.
- /// If you want to set options from scratch, initialize them to 0 before calling this function.
- /// If invalid modifier is detected, then the error number for the RegexMatch
- /// object will be jpcre2::ERROR::INVALID_MODIFIER and error offset will be the modifier character.
- /// You can get the message with RegexMatch::getErrorMessage() function.
- ///
- /// @param mod Modifier string.
- /// @param x Whether to add or remove option
- /// @return Reference to the RegexMatch object
- /// @see Regex::changeModifier()
- /// @see RegexReplace::changeModifier()
- virtual RegexMatch& changeModifier(Modifier const& mod, bool x){
- modtab ? modtab->toMatchOption(mod, x, &match_opts, &jpcre2_match_opts, &error_number, &error_offset)
- : MOD::toMatchOption(mod, x, &match_opts, &jpcre2_match_opts, &error_number, &error_offset);
- return *this;
- }
- /// Add or remove a JPCRE2 option
- /// @param opt JPCRE2 option value
- /// @param x Add the option if it's true, remove otherwise.
- /// @return Reference to the calling RegexMatch object
- /// @see RegexReplace::changeJpcre2Option()
- /// @see Regex::changeJpcre2Option()
- virtual RegexMatch& changeJpcre2Option(Uint opt, bool x) {
- jpcre2_match_opts = x ? jpcre2_match_opts | opt : jpcre2_match_opts & ~opt;
- return *this;
- }
- /// Add or remove a PCRE2 option
- /// @param opt PCRE2 option value
- /// @param x Add the option if it's true, remove otherwise.
- /// @return Reference to the calling RegexMatch object
- /// @see RegexReplace::changePcre2Option()
- /// @see Regex::changePcre2Option()
- virtual RegexMatch& changePcre2Option(Uint opt, bool x) {
- match_opts = x ? match_opts | opt : match_opts & ~opt;
- return *this;
- }
-
- /// Parse modifier string and add equivalent PCRE2 and JPCRE2 options.
- /// This is just a wrapper of the original function RegexMatch::changeModifier()
- /// @param mod Modifier string.
- /// @return Reference to the calling RegexMatch object
- /// @see RegexReplace::addModifier()
- /// @see Regex::addModifier()
- virtual RegexMatch& addModifier(Modifier const& mod){
- return changeModifier(mod, true);
- }
- /// Add option to existing JPCRE2 options for match
- /// @param x Option value
- /// @return Reference to the calling RegexMatch object
- /// @see RegexReplace::addJpcre2Option()
- /// @see Regex::addJpcre2Option()
- virtual RegexMatch& addJpcre2Option(Uint x) {
- jpcre2_match_opts |= x;
- return *this;
- }
- /// Add option to existing PCRE2 options for match
- /// @param x Option value
- /// @return Reference to the calling RegexMatch object
- /// @see RegexReplace::addPcre2Option()
- /// @see Regex::addPcre2Option()
- virtual RegexMatch& addPcre2Option(Uint x) {
- match_opts |= x;
- return *this;
- }
-
- /// Perform match operation using info from class variables and return the match count and
- /// store the results in specified vectors.
- ///
- /// Note: This function uses pcre2_match() function to do the match.
- ///@return Match count
- virtual SIZE_T match(void);
- };
-
- ///This class contains a typedef of a function pointer or a templated function wrapper (`std::function`)
- ///to provide callback function to the `MatchEvaluator`.
- ///`std::function` is used when `>=C++11` is being used , otherwise function pointer is used.
- ///You can force using function pointer instead of `std::function` when `>=C++11` is used by defining the macro
- ///`JPCRE2_USE_FUNCTION_POINTER_CALLBACK` before including jpcre2.hpp.
- ///If you are using lambda function with capture, you must use the `std::function` approach.
- ///
- ///The callback function takes exactly three positional arguments:
- ///@tparam T1 The first argument must be `jp::NumSub const &` aka `std::vector<String> const &` (or `void*` if not needed).
- ///@tparam T2 The second argument must be `jp::MapNas const &` aka `std::map<String, size_t> const &` (or `void*` if not needed).
- ///@tparam T3 The third argument must be `jp::MapNtN const &` aka `std::map<String, String> const &` (or `void*` if not needed).
- ///
- /// **Examples:**
- /// ```cpp
- /// typedef jpcre2::select<char> jp;
- /// jp::String myCallback1(jp::NumSub const &m1, void*, void*){
- /// return "("+m1[0]+")";
- /// }
- ///
- /// jp::String myCallback2(jp::NumSub const &m1, jp::MapNas const &m2, void*){
- /// return "("+m1[0]+"/"+m2.at("total")+")";
- /// }
- /// //Now you can pass these functions in MatchEvaluator constructors to create a match evaluator
- /// jp::MatchEvaluator me1(myCallback1);
- ///
- /// //Examples with lambda (>=C++11)
- /// jp::MatchEvaluator me2([](jp::NumSub const &m1, void*, void*)
- /// {
- /// return "("+m1[0]+")";
- /// });
- /// ```
- ///@see MatchEvaluator
- template<typename T1, typename T2, typename T3>
- struct MatchEvaluatorCallback{
- #if !defined JPCRE2_USE_FUNCTION_POINTER_CALLBACK && __cplusplus >= 201103L
- typedef std::function<String (T1,T2,T3)> Callback;
- #else
- typedef String (*Callback)(T1,T2,T3);
- #endif
- };
- ///Provides some default static callback functions.
- ///The primary goal of this class is to provide default
- ///callback function to MatchEvaluator default constructor which is
- ///essentially callback::erase.
- ///This class does not allow object instantiation.
- struct callback{
- ///Callback function that removes the matched part/s in the subject string
- /// and takes all match vectors as argument.
- ///Even though this function itself does not use the vectors, it still takes them
- ///so that the caller can perform a match and populate all the match data to perform
- ///further evaluation of other callback functions without doing the match again.
- ///@param num jp::NumSub vector.
- ///@param nas jp::MapNas map.
- ///@param ntn jp::MapNtN map.
- ///@return empty string.
- static String eraseFill(NumSub const &num, MapNas const &nas, MapNtN const &ntn){
- return String();
- }
-
- ///Callback function that removes the matched part/s in the subject string
- ///and does not take any match vector.
- ///This is a minimum cost pattern deleting callback function.
- ///
- ///It's the default callback function when you Instantiate
- ///a MatchEvaluator object with its default constructor:
- ///```cpp
- ///MatchEvaluator me;
- ///```
- ///@return empty string.
- static String erase(void*, void*, void*){
- return String();
- }
-
- ///Callback function for populating match vectors that does not modify the subject string.
- ///It always returns the total matched part and thus the subject string remains the same.
- ///@param num jp::NumSub vector.
- ///@param nas jp::MapNas map.
- ///@param ntn jp::MapNtN map.
- ///@return total match (group 0) of current match.
- static String fill(NumSub const &num, MapNas const &nas, MapNtn const &ntn){
- return num[0];
- }
-
- private:
- //prevent object instantiation.
- callback();
- callback(callback const &);
- #if __cplusplus >= 201103L
- callback(callback&&);
- #endif
- ~callback();
- };
- ///This class inherits RegexMatch and provides a similar functionality.
- ///All public member functions from RegexMatch class are publicly available except the following:
- ///* setNumberedSubstringVector
- ///* setNamedSubstringVector
- ///* setNameToNumberMapVector
- ///* setMatchStartOffsetVector
- ///* setMatchEndOffsetVector
- ///
- ///The use of above functions is not allowed as the vectors are created according to the callback function you pass.
- ///
- ///A callback function can be passed to a constructor or set later with `MatchEvaluator::setCallback()`
- ///(see `MatchEvaluatorCallback`). Without one, callback::erase is used.
- ///
- ///It provides a MatchEvaluator::nreplace() function to perform replace operation using native JPCRE2 approach
- ///and `MatchEvaluator::replace()` function for PCRE2 compatible replace operation.
- ///
- ///An instance of this class can also be passed with `RegexReplace::nreplace()` or `RegexReplace::replace()` function to perform replacement
- ///according to this match evaluator.
- ///
- ///Match data is stored in vectors, and the vectors are populated according to the callback functions.
- ///Populated vector data is never deleted but they get overwritten. Vector data can be manually zeroed out
- ///by calling `MatchEvaluator::clearMatchData()`. If the capacities of those match vectors are desired
- ///to be shrunk too instead of just clearing them, use `MatchEvaluator::resetMatchData()` instead.
- ///
- /// # Re-usability of Match Data
- /// A match data populated with a callback function that takes only a jp::NumSub vector is not compatible
- /// with the data created according to callback function with a jp::MapNas vector.
- /// Because, for this later callback, jp::MapNas data is required but is not available (only jp::NumSub is available).
- /// In such cases, previous Match data can not be used to perform a new replacement operation with this second callback function.
- ///
- /// To populate the match vectors, one must call the `MatchEvaluator::match()` or `MatchEvaluator::nreplace()` function, they will populate
- /// vectors with match data according to call back function.
- ///
- /// ## Example:
- ///
- /// ```cpp
- /// jp::String callback5(NumSub const &m, void*, MapNtn const &n){
- ///     return m[0];
- /// }
- /// jp::String callback4(void*, void*, MapNtn const &n){
- ///     return std::to_string(n.at("name")); //position of group 'name'.
- /// }
- /// jp::String callback2(void*, MapNas const &m, void*){
- ///     return m.at("name"); //substring by name
- /// }
- ///
- /// jp::MatchEvaluator me;
- /// me.setRegexObject(&re).setSubject("string").setCallback(callback5).nreplace();
- /// //In above, nreplace() populates jp::NumSub and jp::MapNtn with match data.
- ///
- /// me.setCallback(callback4).nreplace(false);
- /// //the above uses previous match result (note the 'false') which is OK,
- /// //because, callback4 requires jp::MapNtn which was made available in the previous operation.
- ///
- /// //but the following is not OK: (assertion failure)
- /// me.setCallback(callback2).nreplace(false);
- /// //because, callback2 requires jp::MapNas data which is not available.
- /// //now, this is OK:
- /// me.setCallback(callback2).nreplace();
- /// //because, it will recreate those match data including this one (jp::MapNas).
- /// ```
- ///
- /// # Replace options
- /// MatchEvaluator can not take replace options.
- /// Replace options are taken directly by the replace functions: `nreplace()` and `replace()`.
- ///
- /// # Using as a match object
- /// As it's just a subclass of RegexMatch, it can do all the things that RegexMatch can do, with some restrictions:
- /// * matching options are modified to strip off bad options according to replacement (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT).
- /// * match depends on the callback function. Only those vectors will be populated that are implemented by the callback functions so far
- ///    (multiple callback function will set multiple match data vectors.)
- /// * match vectors are internal to this class, you can not set them manually (without callback function). (you can get pointers to these vectors
- ///    with `getNumberedSubstringVector()` and related functions).
- ///
- ///@see MatchEvaluatorCallback
- ///@see RegexReplace::nreplace()
- class MatchEvaluator: virtual public RegexMatch{
- private:
-     friend class RegexReplace;
-
-     //Match data storage owned by this object; handed to RegexMatch by pointer.
-     VecNum vec_num;
-     VecNas vec_nas;
-     VecNtN vec_ntn;
-     VecOff vec_soff;
-     VecOff vec_eoff;
-     //Number (0..7) of the callbackN member that is currently in effect.
-     int callbackn;
-     typename MatchEvaluatorCallback<void*, void*, void*>::Callback callback0;
-     typename MatchEvaluatorCallback<NumSub const &, void*, void*>::Callback callback1;
-     typename MatchEvaluatorCallback<void*, MapNas const &, void*>::Callback callback2;
-     typename MatchEvaluatorCallback<NumSub const &, MapNas const &, void*>::Callback callback3;
-     typename MatchEvaluatorCallback<void*, void*, MapNtN const &>::Callback callback4;
-     typename MatchEvaluatorCallback<NumSub const &, void*, MapNtN const &>::Callback callback5;
-     typename MatchEvaluatorCallback<void*, MapNas const &, MapNtN const &>::Callback callback6;
-     typename MatchEvaluatorCallback<NumSub const &, MapNas const &, MapNtN const &>::Callback callback7;
-     //Q: Why the callback names seem random? is it random?
-     //A: No, it's not random, NumSub = 1, MapNas = 2, MapNtn = 4, thus:
-     //      NumSub + MapNas = 3
-     //      NumSub + MapNtn = 5
-     //      MapNas + MapNtn = 6
-     //      NumSub + MapNas + MapNtn = 7
-     //Q: Why is it like this?
-     //A: It's historical. Once, there was not this many callback declaration, there was only one (a templated one).
-     //   The nreplace function itself used to calculate a mode value according to available vectors
-     //   and determine what kind of callback function needed to be called.
-     //Q: Why the history changed?
-     //A: We had some compatibility issues with the single templated callback.
-     //   Also, this approach proved to be more readable and robust.
-
-     //Initial buffer size for replace(); see setBufferSize().
-     PCRE2_SIZE buffer_size;
-
-
-     //Put the evaluator specific members into their initial state:
-     //callback::erase as the active callback, all other callbacks unset,
-     //offset vectors registered with the base class and buffer size 0.
-     void init(){
-         callbackn = 0;
-         callback0 = callback::erase;
-         callback1 = 0;
-         callback2 = 0;
-         callback3 = 0;
-         callback4 = 0;
-         callback5 = 0;
-         callback6 = 0;
-         callback7 = 0;
-         setMatchStartOffsetVector(&vec_soff);
-         setMatchEndOffsetVector(&vec_eoff);
-         buffer_size = 0;
-     }
-
-     //Register with the base class the vectors required by the active callback.
-     //callbackn is a bit sum: NumSub = 1, MapNas = 2, MapNtn = 4.
-     void setVectorPointersAccordingToCallback(){
-         if(callbackn & 1) setNumberedSubstringVector(&vec_num);
-         if(callbackn & 2) setNamedSubstringVector(&vec_nas);
-         if(callbackn & 4) setNameToNumberMapVector(&vec_ntn);
-     }
-
-     //Copy everything except the contents of the match vectors.
-     void onlyCopy(MatchEvaluator const &me){
-         callbackn = me.callbackn;
-         callback0 = me.callback0;
-         callback1 = me.callback1;
-         callback2 = me.callback2;
-         callback3 = me.callback3;
-         callback4 = me.callback4;
-         callback5 = me.callback5;
-         callback6 = me.callback6;
-         callback7 = me.callback7;
-         //must update the pointers to point to this class vectors.
-         setVectorPointersAccordingToCallback();
-         //The offset vectors are always in use (see init()). The assignment operators
-         //do not call init(), so re-point them here too; otherwise pointers taken over
-         //from the base part of 'me' could still refer to the vectors of 'me'.
-         setMatchStartOffsetVector(&vec_soff);
-         setMatchEndOffsetVector(&vec_eoff);
-         //NOTE(review): a numbered/named vector that was registered in 'me' by an earlier
-         //callback but is not required by the current one is not re-pointed here -- confirm
-         //against RegexMatch whether such a pointer can still refer to 'me' after a copy.
-         buffer_size = me.buffer_size;
-     }
-
-     //Copy the match vectors, then everything else.
-     void deepCopy(MatchEvaluator const &me) {
-         vec_num = me.vec_num;
-         vec_nas = me.vec_nas;
-         vec_ntn = me.vec_ntn;
-         vec_soff = me.vec_soff;
-         vec_eoff = me.vec_eoff;
-         onlyCopy(me);
-     }
-     #if __cplusplus >= 201103L
-     //Take over the match vectors of 'me', then copy everything else.
-     void deepMove(MatchEvaluator& me){
-         vec_num = std::move_if_noexcept(me.vec_num);
-         vec_nas = std::move_if_noexcept(me.vec_nas);
-         vec_ntn = std::move_if_noexcept(me.vec_ntn);
-         vec_soff = std::move_if_noexcept(me.vec_soff);
-         vec_eoff = std::move_if_noexcept(me.vec_eoff);
-         onlyCopy(me);
-     }
-     #endif
-
-     //prevent public access to some functions
-     MatchEvaluator& setNumberedSubstringVector(VecNum* v){
-         RegexMatch::setNumberedSubstringVector(v);
-         return *this;
-     }
-     MatchEvaluator& setNamedSubstringVector(VecNas* v){
-         RegexMatch::setNamedSubstringVector(v);
-         return *this;
-     }
-     MatchEvaluator& setNameToNumberMapVector(VecNtN* v){
-         RegexMatch::setNameToNumberMapVector(v);
-         return *this;
-     }
-     MatchEvaluator& setMatchStartOffsetVector(VecOff* v){
-         RegexMatch::setMatchStartOffsetVector(v);
-         return *this;
-     }
-     MatchEvaluator& setMatchEndOffsetVector(VecOff* v){
-         RegexMatch::setMatchEndOffsetVector(v);
-         return *this;
-     }
-
- public:
-
-     ///Default constructor.
-     ///Sets callback::erase as the callback function.
-     ///Removes matched part/s from the subject string if the callback is not
-     ///changed.
-     /// ```cpp
-     /// jp::Regex re("\\s*string");
-     /// jp::MatchEvaluator me;
-     /// std::cout<<
-     /// me.setRegexObject(&re)
-     ///   .setSubject("I am a string")
-     ///   .nreplace();
-     /// //The above will delete ' string' from the subject
-     /// //thus the result will be 'I am a'
-     /// ```
-     explicit
-     MatchEvaluator():RegexMatch(){
-         init();
-     }
-
-     ///@overload
-     ///...
-     ///Constructor taking a Regex object pointer.
-     ///It sets the associated Regex object and
-     ///initializes the MatchEvaluator object with
-     ///callback::erase callback function.
-     ///Underlying data is not modified.
-     ///@param r constant Regex pointer.
-     explicit
-     MatchEvaluator(Regex const *r):RegexMatch(r){
-         init();
-     }
-
-     ///@overload
-     ///...
-     ///Constructor taking a callback function.
-     ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function.
-     ///@param mef Callback function.
-     explicit
-     MatchEvaluator(typename MatchEvaluatorCallback<void*, void*, void*>::Callback mef): RegexMatch(){
-         init();
-         setCallback(mef);
-     }
-
-     ///@overload
-     /// ...
-     ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function.
-     ///@param mef Callback function.
-     explicit
-     MatchEvaluator(typename MatchEvaluatorCallback<NumSub const &, void*, void*>::Callback mef): RegexMatch(){
-         init();
-         setCallback(mef);
-     }
-
-     ///@overload
-     /// ...
-     ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function.
-     ///@param mef Callback function.
-     explicit
-     MatchEvaluator(typename MatchEvaluatorCallback<NumSub const &, MapNas const &, void*>::Callback mef): RegexMatch(){
-         init();
-         setCallback(mef);
-     }
-
-     ///@overload
-     /// ...
-     ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function.
-     ///@param mef Callback function.
-     explicit
-     MatchEvaluator(typename MatchEvaluatorCallback<NumSub const &, void*, MapNtN const &>::Callback mef): RegexMatch(){
-         init();
-         setCallback(mef);
-     }
-
-     ///@overload
-     /// ...
-     ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function.
-     ///@param mef Callback function.
-     explicit
-     MatchEvaluator(typename MatchEvaluatorCallback<NumSub const &, MapNas const &, MapNtN const &>::Callback mef): RegexMatch(){
-         init();
-         setCallback(mef);
-     }
-
-     ///@overload
-     /// ...
-     ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function.
-     ///@param mef Callback function.
-     explicit
-     MatchEvaluator(typename MatchEvaluatorCallback<void*, MapNas const &, void*>::Callback mef): RegexMatch(){
-         init();
-         setCallback(mef);
-     }
-
-
-     ///@overload
-     /// ...
-     ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function.
-     ///@param mef Callback function.
-     explicit
-     MatchEvaluator(typename MatchEvaluatorCallback<void*, MapNas const &, MapNtN const &>::Callback mef): RegexMatch(){
-         init();
-         setCallback(mef);
-     }
-
-
-
-     ///@overload
-     /// ...
-     ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function.
-     ///@param mef Callback function.
-     explicit
-     MatchEvaluator(typename MatchEvaluatorCallback<void*, void*, MapNtN const &>::Callback mef): RegexMatch(){
-         init();
-         setCallback(mef);
-     }
-
-
-
-     ///@overload
-     /// ...
-     ///Copy constructor.
-     ///@param me Reference to MatchEvaluator object
-     MatchEvaluator(MatchEvaluator const &me): RegexMatch(me){
-         init();
-         deepCopy(me);
-     }
-
-     ///Overloaded copy-assignment operator
-     ///@param me MatchEvaluator object
-     ///@return A reference to the calling MatchEvaluator object.
-     MatchEvaluator& operator=(MatchEvaluator const &me){
-         if(this == &me) return *this;
-         RegexMatch::operator=(me);
-         deepCopy(me);
-         return *this;
-     }
-
-     #if __cplusplus >= 201103L
-
-     ///@overload
-     /// ...
-     ///Move constructor.
-     ///This constructor steals resources from the argument.
-     ///It leaves the argument in a valid but indeterminate state.
-     ///The indeterminate state can be returned to normal by calling reset() on that object.
-     ///@param me rvalue reference to a MatchEvaluator object
-     MatchEvaluator(MatchEvaluator&& me): RegexMatch(std::move(me)){
-         //'me' is an lvalue inside this function; std::move is needed so that the base
-         //part is moved rather than copied. Only the RegexMatch part has been moved from
-         //at this point, the members read by deepMove() are still intact.
-         init();
-         deepMove(me);
-     }
-
-     ///@overload
-     ///...
-     ///Overloaded move-assignment operator.
-     ///It steals resources from the argument.
-     ///It leaves the argument in a valid but indeterminate state.
-     ///The indeterminate state can be returned to normal by calling reset() on that object.
-     ///@param me rvalue reference to a MatchEvaluator object
-     ///@return A reference to the calling MatchEvaluator object.
-     ///@see MatchEvaluator(MatchEvaluator&& me)
-     MatchEvaluator& operator=(MatchEvaluator&& me){
-         if(this == &me) return *this;
-         //move (not copy) the base part; the members read by deepMove() are unaffected.
-         RegexMatch::operator=(std::move(me));
-         deepMove(me);
-         return *this;
-     }
-
-     #endif
-
-     virtual ~MatchEvaluator(){}
-
-     ///Member function to set a callback function with no vector reference.
-     ///Callback function is always overwritten. The implemented vectors are set to be filled with match data.
-     ///Other vectors that were set previously, are not unset and thus they will be filled with match data too
-     ///when `match()` or `nreplace()` is called.
-     ///@param mef Callback function.
-     ///@return A reference to the calling MatchEvaluator object.
-     MatchEvaluator& setCallback(typename MatchEvaluatorCallback<void*, void*, void*>::Callback mef){
-         callback0 = mef;
-         callbackn = 0;
-         return *this;
-     }
-
-     ///@overload
-     /// ...
-     ///Sets a callback function with a jp::NumSub vector.
-     ///You will be working with a reference to the constant vector.
-     ///@param mef Callback function.
-     ///@return A reference to the calling MatchEvaluator object.
-     MatchEvaluator& setCallback(typename MatchEvaluatorCallback<NumSub const &, void*, void*>::Callback mef){
-         callback1 = mef;
-         callbackn = 1;
-         setNumberedSubstringVector(&vec_num);
-         return *this;
-     }
-
-     ///@overload
-     /// ...
-     ///Sets a callback function with a jp::NumSub and jp::MapNas.
-     ///You will be working with references of the constant vectors.
-     ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead:
-     ///```cpp
-     ///map_nas["word"]; //wrong
-     ///map_nas.at("word"); //ok
-     ///```
-     ///If you want to use `[]` operator with maps, make a copy:
-     ///```cpp
-     ///jp::MapNas mn = map_nas;
-     ///mn["word"]; //ok
-     ///```
-     ///@param mef Callback function.
-     ///@return A reference to the calling MatchEvaluator object.
-     MatchEvaluator& setCallback(typename MatchEvaluatorCallback<NumSub const &, MapNas const &, void*>::Callback mef){
-         callback3 = mef;
-         callbackn = 3;
-         setNumberedSubstringVector(&vec_num);
-         setNamedSubstringVector(&vec_nas);
-         return *this;
-     }
-
-     ///@overload
-     /// ...
-     ///Sets a callback function with a jp::NumSub and jp::MapNtN.
-     ///You will be working with references of the constant vectors.
-     ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead:
-     ///```cpp
-     ///map_ntn["word"]; //wrong
-     ///map_ntn.at("word"); //ok
-     ///```
-     ///If you want to use `[]` operator with maps, make a copy:
-     ///```cpp
-     ///jp::MapNtN mn = map_ntn;
-     ///mn["word"]; //ok
-     ///```
-     ///@param mef Callback function.
-     ///@return A reference to the calling MatchEvaluator object.
-     MatchEvaluator& setCallback(typename MatchEvaluatorCallback<NumSub const &, void*, MapNtN const &>::Callback mef){
-         callback5 = mef;
-         callbackn = 5;
-         setNumberedSubstringVector(&vec_num);
-         setNameToNumberMapVector(&vec_ntn);
-         return *this;
-     }
-
-
-     ///@overload
-     /// ...
-     ///Sets a callback function with a jp::NumSub, jp::MapNas, jp::MapNtN.
-     ///You will be working with references of the constant vectors.
-     ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead:
-     ///```cpp
-     ///map_nas["word"]; //wrong
-     ///map_nas.at("word"); //ok
-     ///```
-     ///If you want to use `[]` operator with maps, make a copy:
-     ///```cpp
-     ///jp::MapNas mn = map_nas;
-     ///mn["word"]; //ok
-     ///```
-     ///@param mef Callback function.
-     ///@return A reference to the calling MatchEvaluator object.
-     MatchEvaluator& setCallback(typename MatchEvaluatorCallback<NumSub const &, MapNas const &, MapNtN const &>::Callback mef){
-         callback7 = mef;
-         callbackn = 7;
-         setNumberedSubstringVector(&vec_num);
-         setNamedSubstringVector(&vec_nas);
-         setNameToNumberMapVector(&vec_ntn);
-         return *this;
-     }
-
-     ///@overload
-     /// ...
-     ///Sets a callback function with a jp::MapNas.
-     ///You will be working with reference of the constant vector.
-     ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead:
-     ///```cpp
-     ///map_nas["word"]; //wrong
-     ///map_nas.at("word"); //ok
-     ///```
-     ///If you want to use `[]` operator with maps, make a copy:
-     ///```cpp
-     ///jp::MapNas mn = map_nas;
-     ///mn["word"]; //ok
-     ///```
-     ///@param mef Callback function.
-     ///@return A reference to the calling MatchEvaluator object.
-     MatchEvaluator& setCallback(typename MatchEvaluatorCallback<void*, MapNas const &, void*>::Callback mef){
-         callback2 = mef;
-         callbackn = 2;
-         setNamedSubstringVector(&vec_nas);
-         return *this;
-     }
-
-     ///@overload
-     /// ...
-     ///Sets a callback function with a jp::MapNas, jp::MapNtN.
-     ///You will be working with reference of the constant vector.
-     ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead:
-     ///```cpp
-     ///map_nas["word"]; //wrong
-     ///map_nas.at("word"); //ok
-     ///```
-     ///If you want to use `[]` operator with maps, make a copy:
-     ///```cpp
-     ///jp::MapNas mn = map_nas;
-     ///mn["word"]; //ok
-     ///```
-     ///@param mef Callback function.
-     ///@return A reference to the calling MatchEvaluator object.
-     MatchEvaluator& setCallback(typename MatchEvaluatorCallback<void*, MapNas const &, MapNtN const &>::Callback mef){
-         callback6 = mef;
-         callbackn = 6;
-         setNamedSubstringVector(&vec_nas);
-         setNameToNumberMapVector(&vec_ntn);
-         return *this;
-     }
-
-     ///@overload
-     /// ...
-     ///Sets a callback function with a jp::MapNtN.
-     ///You will be working with references of the constant vectors.
-     ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead:
-     ///```cpp
-     ///map_ntn["word"]; //wrong
-     ///map_ntn.at("word"); //ok
-     ///```
-     ///If you want to use `[]` operator with maps, make a copy:
-     ///```cpp
-     ///jp::MapNtN mn = map_ntn;
-     ///mn["word"]; //ok
-     ///```
-     ///@param mef Callback function.
-     ///@return A reference to the calling MatchEvaluator object.
-     MatchEvaluator& setCallback(typename MatchEvaluatorCallback<void*, void*, MapNtN const &>::Callback mef){
-         callback4 = mef;
-         callbackn = 4;
-         setNameToNumberMapVector(&vec_ntn);
-         return *this;
-     }
-
-     ///Clear match data.
-     ///It clears all match data from all vectors (without shrinking).
-     ///For shrinking the vectors, use `resetMatchData()`
-     ///A call to `match()` or nreplace() will be required to produce match data again.
-     ///@return A reference to the calling MatchEvaluator object.
-     MatchEvaluator& clearMatchData(){
-         vec_num.clear();
-         vec_nas.clear();
-         vec_ntn.clear();
-         vec_soff.clear();
-         vec_eoff.clear();
-         return *this;
-     }
-
-     ///Reset match data to initial state.
-     ///It deletes all match data from all vectors shrinking their capacity.
-     ///A call to `match()` or nreplace() will be required to produce match data again.
-     ///@return A reference to the calling MatchEvaluator object.
-     MatchEvaluator& resetMatchData(){
-         //swap with empty temporaries so that the old storage goes away with them
-         VecNum().swap(vec_num);
-         VecNas().swap(vec_nas);
-         VecNtN().swap(vec_ntn);
-         VecOff().swap(vec_soff);
-         VecOff().swap(vec_eoff);
-         return *this;
-     }
-
-
-     ///Reset MatchEvaluator to initial state including memory.
-     ///@return A reference to the calling MatchEvaluator object.
-     MatchEvaluator& reset(){
-         RegexMatch::reset();
-         resetMatchData();
-         init();
-         return *this;
-     }
-
-     ///Clears MatchEvaluator.
-     ///Returns everything to initial state (some memory may retain for further and faster use).
-     ///@return A reference to the calling MatchEvaluator object.
-     MatchEvaluator& clear(){
-         RegexMatch::clear();
-         clearMatchData();
-         init();
-         return *this;
-     }
-
-     ///Call RegexMatch::resetErrors().
-     ///@return A reference to the calling MatchEvaluator object.
-     MatchEvaluator& resetErrors(){
-         RegexMatch::resetErrors();
-         return *this;
-     }
-
-     ///Call RegexMatch::setRegexObject(r).
-     ///@param r constant Regex object pointer
-     ///@return A reference to the calling MatchEvaluator object.
-     MatchEvaluator& setRegexObject (Regex const *r){
-         RegexMatch::setRegexObject(r);
-         return *this;
-     }
-
-     ///Call RegexMatch::setSubject(String const &s).
-     ///@param s subject string.
-     ///@return A reference to the calling MatchEvaluator object.
-     MatchEvaluator& setSubject (String const &s){
-         RegexMatch::setSubject(s);
-         return *this;
-     }
-
-     ///@overload
-     ///@param s constant subject string by pointer
-     ///@return A reference to the calling MatchEvaluator object.
-     MatchEvaluator& setSubject (String const *s){
-         RegexMatch::setSubject(s);
-         return *this;
-     }
-
-     ///Call RegexMatch::setModifier(Modifier const& s).
-     ///@param s modifier string.
-     ///@return A reference to the calling MatchEvaluator object.
-     MatchEvaluator& setModifier (Modifier const& s){
-         RegexMatch::setModifier(s);
-         return *this;
-     }
-
-     ///Call RegexMatch::setModifierTable(ModifierTable const * mdt).
-     ///@param mdt pointer to ModifierTable object.
-     ///@return A reference to the calling MatchEvaluator object.
-     MatchEvaluator& setModifierTable (ModifierTable const * mdt){
-         RegexMatch::setModifierTable(mdt);
-         return *this;
-     }
-
-     ///Call RegexMatch::setJpcre2Option(Uint x).
-     ///@param x JPCRE2 option value.
-     ///@return A reference to the calling MatchEvaluator object.
-     MatchEvaluator& setJpcre2Option (Uint x){
-         RegexMatch::setJpcre2Option(x);
-         return *this;
-     }
-
-     ///Call RegexMatch::setPcre2Option (Uint x).
-     ///@param x PCRE2 option value.
-     ///@return A reference to the calling MatchEvaluator object.
-     MatchEvaluator& setPcre2Option (Uint x){
-         RegexMatch::setPcre2Option(x);
-         return *this;
-     }
-
-     ///Call RegexMatch::setFindAll(bool x).
-     ///@param x true if global match, false otherwise.
-     ///@return A reference to the calling MatchEvaluator object.
-     MatchEvaluator& setFindAll (bool x){
-         RegexMatch::setFindAll(x);
-         return *this;
-     }
-
-     ///Call RegexMatch::setFindAll().
-     ///@return A reference to the calling MatchEvaluator object.
-     MatchEvaluator& setFindAll(){
-         RegexMatch::setFindAll();
-         return *this;
-     }
-
-     ///Call RegexMatch::setStartOffset (PCRE2_SIZE offset).
-     ///@param offset match start offset in the subject.
-     ///@return A reference to the calling MatchEvaluator object.
-     MatchEvaluator& setStartOffset (PCRE2_SIZE offset){
-         RegexMatch::setStartOffset(offset);
-         return *this;
-     }
-
-     ///Call RegexMatch::setMatchContext(MatchContext *match_context).
-     ///@param match_context pointer to match context.
-     ///@return A reference to the calling MatchEvaluator object.
-     MatchEvaluator& setMatchContext (MatchContext *match_context){
-         RegexMatch::setMatchContext(match_context);
-         return *this;
-     }
-
-     ///Call RegexMatch::setMatchDataBlock(MatchData* mdt);
-     ///@param mdt pointer to match data block
-     ///@return A reference to the calling MatchEvaluator object.
-     MatchEvaluator& setMatchDataBlock(MatchData* mdt){
-         RegexMatch::setMatchDataBlock(mdt);
-         return *this;
-     }
-
-     ///Set the buffer size that will be used by pcre2_substitute (replace()).
-     ///If buffer size proves to be enough to fit the resultant string
-     ///from each match (not the total resultant string), it will yield one less call
-     ///to pcre2_substitute for each match.
-     ///@param x buffer size.
-     ///@return A reference to the calling MatchEvaluator object.
-     MatchEvaluator& setBufferSize(PCRE2_SIZE x){
-         buffer_size = x;
-         return *this;
-     }
-
-     ///Get the initial buffer size that is being used by internal function pcre2_substitute
-     ///@return buffer_size
-     PCRE2_SIZE getBufferSize() const {
-         return buffer_size;
-     }
-
-     ///Call RegexMatch::changeModifier(Modifier const& mod, bool x).
-     ///@param mod modifier string.
-     ///@param x true (add) or false (remove).
-     ///@return A reference to the calling MatchEvaluator object.
-     MatchEvaluator& changeModifier (Modifier const& mod, bool x){
-         RegexMatch::changeModifier(mod, x);
-         return *this;
-     }
-
-     ///Call RegexMatch::changeJpcre2Option(Uint opt, bool x).
-     ///@param opt JPCRE2 option
-     ///@param x true (add) or false (remove).
-     ///@return A reference to the calling MatchEvaluator object.
-     MatchEvaluator& changeJpcre2Option (Uint opt, bool x){
-         RegexMatch::changeJpcre2Option(opt, x);
-         return *this;
-     }
-
-     ///Call RegexMatch::changePcre2Option(Uint opt, bool x).
-     ///@param opt PCRE2 option.
-     ///@param x true (add) or false (remove).
-     ///@return A reference to the calling MatchEvaluator object.
-     MatchEvaluator& changePcre2Option (Uint opt, bool x){
-         RegexMatch::changePcre2Option(opt, x);
-         return *this;
-     }
-
-     ///Call RegexMatch::addModifier(Modifier const& mod).
-     ///@param mod modifier string.
-     ///@return A reference to the calling MatchEvaluator object.
-     MatchEvaluator& addModifier (Modifier const& mod){
-         RegexMatch::addModifier(mod);
-         return *this;
-     }
-
-     ///Call RegexMatch::addJpcre2Option(Uint x).
-     ///@param x JPCRE2 option.
-     ///@return A reference to the calling MatchEvaluator object.
-     MatchEvaluator& addJpcre2Option (Uint x){
-         RegexMatch::addJpcre2Option(x);
-         return *this;
-     }
-
-     ///Call RegexMatch::addPcre2Option(Uint x).
-     ///@param x PCRE2 option.
-     ///@return A reference to the calling MatchEvaluator object.
-     MatchEvaluator& addPcre2Option (Uint x){
-         RegexMatch::addPcre2Option(x);
-         return *this;
-     }
-
-     ///Perform match and return the match count.
-     ///This function strips off matching options (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT) that are considered
-     ///bad options for replacement operation and then calls
-     ///RegexMatch::match() to perform the match.
-     ///@return match count.
-     SIZE_T match(void){
-         //remove bad matching options
-         RegexMatch::changePcre2Option(PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT, false);
-         return RegexMatch::match();
-     }
-
-     ///Perform regex replace with this match evaluator.
-     ///This is a JPCRE2 native replace function (thus the name nreplace).
-     ///It uses the `MatchEvaluatorCallback` function that was set with a constructor or `MatchEvaluator::setCallback()` function
-     ///to generate the replacement strings on the fly.
-     ///The string returned by the callback function will be treated as literal and will
-     ///not go through any further processing.
-     ///
-     ///This function performs a new match everytime it is called unless it is passed with a boolean `false` as the first argument.
-     ///To use existing match data that was created by a previous `MatchEvaluator::nreplace()` or `MatchEvaluator::match()`, call this
-     ///function with boolean `false` as the first argument.
-     ///
-     ///## Complexity
-     /// 1. Changes in replace related option takes effect without a re-match.
-     /// 2. Changes in match related option (e.g start offset) needs a re-match to take effect.
-     /// 3. To re-use existing match data, callback function must be compatible with the data, otherwise assertion error.
-     /// 4. If the associated Regex object or subject string changes, a new match must be performed,
-     ///    trying to use the existing match data in such cases is undefined behavior.
-     ///
-     ///@param do_match Perform a new matching operation if true, otherwise use existing match data.
-     ///@param jro JPCRE2 replace options.
-     ///@param counter Pointer to a counter to store the number of replacement done.
-     ///@return resultant string after replace.
-     ///@see MatchEvaluator.
-     ///@see MatchEvaluatorCallback.
-     String nreplace(bool do_match=true, Uint jro=0, SIZE_T* counter=0);
-
-     ///PCRE2 compatible replace function that uses this MatchEvaluator.
-     ///Performs regex replace with pcre2_substitute function
-     ///by generating the replacement strings dynamically with MatchEvaluator callback.
-     ///The string returned by callback function is processed by internal pcre2_substitute, thus allowing
-     ///all options that are provided by PCRE2 itself.
-     ///
-     ///This function performs a new match everytime it is called unless it is passed with a boolean `false` as the first argument.
-     ///
-     ///## Complexity
-     /// 1. Changes in replace related option takes effect without a re-match.
-     /// 2. Changes in match related option (e.g start offset) needs a re-match to take effect.
-     /// 3. To re-use existing match data, callback function must be compatible with the data, otherwise assertion error.
-     /// 4. If the associated Regex object or subject string changes, a new match must be performed,
-     ///    trying to use the existing match data in such cases is undefined behavior.
-     ///
-     ///@param do_match perform a new match if true, otherwise use existing data.
-     ///@param ro replace related PCRE2 options.
-     ///@param counter Pointer to a counter to store the number of replacement done.
-     ///@return resultant string after replacement.
-     String replace(bool do_match=true, Uint ro=0, SIZE_T* counter=0);
- };
-
- /** Provides public constructors to create RegexReplace objects.
- * Every RegexReplace object should be associated with a Regex object.
- * This class stores a pointer to its associated Regex object, thus when
- * the content of the associated Regex object is changed, there's no need to
- * set the pointer again.
- *
- * Examples:
- *
- * ```cpp
- * jp::Regex re;
- * jp::RegexReplace rr;
- * rr.setRegexObject(&re);
- * rr.replace("subject", "me"); // returns 'subject'
- * re.compile("\\w+");
- * rr.replace(); // replaces 'subject' with 'me' i.e returns 'me'
- * ```
- */
- class RegexReplace {
- private:
- friend class Regex;
-
- Regex const *re; //associated Regex object; only the pointer is kept and copied
- String r_subject; //internal subject storage, the default target of r_subject_ptr
- String *r_subject_ptr; //preplace method modifies it in-place; points to r_subject or to other user data
- String r_replw; //internal replacement string storage, the default target of r_replw_ptr
- String const *r_replw_ptr; //points to r_replw or to other user data
- Uint replace_opts; //PCRE2 replace options; initial value is PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
- Uint jpcre2_replace_opts; //JPCRE2 replace options; initial value is 0
- PCRE2_SIZE buffer_size; //initial value is 0
- PCRE2_SIZE _start_offset; //initial value is 0
- MatchData *mdata; //null by default; copied by pointer
- MatchContext *mcontext; //null by default; copied by pointer
- ModifierTable const * modtab; //null by default; copied by pointer
- SIZE_T last_replace_count; //internal counter, the default target of last_replace_counter
- SIZE_T* last_replace_counter; //points to last_replace_count or to another counter
-
- //Put every member (except the contents of the two internal strings) into its initial state.
- void init_vars() {
-     //nothing is associated yet
-     re = 0;
-     mdata = 0;
-     mcontext = 0;
-     modtab = 0;
-     //subject, replacement and counter use the internal storage
-     r_subject_ptr = &r_subject;
-     r_replw_ptr = &r_replw;
-     last_replace_count = 0;
-     last_replace_counter = &last_replace_count;
-     //options, buffer size and start offset
-     replace_opts = PCRE2_SUBSTITUTE_OVERFLOW_LENGTH;
-     jpcre2_replace_opts = 0;
-     buffer_size = 0;
-     _start_offset = 0;
-     //error state
-     error_number = 0;
-     error_offset = 0;
- }
-
- //Copy everything from rr except the content of r_subject.
- //A pointer of rr that refers to rr's own internal member is translated to the
- //corresponding member of this object; a pointer to other data is copied as it is.
- void onlyCopy(RegexReplace const &rr){
-     //only pointer should be copied.
-     re = rr.re;
-
-     //subject: internal storage of rr -> internal storage of this object
-     if(rr.r_subject_ptr == &rr.r_subject){
-         r_subject_ptr = &r_subject;
-     } else {
-         r_subject_ptr = rr.r_subject_ptr;
-     }
-
-     //replacement string: same translation as for the subject
-     r_replw = rr.r_replw;
-     if(rr.r_replw_ptr == &rr.r_replw){
-         r_replw_ptr = &r_replw;
-     } else {
-         r_replw_ptr = rr.r_replw_ptr;
-     }
-
-     replace_opts = rr.replace_opts;
-     jpcre2_replace_opts = rr.jpcre2_replace_opts;
-     buffer_size = rr.buffer_size;
-     error_number = rr.error_number;
-     error_offset = rr.error_offset;
-     _start_offset = rr._start_offset;
-     mdata = rr.mdata;
-     mcontext = rr.mcontext;
-     modtab = rr.modtab;
-
-     //replace counter: same translation once more
-     last_replace_count = rr.last_replace_count;
-     if(rr.last_replace_counter == &rr.last_replace_count){
-         last_replace_counter = &last_replace_count;
-     } else {
-         last_replace_counter = rr.last_replace_counter;
-     }
- }
- //Copy the subject string of rr, then everything else through onlyCopy().
- void deepCopy(RegexReplace const &rr){
-     this->r_subject = rr.r_subject;
-     this->onlyCopy(rr);
- }
-
- #if __cplusplus >= 201103L
- //Take over the subject string of rr, then copy everything else through onlyCopy().
- void deepMove(RegexReplace& rr){
-     this->r_subject = std::move_if_noexcept(rr.r_subject);
-     this->onlyCopy(rr);
- }
- #endif
-
-
- protected:
-
- int error_number; //last error number; 0 initially and after resetErrors()
- PCRE2_SIZE error_offset; //last error offset; 0 initially and after resetErrors()
- public:
- ///Default constructor.
- ///Initializes every member through init_vars(); no Regex object is associated.
- RegexReplace(){
-     this->init_vars();
- }
- ///@overload
- /// ...
- ///Creates a RegexReplace object and associates a Regex object with it.
- ///Only the pointer is stored, the Regex object is not modified.
- ///@param r pointer to a Regex object
- RegexReplace(Regex const *r) {
-     this->init_vars();
-     this->re = r;
- }
-
- ///@overload
- ///...
- ///Copy constructor.
- ///Starts from the initial state and then takes a deep copy of the argument.
- ///@param rr RegexReplace object reference
- RegexReplace(RegexReplace const &rr){
-     this->init_vars();
-     this->deepCopy(rr);
- }
-
- ///Overloaded copy assignment operator.
- ///Self assignment is a no-op.
- ///@param rr RegexReplace object reference
- ///@return A reference to the calling RegexReplace object
- RegexReplace& operator=(RegexReplace const &rr){
-     if(this != &rr){
-         deepCopy(rr);
-     }
-     return *this;
- }
-
- #if __cplusplus >= 201103L
-
- ///@overload
- ///...
- ///Move constructor.
- ///This constructor steals resources from the argument.
- ///It leaves the argument in a valid but indeterminate state.
- ///The indeterminate state can be returned to normal by calling reset() on that object.
- ///@param rr rvalue reference to a RegexReplace object reference
- RegexReplace(RegexReplace&& rr){
-     //Establish defaults, then take over the state of rr.
-     this->init_vars();
-     this->deepMove(rr);
- }
-
- ///@overload
- ///...
- ///Overloaded move assignment operator.
- ///This operator steals resources from the argument.
- ///It leaves the argument in a valid but indeterminate state.
- ///The indeterminate state can be returned to normal by calling reset() on that object.
- ///@param rr rvalue reference to a RegexReplace object reference
- ///@return A reference to the calling RegexReplace object
- RegexReplace& operator=(RegexReplace&& rr){
-     //Moving an object onto itself changes nothing.
-     if(this != &rr){
-         deepMove(rr);
-     }
-     return *this;
- }
-
- #endif
-
- virtual ~RegexReplace() {
-     //Empty body: no explicit cleanup is performed here.
- }
-
- ///Reset all class variables to its default (initial) state including memory.
- ///@return Reference to the calling RegexReplace object.
- RegexReplace& reset() {
-     //Swapping with an empty temporary hands the old contents to the temporary,
-     //which is destroyed at the end of the statement.
-     String().swap(r_replw);
-     String().swap(r_subject);
-     //All remaining members go back to their initial values.
-     init_vars();
-     return *this;
- }
-
- ///Clear all class variables to its default (initial) state (some memory may retain for further use).
- ///@return Reference to the calling RegexReplace object.
- RegexReplace& clear() {
-     //Empty both internal strings with clear(), then restore the default values.
-     r_replw.clear();
-     r_subject.clear();
-     init_vars();
-     return *this;
- }
-
- ///Reset replace related errors to zero.
- ///@return Reference to the calling RegexReplace object
- ///@see Regex::resetErrors()
- ///@see RegexMatch::resetErrors()
- RegexReplace& resetErrors(){
-     //Zero both error fields; nothing else is touched.
-     error_offset = 0;
-     error_number = 0;
-     return *this;
- }
- /// Returns the last error number
- ///@return Last error number
- int getErrorNumber() const {
-     //Plain read of the stored error number.
-     return this->error_number;
- }
- /// Returns the last error offset
- ///@return Last error offset
- int getErrorOffset() const {
-     //The offset is stored as PCRE2_SIZE and converted to int for the caller.
-     return static_cast<int>(error_offset);
- }
-
- /// Returns the last error message
- ///@return Last error message
- String getErrorMessage() const {
-     //The enclosing select<> takes an extra Map argument when built as C++11 or later,
-     //so the qualified call differs between the two language levels.
-     #if __cplusplus >= 201103L
-     return select<Char, Map>::getErrorMessage(error_number, error_offset);
-     #else
-     return select<Char>::getErrorMessage(error_number, error_offset);
-     #endif
- }
-
- /// Get replacement string
- ///@return replacement string
- String getReplaceWith() const {
-     //Returns a copy of whatever the replacement pointer currently refers to.
-     String const &current = *r_replw_ptr;
-     return current;
- }
-
- /// Get pointer to replacement string
- ///@return pointer to replacement string
- String const * getReplaceWithPointer() const {
-     //The pointer itself is returned; no string is copied.
-     return this->r_replw_ptr;
- }
- /// Get subject string
- ///@return subject string
- ///@see RegexMatch::getSubject()
- String getSubject() const {
-     //Returns a copy of whatever the subject pointer currently refers to.
-     String const &current = *r_subject_ptr;
-     return current;
- }
- /// Get pointer to subject string
- ///@return Pointer to constant subject string
- ///@see RegexMatch::getSubjectPointer()
- String const * getSubjectPointer() const {
-     //The pointer itself is returned (as pointer to const); no string is copied.
-     return this->r_subject_ptr;
- }
-
- /// Calculate modifier string from PCRE2 and JPCRE2 options and return it.
- ///
- /// Do remember that modifiers (or PCRE2 and JPCRE2 options) do not change or get initialized
- /// as long as you don't do that explicitly. Calling RegexReplace::setModifier() will re-set them.
- ///
- /// **Mixed or combined modifier**.
- ///
- /// Some modifier may include other modifiers i.e they have the same meaning of some modifiers
- /// combined together. For example, the 'n' modifier includes the 'u' modifier and together they
- /// are equivalent to `PCRE2_UTF | PCRE2_UCP`. When you set a modifier like this, both options
- /// get set, and when you remove the 'n' modifier (with `RegexReplace::changeModifier()`), both will get removed.
- /// @return Calculated modifier string (std::string)
- ///@see RegexMatch::getModifier()
- ///@see Regex::getModifier()
- std::string getModifier() const {
-     //Without a custom modifier table the default MOD conversion is used.
-     if(!modtab){
-         return MOD::fromReplaceOption(replace_opts, jpcre2_replace_opts);
-     }
-     return modtab->fromReplaceOption(replace_opts, jpcre2_replace_opts);
- }
-
- ///Get the modifier table that is set,
- ///@return constant ModifierTable pointer.
- ModifierTable const* getModifierTable() const {
-     //Read-only accessor, const-qualified like the sibling getters so it can also
-     //be called through a const RegexReplace. Null means no custom table is set
-     //(getModifier()/changeModifier() then fall back to MOD).
-     return modtab;
- }
-
- ///Get start offset.
- ///@return the start offset where matching starts for replace operation
- PCRE2_SIZE getStartOffset() const {
-     //Plain read of the stored start offset.
-     return this->_start_offset;
- }
-
- /// Get PCRE2 option
- ///@return PCRE2 option for replace
- ///@see Regex::getPcre2Option()
- ///@see RegexMatch::getPcre2Option()
- Uint getPcre2Option() const {
-     //Plain read of the PCRE2 replace option bits.
-     return this->replace_opts;
- }
- /// Get JPCRE2 option
- ///@return JPCRE2 option for replace
- ///@see Regex::getJpcre2Option()
- ///@see RegexMatch::getJpcre2Option()
- Uint getJpcre2Option() const {
-     //Plain read of the JPCRE2 replace option bits.
-     return this->jpcre2_replace_opts;
- }
-
- ///Get a pointer to the associated Regex object.
- ///If no actual Regex object is associated, null is returned
- ///@return A pointer to the associated constant Regex object or null
- Regex const * getRegexObject() const {
-     //Null when no Regex has been associated.
-     return this->re;
- }
-
- ///Return pointer to the match context that was previously set with setMatchContext().
- ///Handling memory is the callers' responsibility.
- ///@return pointer to the match context (default: null).
- MatchContext* getMatchContext() const {
-     //Read-only accessor, const-qualified like the sibling getters so it can also
-     //be called through a const RegexReplace. The stored pointer is returned
-     //unchanged; this object does not free it.
-     return mcontext;
- }
-
- ///Get the pointer to the match data block that was set previously with setMatchData()
- ///Handling memory is the callers' responsibility.
- ///@return pointer to the match data (default: null).
- virtual MatchData* getMatchDataBlock(){
-     //Returns the stored match data pointer unchanged.
-     return this->mdata;
- }
-
- ///Get the initial buffer size that is being used by internal function pcre2_substitute
- ///@return buffer_size
- PCRE2_SIZE getBufferSize() const {
-     //Read-only accessor, const-qualified like the sibling getters so it can also
-     //be called through a const RegexReplace.
-     return buffer_size;
- }
-
- ///Get the number of replacement in last replace operation.
- ///If you set an external counter with RegexReplace::setReplaceCounter(),
- ///a call to this getter method will dereference the pointer to the external counter
- ///and return the value.
- ///@return Last replace count
- SIZE_T getLastReplaceCount() const {
-     //Read-only accessor, const-qualified like the sibling getters so it can also
-     //be called through a const RegexReplace. The active counter is dereferenced,
-     //which is either the internal one or the external one from setReplaceCounter().
-     return *last_replace_counter;
- }
-
- ///Set an external counter variable to store the replacement count.
- ///This counter will be updated after each replacement operation on this object.
- ///A call to this method will reset the internal counter to 0, thus when you reset the counter
- ///to internal counter (by giving null as param), the previous replace count won't be available.
- ///@param counter Pointer to a counter variable. Null sets the counter to default internal counter.
- ///@return Reference to the calling RegexReplace object.
- RegexReplace& setReplaceCounter(SIZE_T* counter){
-     //The internal counter is always zeroed, whichever counter becomes active.
-     last_replace_count = 0;
-     if(counter){
-         last_replace_counter = counter;
-     } else {
-         //Null selects the internal counter again.
-         last_replace_counter = &last_replace_count;
-     }
-     return *this;
- }
-
- ///Set the associated Regex object.
- ///Regex object is not modified.
- ///@param r Pointer to a Regex object.
- ///@return Reference to the calling RegexReplace object.
- RegexReplace& setRegexObject(Regex const *r){
-     //Only the pointer is stored.
-     this->re = r;
-     return *this;
- }
-
- /// Set the subject string for replace.
- ///This makes a copy of the string. If no copy is desired or you are working
- ///with big text, consider passing by pointer.
- ///@param s Subject string
- ///@return Reference to the calling RegexReplace object
- ///@see RegexMatch::setSubject()
- RegexReplace& setSubject(String const &s) {
-     //Keep a private copy and make the subject pointer refer to it,
-     //replacing any external string that was set before.
-     this->r_subject = s;
-     this->r_subject_ptr = &this->r_subject;
-     return *this;
- }
-
- ///@overload
- ///...
- /// Set pointer to the subject string for replace, null pointer unsets it.
- /// The underlying data is not modified unless RegexReplace::preplace() method is used.
- ///@param s Pointer to subject string
- ///@return Reference to the calling RegexReplace object
- ///@see RegexMatch::setSubject()
- RegexReplace& setSubject(String *s) {
-     if(!s){
-         //Null unsets the subject: empty the internal string and point back at it.
-         r_subject.clear();
-         r_subject_ptr = &r_subject;
-         return *this;
-     }
-     //Otherwise refer to the caller's string directly; nothing is copied.
-     r_subject_ptr = s;
-     return *this;
- }
-
- /// Set the replacement string.
- ///`$` is a special character which implies captured group.
- ///
- ///1. A numbered substring can be referenced with `$n` or `${n}` where n is the group number.
- ///2. A named substring can be referenced with `${name}`, where 'name' is the group name.
- ///3. A literal `$` can be given as `$$`.
- ///
- ///**Note:** This function makes a copy of the string. If no copy is desired or
- ///you are working with big text, consider passing the string with pointer.
- ///
- ///@param s String to replace with
- ///@return Reference to the calling RegexReplace object
- RegexReplace& setReplaceWith(String const &s) {
-     //Keep a private copy and make the replacement pointer refer to it,
-     //replacing any external string that was set before.
-     this->r_replw = s;
-     this->r_replw_ptr = &this->r_replw;
-     return *this;
- }
-
- ///@overload
- ///...
- ///@param s Pointer to the string to replace with, null pointer unsets it.
- ///@return Reference to the calling RegexReplace object
- RegexReplace& setReplaceWith(String const *s) {
-     if(!s){
-         //Null unsets the replacement: empty the internal string and point back at it.
-         r_replw.clear();
-         r_replw_ptr = &r_replw;
-         return *this;
-     }
-     //Otherwise refer to the caller's string directly; nothing is copied.
-     r_replw_ptr = s;
-     return *this;
- }
-
- /// Set the modifier string (resets all JPCRE2 and PCRE2 options) by calling RegexReplace::changeModifier().
- ///@param s Modifier string.
- ///@return Reference to the calling RegexReplace object
- ///@see RegexMatch::setModifier()
- ///@see Regex::setModifier()
- RegexReplace& setModifier(Modifier const& s) {
-     //Start over: JPCRE2 options go to zero, PCRE2 options go to the one flag
-     //that must always be present (they must not be initialized to 0).
-     jpcre2_replace_opts = 0;
-     replace_opts = PCRE2_SUBSTITUTE_OVERFLOW_LENGTH;
-     //Then add whatever the modifier string asks for.
-     return changeModifier(s, true);
- }
-
- ///Set a custom modifier table to be used.
- ///@param mdt pointer to ModifierTable object.
- /// @return Reference to the calling RegexReplace object.
- RegexReplace& setModifierTable(ModifierTable const * mdt){
-     //Only the pointer is stored.
-     this->modtab = mdt;
-     return *this;
- }
- /// Set the initial buffer size to be allocated for replaced string (used by PCRE2)
- ///@param x Buffer size
- ///@return Reference to the calling RegexReplace object
- RegexReplace& setBufferSize(PCRE2_SIZE x) {
-     //Stored as given; no validation is done here.
-     this->buffer_size = x;
-     return *this;
- }
-
- ///Set start offset.
- ///Set the offset where matching starts for replace operation
- ///@param start_offset The offset where matching starts for replace operation
- ///@return Reference to the calling RegexReplace object
- RegexReplace& setStartOffset(PCRE2_SIZE start_offset){
-     //Stored as given; no validation is done here.
-     this->_start_offset = start_offset;
-     return *this;
- }
- /// Set JPCRE2 option for replace (overwrite existing option)
- ///@param x Option value
- ///@return Reference to the calling RegexReplace object
- ///@see RegexMatch::setJpcre2Option()
- ///@see Regex::setJpcre2Option()
- RegexReplace& setJpcre2Option(Uint x) {
-     //Overwrites every JPCRE2 option bit with the given value.
-     this->jpcre2_replace_opts = x;
-     return *this;
- }
- /// Set PCRE2 option replace (overwrite existing option)
- ///@param x Option value
- ///@return Reference to the calling RegexReplace object
- ///@see RegexMatch::setPcre2Option()
- ///@see Regex::setPcre2Option()
- RegexReplace& setPcre2Option(Uint x) {
-     //Overwrites the PCRE2 option bits; PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is always
-     //OR-ed in on top of the caller's value.
-     this->replace_opts = x | PCRE2_SUBSTITUTE_OVERFLOW_LENGTH;
-     return *this;
- }
-
- ///Set the match context to be used.
- ///Native PCRE2 API may be used to create match context.
- ///The memory of the match context is not handled by RegexReplace object and not freed.
- ///User will be responsible for freeing memory.
- ///@param match_context Pointer to match context.
- ///@return Reference to the calling RegexReplace object.
- RegexReplace& setMatchContext(MatchContext * match_context){
-     //Only the pointer is stored.
-     this->mcontext = match_context;
-     return *this;
- }
-
- ///Set the match data block to be used.
- ///Native PCRE2 API may be used to create match data block.
- ///The memory of the match data is not handled by RegexReplace object and not freed.
- ///User will be responsible for creating/freeing memory.
- ///@param match_data Pointer to match data.
- ///@return Reference to the calling RegexReplace object.
- RegexReplace& setMatchDataBlock(MatchData *match_data){
-     //Only the pointer is stored.
-     this->mdata = match_data;
-     return *this;
- }
- /// After a call to this function PCRE2 and JPCRE2 options will be properly set.
- /// This function does not initialize or re-initialize options.
- /// If you want to set options from scratch, initialize them to 0 before calling this function.
- ///
- /// If invalid modifier is detected, then the error number for the RegexReplace
- /// object will be jpcre2::ERROR::INVALID_MODIFIER and error offset will be the modifier character.
- /// You can get the message with RegexReplace::getErrorMessage() function.
- /// @param mod Modifier string.
- /// @param x Whether to add or remove option
- /// @return Reference to the RegexReplace object
- /// @see Regex::changeModifier()
- /// @see RegexMatch::changeModifier()
- RegexReplace& changeModifier(Modifier const& mod, bool x){
-     //A custom modifier table, when set, does the conversion; otherwise MOD does.
-     //Either way the option words and the error fields are passed by address.
-     if(modtab){
-         modtab->toReplaceOption(mod, x, &replace_opts, &jpcre2_replace_opts, &error_number, &error_offset);
-     } else {
-         MOD::toReplaceOption(mod, x, &replace_opts, &jpcre2_replace_opts, &error_number, &error_offset);
-     }
-     return *this;
- }
-
- /// Add or remove a JPCRE2 option.
- /// Unlike RegexReplace::changeModifier(), this takes a raw option value; no modifier string is parsed.
- /// @param opt JPCRE2 option value
- /// @param x Add the option if it's true, remove otherwise.
- /// @return Reference to the calling RegexReplace object
- /// @see RegexMatch::changeJpcre2Option()
- /// @see Regex::changeJpcre2Option()
- RegexReplace& changeJpcre2Option(Uint opt, bool x) {
-     if(x){
-         //Add: set the requested bits.
-         jpcre2_replace_opts = jpcre2_replace_opts | opt;
-     } else {
-         //Remove: clear the requested bits.
-         jpcre2_replace_opts = jpcre2_replace_opts & ~opt;
-     }
-     return *this;
- }
- /// Add or remove a PCRE2 option
- /// @param opt PCRE2 option value
- /// @param x Add the option if it's true, remove otherwise.
- /// @return Reference to the calling RegexReplace object
- /// @see RegexMatch::changePcre2Option()
- /// @see Regex::changePcre2Option()
- RegexReplace& changePcre2Option(Uint opt, bool x) {
-     if(x){
-         //Add: set the requested bits.
-         replace_opts = replace_opts | opt;
-     } else {
-         //Remove: clear the requested bits.
-         replace_opts = replace_opts & ~opt;
-     }
-     //PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is important, but it is not forced back on here
-     //so that the user can override it.
-     return *this;
- }
-
- /// Parse modifier string and add equivalent PCRE2 and JPCRE2 options.
- /// This is just a wrapper of the original function RegexReplace::changeModifier()
- /// provided for convenience.
- /// @param mod Modifier string.
- /// @return Reference to the calling RegexReplace object
- /// @see RegexMatch::addModifier()
- /// @see Regex::addModifier()
- RegexReplace& addModifier(Modifier const& mod){
-     //Same as changeModifier() with the add flag fixed to true.
-     const bool add = true;
-     return changeModifier(mod, add);
- }
- /// Add specified JPCRE2 option to existing options for replace.
- ///@param x Option value
- ///@return Reference to the calling RegexReplace object
- ///@see RegexMatch::addJpcre2Option()
- ///@see Regex::addJpcre2Option()
- RegexReplace& addJpcre2Option(Uint x) {
-     //Set the given bits; bits that are already set stay set.
-     jpcre2_replace_opts = jpcre2_replace_opts | x;
-     return *this;
- }
-
- /// Add specified PCRE2 option to existing options for replace
- ///@param x Option value
- ///@return Reference to the calling RegexReplace object
- ///@see RegexMatch::addPcre2Option()
- ///@see Regex::addPcre2Option()
- RegexReplace& addPcre2Option(Uint x) {
-     //Set the given bits; bits that are already set stay set.
-     replace_opts = replace_opts | x;
-     return *this;
- }
-
- /// Perform regex replace by retrieving subject string, replacement string, modifier and other options from class variables.
- /// In the replacement string (see RegexReplace::setReplaceWith()) `$` is a special character which implies captured group.
- /// 1. A numbered substring can be referenced with `$n` or `${n}` where n is the group number.
- /// 2. A named substring can be referenced with `${name}`, where 'name' is the group name.
- /// 3. A literal `$` can be given as `$$`.
- /// 4. Bash like features: ${<n>:-<string>} and ${<n>:+<string1>:<string2>}, where <n> is a group number or name.
- ///
- ///All options supported by pcre2_substitute are available.
- ///
- /// Note: This function calls pcre2_substitute() to do the replacement.
- ///@return Replaced string
- String replace(void); //declaration only; no body is given at this point
-
- /// Perl compatible replace method.
- /// Modifies subject string in-place and returns replace count.
- ///
- /// The replacement is performed with `RegexReplace::replace()` which uses `pcre2_substitute()`.
- /// @return replace count
- SIZE_T preplace(void){
-     //Overwrite the subject (internal or caller-owned) with the replaced text,
-     //then report the value of the active replace counter.
-     *r_subject_ptr = replace();
-     SIZE_T const count = *last_replace_counter;
-     return count;
- }
-
- /// Perl compatible replace method with match evaluator.
- /// Modifies subject string in-place and returns replace count.
- /// MatchEvaluator class does not have an implementation of this replace method, thus it is not possible
- /// to re-use match data with preplace() method.
- /// Re-using match data with preplace doesn't actually make any sense, because new subject will
- /// always require new match data.
- ///
- /// The replacement is performed with `RegexReplace::replace()` which uses `pcre2_substitute()`.
- /// @param me MatchEvaluator object.
- /// @return replace count
- SIZE_T preplace(MatchEvaluator me){
-     //`me` is a by-value copy, so configuring it leaves the caller's evaluator untouched.
-     //Find-all mirrors the global-substitute flag of this object.
-     bool const global = (getPcre2Option() & PCRE2_SUBSTITUTE_GLOBAL)!=0;
-     *r_subject_ptr = me.setRegexObject(getRegexObject())
-                        .setSubject(r_subject_ptr) //member pointer on purpose, not the getter
-                        .setFindAll(global)
-                        .setMatchContext(getMatchContext())
-                        .setMatchDataBlock(getMatchDataBlock())
-                        .setBufferSize(getBufferSize())
-                        .setStartOffset(getStartOffset())
-                        .replace(true, getPcre2Option(), last_replace_counter);
-     //The active counter was handed to the evaluator above; report its value.
-     return *last_replace_counter;
- }
-
- ///JPCRE2 native replace function.
- ///A different name is adopted to
- ///distinguish itself from the regular replace() function which
- ///uses pcre2_substitute() to do the replacement; contrary to that,
- ///it will provide a JPCRE2 native way of replacement operation.
- ///It takes a MatchEvaluator object which provides a callback function that is used
- ///to generate replacement string on the fly. Any replacement string set with
- ///`RegexReplace::setReplaceWith()` function will have no effect.
- ///The string returned by the callback function will be treated as literal and will
- ///not go through any further processing.
- ///
- ///This function works on a copy of the MatchEvaluator, and thus makes no changes
- ///to the original. The copy is modified as below:
- ///
- ///1. Global replacement will set FIND_ALL for match, unset otherwise.
- ///2. Bad matching options such as `PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT` will be removed.
- ///3. subject, start_offset and Regex object will change according to the RegexReplace object.
- ///4. match context, and match data block will be changed according to the RegexReplace object.
- ///
- ///It calls MatchEvaluator::nreplace() on the MatchEvaluator object to perform the replacement.
- ///
- ///It always performs a new match.
- ///@param me A MatchEvaluator object.
- ///@return The resultant string after replacement.
- ///@see MatchEvaluator::nreplace()
- ///@see MatchEvaluator
- ///@see MatchEvaluatorCallback
- String nreplace(MatchEvaluator me){
-     //`me` is a by-value copy, so configuring it leaves the caller's evaluator untouched.
-     //Find-all mirrors the global-substitute flag of this object.
-     bool const global = (getPcre2Option() & PCRE2_SUBSTITUTE_GLOBAL)!=0;
-     //Note: the JPCRE2 options (not the PCRE2 ones) are forwarded to nreplace().
-     return me.setRegexObject(getRegexObject())
-              .setSubject(getSubjectPointer())
-              .setFindAll(global)
-              .setMatchContext(getMatchContext())
-              .setMatchDataBlock(getMatchDataBlock())
-              .setStartOffset(getStartOffset())
-              .nreplace(true, getJpcre2Option(), last_replace_counter);
- }
-
- ///PCRE2 compatible replace function that takes a MatchEvaluator.
- ///String returned by callback function is processed by pcre2_substitute,
- ///thus all PCRE2 substitute options are supported by this replace function.
- ///
- ///It always performs a new match.
- ///@param me MatchEvaluator instance, (copied and modified according to this object).
- ///@return resultant string.
- ///@see replace()
- String replace(MatchEvaluator me){
-     //`me` is a by-value copy, so configuring it leaves the caller's evaluator untouched.
-     //Find-all mirrors the global-substitute flag of this object.
-     bool const global = (getPcre2Option() & PCRE2_SUBSTITUTE_GLOBAL)!=0;
-     //The PCRE2 options and the buffer size are forwarded to the evaluator's replace().
-     return me.setRegexObject(getRegexObject())
-              .setSubject(getSubjectPointer())
-              .setFindAll(global)
-              .setMatchContext(getMatchContext())
-              .setMatchDataBlock(getMatchDataBlock())
-              .setBufferSize(getBufferSize())
-              .setStartOffset(getStartOffset())
-              .replace(true, getPcre2Option(), last_replace_counter);
- }
- };
-
-
- /** Provides public constructors to create Regex object.
- * Each regex pattern needs an object of this class and each pattern needs to be compiled.
- * Pattern compilation can be done using one of its overloaded constructors or the `Regex::compile()`
- * member function.
- *
- * Examples:
- *
- * ```cpp
- * jp::Regex re; //does not perform a compile
- * re.compile("pattern", "modifier");
- * jp::Regex re2("pattern", "modifier"); //performs a compile
- * ```
- *
- */
- class Regex {
- private:
- friend class RegexMatch;
- friend class RegexReplace;
- friend class MatchEvaluator;
-
- String pat_str; //internal pattern storage, filled by setPattern(String const &)
- String const *pat_str_ptr; //points at pat_str or at a caller-supplied pattern string
- Pcre2Code *code; //compiled pattern; 0 means not compiled (see operator!)
- Uint compile_opts; //PCRE2 compile option bits, see getPcre2Option()
- Uint jpcre2_compile_opts; //JPCRE2 compile option bits, see getJpcre2Option()
- ModifierTable const * modtab; //optional custom modifier table; 0 selects the MOD defaults
- CompileContext *ccontext; //compile context; created on demand by setNewLine()/resetCharacterTables(), freed by freeCompileContext()
- std::vector<unsigned char> tabv; //character tables filled by resetCharacterTables()
-
-
- void init_vars() {
-     //Pointers: nothing compiled, no context, no custom modifier table,
-     //and the pattern pointer refers to the internal string.
-     code = 0;
-     ccontext = 0;
-     modtab = 0;
-     pat_str_ptr = &pat_str;
-     //Option words.
-     compile_opts = 0;
-     jpcre2_compile_opts = 0;
-     //Error state.
-     error_number = 0;
-     error_offset = 0;
- }
- void freeRegexMemory(void) {
-     //Release the compiled code, then zero the pointer because the member may be reused.
-     Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::code_free(this->code);
-     this->code = 0;
- }
-
- void freeCompileContext(){
-     //Release the compile context, then zero the pointer.
-     Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::compile_context_free(this->ccontext);
-     this->ccontext = 0;
- }
- void onlyCopy(Regex const &r){
-     //Pattern pointer: a source that pointed at its own string maps to our string,
-     //a source that referenced other user data keeps referencing that same data.
-     if(r.pat_str_ptr == &r.pat_str) pat_str_ptr = &pat_str;
-     else pat_str_ptr = r.pat_str_ptr;
-     //Option words, error state and the modifier table pointer are taken as they are.
-     //The pattern text, tables, compile context and compiled code are not handled here.
-     modtab = r.modtab;
-     error_offset = r.error_offset;
-     error_number = r.error_number;
-     jpcre2_compile_opts = r.jpcre2_compile_opts;
-     compile_opts = r.compile_opts;
- }
- void deepCopy(Regex const &r) {
-     //Pattern text is assigned directly; setPattern() must not be used here.
-     pat_str = r.pat_str;
-     //Options, error state, modifier table and the pattern pointer.
-     onlyCopy(r);
-     //Character tables get their own storage.
-     tabv = r.tabv;
-     //Replace our compile context by a copy of the source's one, if it has any.
-     freeCompileContext();
-     if(r.ccontext){
-         ccontext = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::compile_context_copy(r.ccontext);
-     } else {
-         ccontext = 0;
-     }
-     //The context must refer to our own tables when both are present.
-     if(ccontext && !tabv.empty()){
-         Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::set_character_tables(ccontext, &tabv[0]);
-     }
-     //The table pointer must be updated in the compiled code itself and a jit memory
-     //copy is not available, so the code is not copied: recompile from the state
-     //copied above when the source was compiled, otherwise drop any code we hold.
-     if(r.code){
-         compile(); //compile frees previous memory.
-     } else {
-         freeRegexMemory();
-     }
- }
-
- #if __cplusplus >= 201103L
-
- void deepMove(Regex& r) {
-     //Take over the pattern text; std::move_if_noexcept chooses between move and copy.
-     pat_str = std::move_if_noexcept(r.pat_str);
-     //Options, error state, modifier table and the pattern pointer.
-     onlyCopy(r);
-     //Take over the character tables.
-     tabv = std::move_if_noexcept(r.tabv);
-     //Take over the compile context; the source pointer must be zeroed.
-     freeCompileContext();
-     ccontext = r.ccontext;
-     r.ccontext = 0;
-     if(ccontext && !tabv.empty()){
-         Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::set_character_tables(ccontext, &tabv[0]);
-     }
-     //Take over the compiled code; the source pointer must be zeroed here as well.
-     freeRegexMemory();
-     code = r.code;
-     r.code = 0;
- }
-
- #endif
-
- protected:
-
- int error_number; ///< Last error number; returned by getErrorNumber(), zeroed by resetErrors()
- PCRE2_SIZE error_offset; ///< Last error offset; returned by getErrorOffset(), zeroed by resetErrors()
- public:
- /// Default Constructor.
- /// Initializes all class variables to defaults.
- /// Does not perform any pattern compilation.
- Regex() {
-     //Default state only; nothing is compiled.
-     this->init_vars();
- }
- ///Compile pattern with initialization.
- /// @param re Pattern string
- Regex(String const &re) {
-     //Default state first, then compile the given pattern.
-     this->init_vars();
-     this->compile(re);
- }
- /// @overload
- /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern.
- Regex(String const *re) {
-     //Default state first, then compile from the given pattern pointer.
-     this->init_vars();
-     this->compile(re);
- }
-
- ///@overload
- /// @param re Pattern string .
- /// @param mod Modifier string.
- Regex(String const &re, Modifier const& mod) {
-     //Default state first, then compile the pattern with the given modifier.
-     this->init_vars();
-     this->compile(re, mod);
- }
-
- ///@overload
- /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern.
- /// @param mod Modifier string.
- Regex(String const *re, Modifier const& mod) {
-     //Default state first, then compile from the pattern pointer with the given modifier.
-     this->init_vars();
-     this->compile(re, mod);
- }
- ///@overload
- /// @param re Pattern string .
- /// @param po PCRE2 option value
- Regex(String const &re, Uint po) {
-     //Default state first, then compile the pattern with the given PCRE2 option value.
-     this->init_vars();
-     this->compile(re, po);
- }
- ///@overload
- /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern.
- /// @param po PCRE2 option value
- Regex(String const *re, Uint po) {
-     //Default state first, then compile from the pattern pointer with the given PCRE2 option value.
-     this->init_vars();
-     this->compile(re, po);
- }
- ///@overload
- /// @param re Pattern string .
- /// @param po PCRE2 option value
- /// @param jo JPCRE2 option value
- Regex(String const &re, Uint po, Uint jo) {
-     //Default state first, then compile the pattern with the PCRE2 and JPCRE2 option values.
-     this->init_vars();
-     this->compile(re, po, jo);
- }
- ///@overload
- /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern.
- /// @param po PCRE2 option value
- /// @param jo JPCRE2 option value
- Regex(String const *re, Uint po, Uint jo) {
-     //Default state first, then compile from the pattern pointer with the PCRE2 and JPCRE2 option values.
-     this->init_vars();
-     this->compile(re, po, jo);
- }
- /// @overload
- ///...
- /// Copy constructor.
- /// A separate and new compile is performed from the copied options.
- ///
- /// @param r Constant Regex object reference.
- Regex(Regex const &r) {
-     //Establish defaults, then overwrite them with a deep copy of r.
-     this->init_vars();
-     this->deepCopy(r);
- }
-
- /// Overloaded assignment operator.
- /// @param r Regex const &
- /// @return *this
- Regex& operator=(Regex const &r) {
-     //Self-assignment is a no-op; otherwise take a deep copy.
-     if (this != &r) {
-         deepCopy(r);
-     }
-     return *this;
- }
-
-
- #if __cplusplus >= 201103L
-
-
- /// @overload
- ///...
- /// Move constructor.
- ///This constructor steals resources from the argument.
- ///It leaves the argument in a valid but indeterminate state.
- ///The indeterminate state can be returned to normal by calling reset() on that object.
- /// @param r rvalue reference to a Regex object.
- Regex(Regex&& r) {
-     //Establish defaults, then take over the state of r.
-     this->init_vars();
-     this->deepMove(r);
- }
-
- ///@overload
- ///...
- /// Overloaded move-assignment operator.
- ///This operator steals resources from the argument.
- ///It leaves the argument in a valid but indeterminate state.
- ///The indeterminate state can be returned to normal by calling reset() on that object.
- /// @param r Regex&&
- /// @return *this
- Regex& operator=(Regex&& r) {
-     //Moving an object onto itself changes nothing.
-     if (this != &r) {
-         deepMove(r);
-     }
-     return *this;
- }
-
- /// Provides boolean check for the status of the object.
- /// This overloaded boolean operator needs to be declared
- /// explicit to prevent implicit conversion and overloading issues.
- ///
- /// We will only enable it if >=C++11 is being used, as the explicit keyword
- /// for a function other than constructor is not supported in older compilers.
- ///
- /// If you are dealing with legacy code/compilers use the Double bang trick mentioned
- /// in Regex::operator!().
- ///
- /// This helps us to check the status of the compiled regex like this:
- ///
- /// ```
- /// jpcre2::select<char>::Regex re("pat", "mod");
- /// if(re) {
- /// std::cout<<"Compile success";
- /// } else {
- /// std::cout<<"Compile failed";
- /// }
- /// ```
- ///@return true if regex compiled successfully, false otherwise.
- ///
- explicit operator bool() const {
-     //True exactly when compiled code is held.
-     return code ? true : false;
- }
- #endif
-
- /// Provides boolean check for the status of the object.
- /// This is a safe boolean approach (no implicit conversion or overloading).
- /// We don't need the explicit keyword here and thus it's the preferable method
- /// to check for object status that will work well with older compilers.
- /// e.g:
- ///
- /// ```
- /// jpcre2::select<char>::Regex re("pat","mod");
- /// if(!re) {
- /// std::cout<<"Compile failed";
- /// } else {
- /// std::cout<<"Compiled successfully";
- /// }
- /// ```
- /// Double bang trick:
- ///
- /// ```
- /// jpcre2::select<char>::Regex re("pat","mod");
- /// if(!!re) {
- /// std::cout<<"Compiled successfully";
- /// } else {
- /// std::cout<<"Compile failed";
- /// }
- /// ```
- /// @return true if regex compile failed, false otherwise.
- bool operator!() const {
-     //True exactly when no compiled code is held.
-     return code ? false : true;
- }
- virtual ~Regex() {
-     //Release the two resources held by this object: compiled code and compile context.
-     this->freeRegexMemory();
-     this->freeCompileContext();
- }
- ///Reset all class variables to its default (initial) state including memory.
- ///@return Reference to the calling Regex object.
- Regex& reset() {
-     //Release compiled code and compile context first.
-     freeRegexMemory();
-     freeCompileContext();
-     //Swapping with empty temporaries drops the contents of the pattern string
-     //and of the character tables.
-     String().swap(pat_str);
-     //The tables must go as well: deepCopy()/deepMove() attach a non-empty tabv to
-     //the new compile context, so tables left over from before the reset would
-     //otherwise reappear in a later copy of this object.
-     std::vector<unsigned char>().swap(tabv);
-     init_vars();
-     return *this;
- }
- ///Clear all class variables to its default (initial) state (some memory may retain for further use).
- ///@return Reference to the calling Regex object.
- Regex& clear() {
-     //Release compiled code and compile context first.
-     freeRegexMemory();
-     freeCompileContext();
-     //Empty the pattern string and the character tables with clear().
-     pat_str.clear();
-     //The tables must be emptied as well: deepCopy()/deepMove() attach a non-empty
-     //tabv to the new compile context, so tables left over from before the clear
-     //would otherwise reappear in a later copy of this object.
-     tabv.clear();
-     init_vars();
-     return *this;
- }
- ///Reset regex compile related errors to zero.
- ///@return A reference to the Regex object
- ///@see RegexReplace::resetErrors()
- ///@see RegexMatch::resetErrors()
- Regex& resetErrors() {
-     //Zero both error fields; nothing else is touched.
-     error_offset = 0;
-     error_number = 0;
-     return *this;
- }
- /// Recreate character tables used by PCRE2.
- /// You should call this function after changing the locale to remake the
- /// character tables according to the new locale.
- /// These character tables are used to compile the regex and used by match
- /// and replace operation. A separate call to compile() will be required
- /// to apply the new character tables.
- /// @return Reference to the calling Regex object.
- Regex& resetCharacterTables() {
-     //Build fresh tables. 0 must be passed because free() is used below to release them.
-     const unsigned char* tables = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::maketables(0);
-     if(!tables){
-         //Table creation failed: record it and leave the existing tables and context alone
-         //instead of reading from a null pointer.
-         error_number = PCRE2_ERROR_NOMEMORY;
-         return *this;
-     }
-     //Keep a private copy of the 1088 table bytes, then release the buffer from maketables().
-     tabv = std::vector<unsigned char>(tables, tables+1088);
-     ::free((void*)tables); //must free memory
-     //Create the compile context on first use.
-     if(!ccontext)
-         ccontext = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::compile_context_create(0);
-     if(!ccontext){
-         //Context creation failed: record it rather than passing a null context on.
-         error_number = PCRE2_ERROR_NOMEMORY;
-         return *this;
-     }
-     //Point the context at our copy of the tables.
-     Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::set_character_tables(ccontext, &tabv[0]);
-     return *this;
- }
-
- ///Get Pcre2 raw compiled code pointer.
- ///@return pointer to constant pcre2_code or null.
- Pcre2Code const* getPcre2Code() const{
-     //Null when nothing is compiled.
-     return this->code;
- }
- /// Get pattern string
- ///@return pattern string of type jpcre2::select::String
- String getPattern() const {
-     //Returns a copy of whatever the pattern pointer currently refers to.
-     String const &current = *pat_str_ptr;
-     return current;
- }
-
- /// Get pointer to pattern string
- ///@return Pointer to constant pattern string
- String const * getPatternPointer() const {
-     //The pointer itself is returned; no string is copied.
-     return this->pat_str_ptr;
- }
- /// Calculate modifier string from PCRE2 and JPCRE2 options and return it.
- ///
- /// **Mixed or combined modifier**.
- ///
- /// Some modifier may include other modifiers i.e they have the same meaning of some modifiers
- /// combined together. For example, the 'n' modifier includes the 'u' modifier and together they
- /// are equivalent to `PCRE2_UTF | PCRE2_UCP`. When you set a modifier like this, both options
- /// get set, and when you remove the 'n' modifier (with `Regex::changeModifier()`), both will get removed.
- ///@tparam Char_T Character type
- ///@return Calculated modifier string (std::string)
- ///@see RegexMatch::getModifier()
- ///@see RegexReplace::getModifier()
- std::string getModifier() const {
-     //Without a custom modifier table the default MOD conversion is used.
-     if(!modtab){
-         return MOD::fromCompileOption(compile_opts, jpcre2_compile_opts);
-     }
-     return modtab->fromCompileOption(compile_opts, jpcre2_compile_opts);
- }
- /// Get PCRE2 option
- /// @return Compile time PCRE2 option value
- ///@see RegexReplace::getPcre2Option()
- ///@see RegexMatch::getPcre2Option()
- Uint getPcre2Option() const {
-     //Plain read of the PCRE2 compile option bits.
-     return this->compile_opts;
- }
- /// Get JPCRE2 option
- /// @return Compile time JPCRE2 option value
- ///@see RegexReplace::getJpcre2Option()
- ///@see RegexMatch::getJpcre2Option()
- Uint getJpcre2Option() const {
-     //Plain read of the JPCRE2 compile option bits.
-     return this->jpcre2_compile_opts;
- }
- /// Returns the last error number
- ///@return Last error number
- int getErrorNumber() const {
-     //Plain read of the stored error number.
-     return this->error_number;
- }
- /// Returns the last error offset
- ///@return Last error offset
- int getErrorOffset() const {
-     //The offset is stored as PCRE2_SIZE and converted to int for the caller.
-     return static_cast<int>(error_offset);
- }
-
- /// Returns the last error message
- ///@return Last error message
- String getErrorMessage() const {
-     //The enclosing select<> takes an extra Map argument when built as C++11 or later,
-     //so the qualified call differs between the two language levels.
-     #if __cplusplus >= 201103L
-     return select<Char, Map>::getErrorMessage(error_number, error_offset);
-     #else
-     return select<Char>::getErrorMessage(error_number, error_offset);
-     #endif
- }
-
- ///Get new line convention from compiled code.
- ///@return New line option value or 0.
- ///```
- ///PCRE2_NEWLINE_CR Carriage return only
- ///PCRE2_NEWLINE_LF Linefeed only
- ///PCRE2_NEWLINE_CRLF CR followed by LF only
- ///PCRE2_NEWLINE_ANYCRLF Any of the above
- ///PCRE2_NEWLINE_ANY Any Unicode newline sequence
- ///```
- Uint getNewLine() {
-     //0 is reported when there is no compiled code to query.
-     Uint newline = 0;
-     if(code){
-         int ret = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::pattern_info(code, PCRE2_INFO_NEWLINE, &newline);
-         //A negative result is stored as the error number; `newline` is returned as it is.
-         if(ret < 0) error_number = ret;
-     }
-     return newline;
- }
-
- ///Get the modifier table that is currently set for this Regex object.
- ///This is a read-only accessor, so it is const-qualified like the other getters
- ///and can be called on a const Regex object.
- ///@return constant ModifierTable pointer (the stored pointer, returned as is).
- ModifierTable const* getModifierTable() const {
-     return modtab;
- }
-
-
- ///Set new line convention.
- ///A compile context is created on first use. If that creation fails (null context),
- ///the error number is set to PCRE2_ERROR_NOMEMORY and nothing else is done.
- ///A negative result from setting the new line value is stored as the error number.
- ///@param value New line option value.
- ///```
- ///PCRE2_NEWLINE_CR Carriage return only
- ///PCRE2_NEWLINE_LF Linefeed only
- ///PCRE2_NEWLINE_CRLF CR followed by LF only
- ///PCRE2_NEWLINE_ANYCRLF Any of the above
- ///PCRE2_NEWLINE_ANY Any Unicode newline sequence
- ///```
- ///@return Reference to the calling Regex object
- Regex& setNewLine(Uint value){
-     if(!ccontext)
-         ccontext = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::compile_context_create(0);
-     if(!ccontext){
-         //context creation failed; a null context must not be handed to set_newline()
-         error_number = PCRE2_ERROR_NOMEMORY;
-         return *this;
-     }
-     int ret = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::set_newline(ccontext, value);
-     if(ret < 0) error_number = ret;
-     return *this;
- }
-
- /// Store a copy of the pattern string that will be compiled.
- /// The internal pattern pointer is redirected to that copy.
- /// @param re Pattern string
- /// @return Reference to the calling Regex object.
- Regex& setPattern(String const &re) {
-     pat_str.assign(re);
-     //must overwrite: a pointer set through the pointer overload is replaced here
-     pat_str_ptr = &pat_str;
-     return *this;
- }
-
- /// @overload
- /// Only the pointer is stored for a non-null argument; the string itself is not copied.
- /// @param re Pattern string pointer, null pointer will unset it.
- /// @return Reference to the calling Regex object.
- Regex& setPattern(String const *re) {
-     if(!re){
-         //unset: clear the internal copy and point back at it
-         pat_str.clear();
-         pat_str_ptr = &pat_str;
-         return *this;
-     }
-     pat_str_ptr = re;
-     return *this;
- }
- /// Set the modifier from scratch.
- /// Both the PCRE2 and the JPCRE2 compile option fields are reset to 0 first, then
- /// Regex::changeModifier() is called to add the options equivalent to the modifier string.
- /// @param x Modifier string.
- /// @return Reference to the calling Regex object.
- /// @see RegexMatch::setModifier()
- /// @see RegexReplace::setModifier()
- Regex& setModifier(Modifier const& x) {
-     jpcre2_compile_opts = 0;
-     compile_opts = 0;
-     changeModifier(x, true);
-     return *this;
- }
-
- ///Select the modifier table to be used by this Regex object.
- ///Only the pointer is stored.
- ///@param mdt pointer to ModifierTable object.
- /// @return Reference to the calling Regex object.
- Regex& setModifierTable(ModifierTable const * mdt){
-     this->modtab = mdt;
-     return *this;
- }
-
- /// Replace the JPCRE2 compile option field with the given value (existing bits are overwritten).
- /// @param x Option value
- /// @return Reference to the calling Regex object.
- /// @see RegexMatch::setJpcre2Option()
- /// @see RegexReplace::setJpcre2Option()
- Regex& setJpcre2Option(Uint x) {
-     this->jpcre2_compile_opts = x;
-     return *this;
- }
- /// Replace the PCRE2 compile option field with the given value (existing bits are overwritten).
- /// @param x Option value
- /// @return Reference to the calling Regex object.
- /// @see RegexMatch::setPcre2Option()
- /// @see RegexReplace::setPcre2Option()
- Regex& setPcre2Option(Uint x) {
-     this->compile_opts = x;
-     return *this;
- }
- /// Parse a modifier string and add or remove the equivalent PCRE2 and JPCRE2 options.
- /// The option fields are not initialized or re-initialized here; set them to 0 beforehand
- /// if the options should be built from scratch.
- ///
- /// The conversion is done by the custom modifier table if one is set, otherwise by MOD.
- /// If an invalid modifier is detected, the error number for the Regex object will be
- /// jpcre2::ERROR::INVALID_MODIFIER and the error offset will be the modifier character.
- /// The message is available through Regex::getErrorMessage().
- /// @param mod Modifier string.
- /// @param x Whether to add or remove option
- /// @return Reference to the calling Regex object
- /// @see RegexMatch::changeModifier()
- /// @see RegexReplace::changeModifier()
- Regex& changeModifier(Modifier const& mod, bool x){
-     if(modtab){
-         modtab->toCompileOption(mod, x, &compile_opts, &jpcre2_compile_opts, &error_number, &error_offset);
-     } else {
-         MOD::toCompileOption(mod, x, &compile_opts, &jpcre2_compile_opts, &error_number, &error_offset);
-     }
-     return *this;
- }
- /// Switch the given JPCRE2 option bits on or off in the compile option field.
- /// @param opt JPCRE2 option value
- /// @param x Add the option if it's true, remove otherwise.
- /// @return Reference to the calling Regex object
- /// @see RegexMatch::changeJpcre2Option()
- /// @see RegexReplace::changeJpcre2Option()
- Regex& changeJpcre2Option(Uint opt, bool x) {
-     if(x) jpcre2_compile_opts |= opt;
-     else jpcre2_compile_opts &= ~opt;
-     return *this;
- }
-
- /// Switch the given PCRE2 option bits on or off in the compile option field.
- /// @param opt PCRE2 option value
- /// @param x Add the option if it's true, remove otherwise.
- /// @return Reference to the calling Regex object
- /// @see RegexMatch::changePcre2Option()
- /// @see RegexReplace::changePcre2Option()
- Regex& changePcre2Option(Uint opt, bool x) {
-     if(x) compile_opts |= opt;
-     else compile_opts &= ~opt;
-     return *this;
- }
-
- /// Parse a modifier string and add the equivalent PCRE2 and JPCRE2 options.
- /// Convenience wrapper: it calls Regex::changeModifier() with the add flag set to true.
- /// @param mod Modifier string.
- /// @return Reference to the calling Regex object
- /// @see RegexMatch::addModifier()
- /// @see RegexReplace::addModifier()
- Regex& addModifier(Modifier const& mod){
-     changeModifier(mod, true);
-     return *this;
- }
- /// Turn on the given bits in the JPCRE2 compile option field; bits already set are kept.
- /// @param x Option value
- /// @return Reference to the calling Regex object
- /// @see RegexMatch::addJpcre2Option()
- /// @see RegexReplace::addJpcre2Option()
- Regex& addJpcre2Option(Uint x) {
-     jpcre2_compile_opts = jpcre2_compile_opts | x;
-     return *this;
- }
- /// Turn on the given bits in the PCRE2 compile option field; bits already set are kept.
- /// @param x Option value
- /// @return Reference to the calling Regex object
- /// @see RegexMatch::addPcre2Option()
- /// @see RegexReplace::addPcre2Option()
- Regex& addPcre2Option(Uint x) {
-     compile_opts = compile_opts | x;
-     return *this;
- }
- ///Compile the pattern using the pattern, options and context stored in this object.
- ///@see Regex::compile(String const &re, Uint po, Uint jo)
- ///@see Regex::compile(String const &re, Uint po)
- ///@see Regex::compile(String const &re, Modifier const& mod)
- ///@see Regex::compile(String const &re)
- void compile();
- ///@overload
- ///...
- /// Store the given pattern and both option values, then compile using the stored state.
- /// @param re Pattern string
- /// @param po PCRE2 option
- /// @param jo JPCRE2 option
- void compile(String const &re, Uint po, Uint jo) {
-     setPattern(re);
-     setPcre2Option(po);
-     setJpcre2Option(jo);
-     compile();
- }
- ///@overload
- /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern.
- /// @param po PCRE2 option
- /// @param jo JPCRE2 option
- void compile(String const *re, Uint po, Uint jo) {
-     setPattern(re);
-     setPcre2Option(po);
-     setJpcre2Option(jo);
-     compile();
- }
-
- ///@overload
- /// The JPCRE2 option field is left as it is.
- /// @param re Pattern string
- /// @param po PCRE2 option
- void compile(String const &re, Uint po) {
-     setPattern(re);
-     setPcre2Option(po);
-     compile();
- }
-
- ///@overload
- /// The JPCRE2 option field is left as it is.
- /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern.
- /// @param po PCRE2 option
- void compile(String const *re, Uint po) {
-     setPattern(re);
-     setPcre2Option(po);
-     compile();
- }
- /// @overload
- /// The options are rebuilt from the modifier string through setModifier().
- /// @param re Pattern string
- /// @param mod Modifier string.
- void compile(String const &re, Modifier const& mod) {
-     setPattern(re);
-     setModifier(mod);
-     compile();
- }
- ///@overload
- /// The options are rebuilt from the modifier string through setModifier().
- /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern.
- /// @param mod Modifier string.
- void compile(String const *re, Modifier const& mod) {
-     setPattern(re);
-     setModifier(mod);
-     compile();
- }
- ///@overload
- /// Only the pattern is replaced; the stored options are used unchanged.
- /// @param re Pattern string.
- void compile(String const &re) {
-     setPattern(re).compile();
- }
- ///@overload
- /// Only the pattern is replaced; the stored options are used unchanged.
- /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern.
- void compile(String const *re) {
-     setPattern(re).compile();
- }
-
- ///Create a RegexMatch object for this Regex object and return it by value.
- ///The object is constructed with a pointer to this Regex and is given
- ///the same modifier table as this Regex object.
- ///@return RegexMatch object.
- RegexMatch initMatch(){
-     RegexMatch matcher(this);
-     matcher.setModifierTable(modtab);
-     return matcher;
- }
-
- ///Alias of initMatch(); simply forwards to it.
- ///@return RegexMatch object by value.
- RegexMatch getMatchObject(){
-     return this->initMatch();
- }
-
- /// Run a match on the subject with a temporary match object and return the match count.
- /// The temporary object comes from initMatch(), so it is tied to this Regex object
- /// and uses the same modifier table.
- /// @param s Subject string.
- /// @param mod Modifier string.
- /// @param start_offset Offset from where matching will start in the subject string.
- /// @return Match count
- /// @see RegexMatch::match()
- SIZE_T match(String const &s, Modifier const& mod, PCRE2_SIZE start_offset=0) {
-     RegexMatch matcher = initMatch();
-     matcher.setStartOffset(start_offset);
-     matcher.setSubject(s);
-     matcher.setModifier(mod);
-     return matcher.match();
- }
-
- ///@overload
- ///...
- ///@param s Pointer to subject string. A null pointer will unset the subject and perform a match with empty subject.
- ///@param mod Modifier string.
- ///@param start_offset Offset from where matching will start in the subject string.
- ///@return Match count
- SIZE_T match(String const *s, Modifier const& mod, PCRE2_SIZE start_offset=0) {
-     RegexMatch matcher = initMatch();
-     matcher.setStartOffset(start_offset);
-     matcher.setSubject(s);
-     matcher.setModifier(mod);
-     return matcher.match();
- }
-
- ///@overload
- ///...
- /// No modifier is applied to the temporary match object in this overload.
- /// @param s Subject string.
- /// @param start_offset Offset from where matching will start in the subject string.
- /// @return Match count
- /// @see RegexMatch::match()
- SIZE_T match(String const &s, PCRE2_SIZE start_offset=0) {
-     RegexMatch matcher = initMatch();
-     matcher.setStartOffset(start_offset);
-     matcher.setSubject(s);
-     return matcher.match();
- }
- ///@overload
- ///...
- /// No modifier is applied to the temporary match object in this overload.
- /// @param s Pointer to subject string. A null pointer will unset the subject and perform a match with empty subject.
- /// @param start_offset Offset from where matching will start in the subject string.
- /// @return Match count
- /// @see RegexMatch::match()
- SIZE_T match(String const *s, PCRE2_SIZE start_offset=0) {
-     RegexMatch matcher = initMatch();
-     matcher.setStartOffset(start_offset);
-     matcher.setSubject(s);
-     return matcher.match();
- }
-
- ///Create a RegexReplace object for this Regex object and return it by value.
- ///The object is constructed with a pointer to this Regex and is given
- ///the same modifier table as this Regex object.
- ///@return RegexReplace object.
- RegexReplace initReplace(){
-     RegexReplace replacer(this);
-     replacer.setModifierTable(modtab);
-     return replacer;
- }
-
- ///Alias of initReplace(); simply forwards to it.
- ///@return RegexReplace object.
- RegexReplace getReplaceObject(){
-     return this->initReplace();
- }
-
- /// Run a replace with a temporary replace object and return the resulting string.
- /// The temporary object comes from initReplace(), so it is tied to this Regex object
- /// and uses the same modifier table.
- /// @param mains Subject string.
- /// @param repl String to replace with
- /// @param mod Modifier string.
- ///@param counter Pointer to a counter to store the number of replacement done.
- /// @return Resultant string after regex replace
- /// @see RegexReplace::replace()
- String replace(String const &mains, String const &repl, Modifier const& mod="", SIZE_T* counter=0) {
-     RegexReplace replacer = initReplace();
-     replacer.setSubject(mains);
-     replacer.setReplaceWith(repl);
-     replacer.setModifier(mod);
-     replacer.setReplaceCounter(counter);
-     return replacer.replace();
- }
-
- ///@overload
- /// The subject is passed on as a pointer; this function itself does not write through it.
- /// @param mains Pointer to subject string
- /// @param repl String to replace with
- /// @param mod Modifier string.
- ///@param counter Pointer to a counter to store the number of replacement done.
- /// @return Resultant string after regex replace
- /// @see RegexReplace::replace()
- String replace(String *mains, String const &repl, Modifier const& mod="", SIZE_T* counter=0) {
-     RegexReplace replacer = initReplace();
-     replacer.setSubject(mains);
-     replacer.setReplaceWith(repl);
-     replacer.setModifier(mod);
-     replacer.setReplaceCounter(counter);
-     return replacer.replace();
- }
-
- ///@overload
- ///...
- /// @param mains Subject string
- /// @param repl Pointer to string to replace with
- /// @param mod Modifier string.
- ///@param counter Pointer to a counter to store the number of replacement done.
- /// @return Resultant string after regex replace
- /// @see RegexReplace::replace()
- String replace(String const &mains, String const *repl, Modifier const& mod="", SIZE_T* counter=0) {
-     RegexReplace replacer = initReplace();
-     replacer.setSubject(mains);
-     replacer.setReplaceWith(repl);
-     replacer.setModifier(mod);
-     replacer.setReplaceCounter(counter);
-     return replacer.replace();
- }
-
- ///@overload
- ///...
- /// @param mains Pointer to subject string
- /// @param repl Pointer to string to replace with
- /// @param mod Modifier string.
- ///@param counter Pointer to a counter to store the number of replacement done.
- /// @return Resultant string after regex replace
- /// @see RegexReplace::replace()
- String replace(String *mains, String const *repl, Modifier const& mod="", SIZE_T* counter=0) {
-     RegexReplace replacer = initReplace();
-     replacer.setSubject(mains);
-     replacer.setReplaceWith(repl);
-     replacer.setModifier(mod);
-     replacer.setReplaceCounter(counter);
-     return replacer.replace();
- }
-
- /// Perl compatible replace method.
- /// Replaces inside the string that `mains` points to and returns the replace count.
- /// With a null `mains` nothing is done and 0 is returned.
- ///
- /// It's a shorthand method to `RegexReplace::preplace()`.
- /// @param mains Pointer to subject string.
- /// @param repl Replacement string (string to replace with).
- /// @param mod Modifier string.
- /// @return replace count.
- SIZE_T preplace(String * mains, String const& repl, Modifier const& mod=""){
-     SIZE_T replaced = 0;
-     if(!mains) return replaced;
-     RegexReplace replacer = initReplace();
-     replacer.setSubject(mains);
-     replacer.setReplaceWith(repl);
-     replacer.setModifier(mod);
-     replacer.setReplaceCounter(&replaced);
-     //write the result back over the subject
-     *mains = replacer.replace();
-     return replaced;
- }
-
- /// @overload
- ///
- /// Perl compatible replace method.
- /// Replaces inside the string that `mains` points to and returns the replace count.
- /// With a null `mains` nothing is done and 0 is returned.
- ///
- /// It's a shorthand method to `RegexReplace::preplace()`.
- /// @param mains Pointer to subject string.
- /// @param repl Pointer to replacement string (string to replace with).
- /// @param mod Modifier string.
- /// @return replace count.
- SIZE_T preplace(String * mains, String const* repl, Modifier const& mod=""){
-     SIZE_T replaced = 0;
-     if(!mains) return replaced;
-     RegexReplace replacer = initReplace();
-     replacer.setSubject(mains);
-     replacer.setReplaceWith(repl);
-     replacer.setModifier(mod);
-     replacer.setReplaceCounter(&replaced);
-     //write the result back over the subject
-     *mains = replacer.replace();
-     return replaced;
- }
-
- /// @overload
- ///
- /// Perl compatible replace method.
- /// Only the replace count is returned; the replaced string is thrown away.
- ///
- /// It's a shorthand method to `RegexReplace::preplace()`.
- /// @param mains Subject string.
- /// @param repl Replacement string (string to replace with).
- /// @param mod Modifier string.
- /// @return replace count.
- SIZE_T preplace(String const& mains, String const& repl, Modifier const& mod=""){
-     SIZE_T replaced = 0;
-     RegexReplace replacer = initReplace();
-     replacer.setSubject(mains);
-     replacer.setReplaceWith(repl);
-     replacer.setModifier(mod);
-     replacer.setReplaceCounter(&replaced);
-     replacer.replace(); //result intentionally discarded
-     return replaced;
- }
-
- /// @overload
- ///
- /// Perl compatible replace method.
- /// Only the replace count is returned; the replaced string is thrown away.
- ///
- /// It's a shorthand method to `RegexReplace::preplace()`.
- /// @param mains Subject string.
- /// @param repl Pointer to replacement string (string to replace with).
- /// @param mod Modifier string.
- /// @return replace count.
- SIZE_T preplace(String const& mains, String const* repl, Modifier const& mod=""){
-     SIZE_T replaced = 0;
-     RegexReplace replacer = initReplace();
-     replacer.setSubject(mains);
-     replacer.setReplaceWith(repl);
-     replacer.setModifier(mod);
-     replacer.setReplaceCounter(&replaced);
-     replacer.replace(); //result intentionally discarded
-     return replaced;
- }
- };
-
- private:
- //Constructors and destructor are declared private to prevent instantiation of the select class (the move constructor is declared only for C++11 and later).
- select();
- select(select const &);
- #if __cplusplus >= 201103L
- select(select&&);
- #endif
- ~select();
- };//struct select
- }//jpcre2 namespace
- ///Split a modifier table into a JPCRE2 part and a PCRE2 part.
- ///Entries whose option value is JIT_COMPILE or FIND_ALL go to the JPCRE2 tables,
- ///every other entry goes to the PCRE2 tables. All four output tables are cleared first.
- ///@param tabjs Output: modifier characters of the JPCRE2 options.
- ///@param tabjv Output: option values of the JPCRE2 options.
- ///@param tab_s Output: modifier characters of the remaining options.
- ///@param tab_v Output: option values of the remaining options.
- ///@param tabs Input modifier characters.
- ///@param tabv Input option values; asserted to have the same size as `tabs`.
- inline void jpcre2::ModifierTable::parseModifierTable(std::string& tabjs, VecOpt& tabjv,
-                                                        std::string& tab_s, VecOpt& tab_v,
-                                                        std::string const& tabs, VecOpt const& tabv){
-     SIZE_T n = tabs.length();
-     JPCRE2_ASSERT(n == tabv.size(), ("ValueError: Could not set Modifier table.\
- Modifier character and value tables are not of the same size (" + _tostdstring(n) + " == " + _tostdstring(tabv.size()) + ").").c_str());
-     //start from empty outputs
-     tabjs.clear();
-     tabjv.clear();
-     tab_s.clear();
-     tab_v.clear();
-     tab_s.reserve(n);
-     tab_v.reserve(n);
-     for(SIZE_T idx = 0; idx < n; ++idx){
-         //JPCRE2 options are unique, so it's not necessary to check if it's compile or replace or match.
-         bool const is_jpcre2_option = (tabv[idx] == JIT_COMPILE) || (tabv[idx] == FIND_ALL);
-         if(is_jpcre2_option){
-             tabjs.push_back(tabs[idx]);
-             tabjv.push_back(tabv[idx]);
-         } else {
-             tab_s.push_back(tabs[idx]);
-             tab_v.push_back(tabv[idx]);
-         }
-     }
- }
- ///Compile the stored pattern with the stored PCRE2 options and compile context.
- ///
- ///freeRegexMemory() is called first to release previous memory. The pattern is handed to PCRE2
- ///together with its explicit length, so a pattern that contains embedded NUL characters is
- ///compiled in full instead of being cut at the first NUL.
- ///If compilation fails, the error number and error offset are stored in the object.
- ///If it succeeds and the JIT_COMPILE bit is set in the JPCRE2 options, JIT compilation is
- ///performed and a negative JIT result is stored as the error number.
- #if __cplusplus >= 201103L
- template<typename Char_T, template<typename...> class Map>
- void jpcre2::select<Char_T, Map>::Regex::compile() {
- #else
- template<typename Char_T>
- void jpcre2::select<Char_T>::Regex::compile() {
- #endif
-     //Get c_str and length of pattern
-     Pcre2Sptr c_pattern = (Pcre2Sptr) pat_str_ptr->c_str();
-     PCRE2_SIZE pattern_length = (PCRE2_SIZE) pat_str_ptr->length();
-     int err_number = 0;
-     PCRE2_SIZE err_offset = 0;
-     /**************************************************************************
-      * Compile the regular expression pattern, and handle
-      * any errors that are detected.
-      *************************************************************************/
-     //first release any previous memory
-     freeRegexMemory();
-     code = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::compile( c_pattern, /* the pattern */
-                                                       pattern_length, /* explicit length, so embedded NULs are kept */
-                                                       compile_opts, /* default options */
-                                                       &err_number, /* for error number */
-                                                       &err_offset, /* for error offset */
-                                                       ccontext); /* use compile context */
-
-     if (code == 0) {
-         /* Compilation failed */
-         //must not free regex memory, the only function has that right is the destructor
-         error_number = err_number;
-         error_offset = err_offset;
-         return;
-     } else if ((jpcre2_compile_opts & JIT_COMPILE) != 0) {
-         ///perform JIT compilation if it's enabled
-         int jit_ret = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::jit_compile(code, PCRE2_JIT_COMPLETE);
-         if(jit_ret < 0) error_number = jit_ret;
-     }
-     //everything's OK
- }
- ///Replace the matched parts of the subject using the stored callback and PCRE2 substitute.
- ///
- ///For every stored match, the callback selected by `callbackn` produces a replacement string,
- ///and substitute is then run on just that matched part with the callback result as replacement.
- ///The unmatched parts of the subject are copied unchanged around the processed matches.
- ///Only the first match is processed unless FIND_ALL is set in the JPCRE2 options.
- ///@param do_match If true, match() is called before the stored match data is used.
- ///@param replace_opts PCRE2 substitute options; PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is always added and PCRE2_SUBSTITUTE_GLOBAL is always removed.
- ///@param counter Optional pointer that receives the sum of the substitute results. It is left at 0 whenever the unmodified subject is returned.
- ///@return Resultant string. The subject is returned as is if there is no compiled regex, no match, or an error occurred (the error number is stored in that case).
- #if __cplusplus >= 201103L
- template<typename Char_T, template<typename...> class Map>
- typename jpcre2::select<Char_T, Map>::String jpcre2::select<Char_T, Map>::MatchEvaluator::replace(bool do_match, Uint replace_opts, SIZE_T * counter) {
- #else
- template<typename Char_T>
- typename jpcre2::select<Char_T>::String jpcre2::select<Char_T>::MatchEvaluator::replace(bool do_match, Uint replace_opts, SIZE_T * counter) {
- #endif
-     if(counter) *counter = 0;
-     //Counted locally and published only on success, so that an error return
-     //(unmodified subject) never reports a partial count.
-     SIZE_T replaced = 0;
-
-     replace_opts |= PCRE2_SUBSTITUTE_OVERFLOW_LENGTH;
-     replace_opts &= ~PCRE2_SUBSTITUTE_GLOBAL;
-     Regex const * re = RegexMatch::getRegexObject();
-     // If re or re->code is null, return the subject string unmodified.
-     if (!re || re->code == 0)
-         return RegexMatch::getSubject();
-
-     Pcre2Sptr r_subject_ptr = (Pcre2Sptr) RegexMatch::getSubjectPointer()->c_str();
-     if(do_match) match();
-     SIZE_T mcount = vec_soff.size();
-     // if mcount is 0, return the subject string. (there's no need to worry about re)
-     if(!mcount) return RegexMatch::getSubject();
-     SIZE_T current_offset = 0; //needs to be zero, not start_offset, because it's from where unmatched parts will be copied.
-     String res, tmp;
-
-     //A check, this check is not foolproof.
-     SIZE_T last = vec_eoff.size();
-     last = (last>0)?last-1:0;
-     JPCRE2_ASSERT(vec_eoff[last] <= RegexMatch::getSubject().size(), "ValueError: subject string is not of the required size, may be it's changed!!!\
- If you are using esisting match data, try a new match.");
-
-     //loop through the matches
-     for(SIZE_T i=0;i<mcount;++i){
-         //first copy the unmatched part.
-         //Matches that use \K to end before they start are not supported.
-         if(vec_soff[i] < current_offset || vec_eoff[i] < vec_soff[i]){
-             RegexMatch::error_number = PCRE2_ERROR_BADSUBSPATTERN;
-             return RegexMatch::getSubject();
-         } else {
-             res += String(r_subject_ptr+current_offset, r_subject_ptr+vec_soff[i]);
-         }
-         //now process the matched part
-         switch(callbackn){
-             case 0: tmp = callback0((void*)0, (void*)0, (void*)0); break;
-             case 1: JPCRE2_VECTOR_DATA_ASSERT(vec_num.size() == mcount, "VecNum");
-                     tmp = callback1(vec_num[i], (void*)0, (void*)0); break;
-             case 2: JPCRE2_VECTOR_DATA_ASSERT(vec_nas.size() == mcount, "VecNas");
-                     tmp = callback2((void*)0, vec_nas[i], (void*)0); break;
-             case 3: JPCRE2_VECTOR_DATA_ASSERT(vec_num.size() == mcount && vec_nas.size() == mcount, "VecNum or VecNas");
-                     tmp = callback3(vec_num[i], vec_nas[i], (void*)0); break;
-             case 4: JPCRE2_VECTOR_DATA_ASSERT(vec_ntn.size() == mcount, "VecNtn");
-                     tmp = callback4((void*)0, (void*)0, vec_ntn[i]); break;
-             case 5: JPCRE2_VECTOR_DATA_ASSERT(vec_num.size() == mcount && vec_ntn.size() == mcount, "VecNum or VecNtn");
-                     tmp = callback5(vec_num[i], (void*)0, vec_ntn[i]); break;
-             case 6: JPCRE2_VECTOR_DATA_ASSERT(vec_nas.size() == mcount && vec_ntn.size() == mcount, "VecNas or VecNtn");
-                     tmp = callback6((void*)0, vec_nas[i], vec_ntn[i]); break;
-             case 7: JPCRE2_VECTOR_DATA_ASSERT(vec_num.size() == mcount && vec_nas.size() == mcount && vec_ntn.size() == mcount, "VecNum\n or VecNas or VecNtn");
-                     tmp = callback7(vec_num[i], vec_nas[i], vec_ntn[i]); break;
-             default: JPCRE2_ASSERT(2 == 1, "Invalid callbackn. Please file a bug report (must include the line number from below)."); break;
-         }
-         //reset the current offset
-         current_offset = vec_eoff[i];
-
-         //second part
-         ///the matched part is the subject
-         Pcre2Sptr subject = r_subject_ptr + vec_soff[i];
-         PCRE2_SIZE subject_length = vec_eoff[i] - vec_soff[i];
-         ///the string returned from the callback is the replacement string.
-         Pcre2Sptr replace = (Pcre2Sptr) tmp.c_str();
-         PCRE2_SIZE replace_length = tmp.length();
-         bool retry = true;
-         int ret = 0;
-         PCRE2_SIZE outlengthptr = 0;
-         Pcre2Uchar* output_buffer = new Pcre2Uchar[outlengthptr + 1]();
-
-         while (true) {
-             ret = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::substitute(
-                     re->code, /*Points to the compiled pattern*/
-                     subject, /*Points to the subject string*/
-                     subject_length, /*Length of the subject string*/
-                     0, /*Offset in the subject at which to start matching*/ //must be zero
-                     replace_opts, /*Option bits*/
-                     RegexMatch::mdata, /*Points to a match data block, or is NULL*/
-                     RegexMatch::mcontext, /*Points to a match context, or is NULL*/
-                     replace, /*Points to the replacement string*/
-                     replace_length, /*Length of the replacement string*/
-                     output_buffer, /*Points to the output buffer*/
-                     &outlengthptr /*Points to the length of the output buffer*/
-                     );
-             if (ret < 0) {
-                 //Handle errors
-                 if ((replace_opts & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0
-                         && ret == (int) PCRE2_ERROR_NOMEMORY && retry) {
-                     retry = false;
-                     /// If the initial buffer wasn't big enough for the resultant string,
-                     /// we will try once more with a new buffer size adjusted to the length of the resultant string.
-                     delete[] output_buffer;
-                     output_buffer = new Pcre2Uchar[outlengthptr + 1]();
-                     // Go and try to perform the substitute again
-                     continue;
-                 } else {
-                     RegexMatch::error_number = ret;
-                     delete[] output_buffer;
-                     return RegexMatch::getSubject();
-                 }
-             }
-             //If everything's ok exit the loop
-             break;
-         }
-         res += String((Char*) output_buffer,(Char*) (output_buffer + outlengthptr) );
-         delete[] output_buffer;
-         replaced += ret;
-         //if FIND_ALL is not set, single match will be performed
-         if((RegexMatch::getJpcre2Option() & FIND_ALL) == 0) break;
-     }
-     //All matched parts have been dealt with.
-     //now copy rest of the string from current_offset
-     res += RegexMatch::getSubject().substr(current_offset, String::npos);
-     if(counter) *counter = replaced;
-     return res;
- }
- ///Replace the matched parts of the subject with the strings returned by the stored callback.
- ///
- ///Unlike replace(), the callback result is appended as is; no PCRE2 substitute call is made.
- ///The unmatched parts of the subject are copied unchanged around the processed matches.
- ///Only the first match is processed unless FIND_ALL is set in the JPCRE2 options.
- ///@param do_match If true, match() is called before the stored match data is used.
- ///@param jo JPCRE2 option value; it is not read by this implementation (RegexMatch::getJpcre2Option() is consulted instead).
- ///@param counter Optional pointer that receives the number of processed matches. It is left at 0 whenever the unmodified subject is returned.
- ///@return Resultant string. The subject is returned as is if there is no match or a match has unsupported offsets (the error number is stored in that case).
- #if __cplusplus >= 201103L
- template<typename Char_T, template<typename...> class Map>
- typename jpcre2::select<Char_T, Map>::String jpcre2::select<Char_T, Map>::MatchEvaluator::nreplace(bool do_match, Uint jo, SIZE_T* counter){
- #else
- template<typename Char_T>
- typename jpcre2::select<Char_T>::String jpcre2::select<Char_T>::MatchEvaluator::nreplace(bool do_match, Uint jo, SIZE_T* counter){
- #endif
-     if(counter) *counter = 0;
-     //Counted locally and published only on success, so that an error return
-     //(unmodified subject) never reports a partial count.
-     SIZE_T replaced = 0;
-     if(do_match) match();
-     SIZE_T mcount = vec_soff.size();
-     // if mcount is 0, return the subject string. (there's no need to worry about re)
-     if(!mcount) return RegexMatch::getSubject();
-     SIZE_T current_offset = 0; //no need for worrying about start offset, it's handled by match and we get valid offsets out of it.
-     String res;
-
-     //A check, this check is not foolproof
-     SIZE_T last = vec_eoff.size();
-     last = (last>0)?last-1:0;
-     JPCRE2_ASSERT(vec_eoff[last] <= RegexMatch::getSubject().size(), "ValueError: subject string is not of the required size, may be it's changed!!!\
- If you are using esisting match data, try a new match.");
-
-     //loop through the matches
-     for(SIZE_T i=0;i<mcount;++i){
-         //first copy the unmatched part.
-         //Matches that use \K to end before they start are not supported
-         //(same two conditions as in replace()).
-         if(vec_soff[i] < current_offset || vec_eoff[i] < vec_soff[i]){
-             RegexMatch::error_number = PCRE2_ERROR_BADSUBSPATTERN;
-             return RegexMatch::getSubject();
-         } else {
-             res += RegexMatch::getSubject().substr(current_offset, vec_soff[i]-current_offset);
-         }
-         //now process the matched part
-         switch(callbackn){
-             case 0: res += callback0((void*)0, (void*)0, (void*)0); break;
-             case 1: JPCRE2_VECTOR_DATA_ASSERT(vec_num.size() == mcount, "VecNum");
-                     res += callback1(vec_num[i], (void*)0, (void*)0); break;
-             case 2: JPCRE2_VECTOR_DATA_ASSERT(vec_nas.size() == mcount, "VecNas");
-                     res += callback2((void*)0, vec_nas[i], (void*)0); break;
-             case 3: JPCRE2_VECTOR_DATA_ASSERT(vec_num.size() == mcount && vec_nas.size() == mcount, "VecNum or VecNas");
-                     res += callback3(vec_num[i], vec_nas[i], (void*)0); break;
-             case 4: JPCRE2_VECTOR_DATA_ASSERT(vec_ntn.size() == mcount, "VecNtn");
-                     res += callback4((void*)0, (void*)0, vec_ntn[i]); break;
-             case 5: JPCRE2_VECTOR_DATA_ASSERT(vec_num.size() == mcount && vec_ntn.size() == mcount, "VecNum or VecNtn");
-                     res += callback5(vec_num[i], (void*)0, vec_ntn[i]); break;
-             case 6: JPCRE2_VECTOR_DATA_ASSERT(vec_nas.size() == mcount && vec_ntn.size() == mcount, "VecNas or VecNtn");
-                     res += callback6((void*)0, vec_nas[i], vec_ntn[i]); break;
-             case 7: JPCRE2_VECTOR_DATA_ASSERT(vec_num.size() == mcount && vec_nas.size() == mcount && vec_ntn.size() == mcount, "VecNum\n or VecNas or VecNtn");
-                     res += callback7(vec_num[i], vec_nas[i], vec_ntn[i]); break;
-             default: JPCRE2_ASSERT(2 == 1, "Invalid callbackn. Please file a bug report (must include the line number from below)."); break;
-         }
-         //reset the current offset
-         current_offset = vec_eoff[i];
-         replaced += 1;
-         //if FIND_ALL is not set, single match will be performed
-         if((RegexMatch::getJpcre2Option() & FIND_ALL) == 0) break;
-     }
-     //All matched parts have been dealt with.
-     //now copy rest of the string from current_offset
-     res += RegexMatch::getSubject().substr(current_offset, String::npos);
-     if(counter) *counter = replaced;
-     return res;
- }
- ///Perform the replace operation with PCRE2 substitute and return the resulting string.
- ///
- ///The counter that last_replace_counter points to is reset to 0 first; on success the value
- ///returned by substitute is added to it.
- ///The output buffer starts with buffer_size elements (plus one). If substitute reports
- ///PCRE2_ERROR_NOMEMORY and PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set in the replace options,
- ///the buffer is re-allocated once with the reported length and the call is repeated.
- ///@return Resultant string. The subject is returned as is if there is no compiled regex or if substitute failed (the error number is stored in that case).
- #if __cplusplus >= 201103L
- template<typename Char_T, template<typename...> class Map>
- typename jpcre2::select<Char_T, Map>::String jpcre2::select<Char_T, Map>::RegexReplace::replace() {
- #else
- template<typename Char_T>
- typename jpcre2::select<Char_T>::String jpcre2::select<Char_T>::RegexReplace::replace() {
- #endif
-     *last_replace_counter = 0;
-     // Without a regex object or compiled code there is nothing to do: hand back the subject.
-     if (!re || re->code == 0)
-         return *r_subject_ptr;
-     Pcre2Sptr subject = (Pcre2Sptr) r_subject_ptr->c_str();
-     PCRE2_SIZE subject_length = r_subject_ptr->length();
-     Pcre2Sptr replace = (Pcre2Sptr) r_replw_ptr->c_str();
-     PCRE2_SIZE replace_length = r_replw_ptr->length();
-     PCRE2_SIZE out_length = (PCRE2_SIZE) buffer_size;
-     bool may_retry = true;
-     int ret = 0;
-     Pcre2Uchar* out_buffer = new Pcre2Uchar[out_length + 1]();
-     for (;;) {
-         ret = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::substitute(
-                 re->code, /*Points to the compiled pattern*/
-                 subject, /*Points to the subject string*/
-                 subject_length, /*Length of the subject string*/
-                 _start_offset, /*Offset in the subject at which to start matching*/
-                 replace_opts, /*Option bits*/
-                 mdata, /*Points to a match data block, or is NULL*/
-                 mcontext, /*Points to a match context, or is NULL*/
-                 replace, /*Points to the replacement string*/
-                 replace_length, /*Length of the replacement string*/
-                 out_buffer, /*Points to the output buffer*/
-                 &out_length /*Points to the length of the output buffer*/
-                 );
-         //success: leave the loop
-         if (ret >= 0) break;
-         //One retry is allowed, and only for a too small buffer with the overflow length option set.
-         bool const grow_and_retry = may_retry
-                 && ret == (int) PCRE2_ERROR_NOMEMORY
-                 && (replace_opts & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0;
-         if (!grow_and_retry) {
-             error_number = ret;
-             delete[] out_buffer;
-             return *r_subject_ptr;
-         }
-         may_retry = false;
-         //out_length now holds the length reported by substitute; allocate accordingly.
-         delete[] out_buffer;
-         out_buffer = new Pcre2Uchar[out_length + 1]();
-     }
-     *last_replace_counter += ret;
-     String result = String((Char*) out_buffer,(Char*) (out_buffer + out_length) );
-     delete[] out_buffer;
-     return result;
- }
- ///Collect the numbered substrings of one match and append them to the numbered substring vector.
- ///
- ///A group whose start offset is PCRE2_UNSET, or whose end offset lies before its start offset,
- ///is stored as an empty string; no pointer is formed from such offsets.
- ///This function shouldn't be called if the numbered substring vector pointer is null.
- ///@param rc Number of offset pairs to read from ovector.
- ///@param subject Pointer to the start of the subject.
- ///@param ovector Offset vector holding start/end pairs.
- ///@return Always true.
- #if __cplusplus >= 201103L
- template<typename Char_T, template<typename...> class Map>
- bool jpcre2::select<Char_T, Map>::RegexMatch::getNumberedSubstrings(int rc, Pcre2Sptr subject, PCRE2_SIZE* ovector) {
- #else
- template<typename Char_T>
- bool jpcre2::select<Char_T>::RegexMatch::getNumberedSubstrings(int rc, Pcre2Sptr subject, PCRE2_SIZE* ovector) {
- #endif
-     NumSub num_sub;
-     num_sub.reserve(rc); //we know exactly how many elements it will have.
-     for (int i = 0; i < rc; i++) {
-         PCRE2_SIZE const start = ovector[2*i];
-         PCRE2_SIZE const end = ovector[2*i+1];
-         if(start == PCRE2_UNSET || end < start)
-             num_sub.push_back(String()); //unset group or end before start: nothing to copy
-         else
-             num_sub.push_back(String((Char*)(subject + start), end - start));
-     }
-     vec_num->push_back(num_sub); //this function shouldn't be called if this vector is null
-     return true;
- }
- ///Collect the named substrings of one match and append the resulting maps to their vectors.
- ///
- ///Each name table entry yields a group number and a name. If the name-to-substring vector is set,
- ///the name is mapped to the captured substring; if the name-to-number vector is set, the name is
- ///mapped to the group number. A group whose start offset is PCRE2_UNSET, or whose end offset
- ///lies before its start offset, is stored as an empty string.
- ///@param namecount Number of entries in the name table.
- ///@param name_entry_size Size of one name table entry.
- ///@param name_table Pointer to the first name table entry.
- ///@param subject Pointer to the start of the subject.
- ///@param ovector Offset vector holding start/end pairs.
- ///@return Always true.
- #if __cplusplus >= 201103L
- template<typename Char_T, template<typename...> class Map>
- bool jpcre2::select<Char_T, Map>::RegexMatch::getNamedSubstrings(int namecount, int name_entry_size,
-                                                                  Pcre2Sptr name_table,
-                                                                  Pcre2Sptr subject, PCRE2_SIZE* ovector ) {
- #else
- template<typename Char_T>
- bool jpcre2::select<Char_T>::RegexMatch::getNamedSubstrings(int namecount, int name_entry_size,
-                                                             Pcre2Sptr name_table,
-                                                             Pcre2Sptr subject, PCRE2_SIZE* ovector ) {
- #endif
-     Pcre2Sptr tabptr = name_table;
-     String key;
-     MapNas map_nas;
-     MapNtN map_ntn;
-     for (int i = 0; i < namecount; i++) {
-         int n;
-         if(sizeof( Char_T ) * CHAR_BIT == 8){
-             //8-bit code units: the group number takes the first two units of the entry
-             n = (int)((tabptr[0] << 8) | tabptr[1]);
-             key = toString((Char*) (tabptr + 2));
-         }
-         else{
-             //wider code units: the group number takes the first unit of the entry
-             n = (int)tabptr[0];
-             key = toString((Char*) (tabptr + 1));
-         }
-         //Use of tabptr is finished for this iteration, let's increment it now.
-         tabptr += name_entry_size;
-         if(vec_nas) {
-             PCRE2_SIZE const start = ovector[2*n]; //n, not i.
-             PCRE2_SIZE const end = ovector[2*n+1];
-             if(start == PCRE2_UNSET || end < start)
-                 map_nas[key] = String(); //unset group or end before start: nothing to copy
-             else
-                 map_nas[key] = String((Char*)(subject + start), end - start);
-         }
-         if(vec_ntn) map_ntn[key] = n;
-     }
-     //push the maps into vectors:
-     if(vec_nas) vec_nas->push_back(map_nas);
-     if(vec_ntn) vec_ntn->push_back(map_ntn);
-     return true;
- }
- /*
-  * Match the associated regex against the subject and collect the results.
-  *
-  * Returns 0 immediately, without touching any result vector, when no regex is
-  * associated or its compiled code is null. Otherwise every non-null result
-  * vector (vec_num, vec_nas, vec_ntn, vec_soff, vec_eoff) is cleared and then
-  * receives one entry per successful match.
-  *
-  * Only the first match is performed unless FIND_ALL is set in
-  * jpcre2_match_opts; with FIND_ALL the search continues through the subject
-  * in the manner of Perl's /g modifier.
-  *
-  * Any PCRE2 error other than "no match" is stored in error_number; matches
-  * found before the error stay counted and stay in the vectors.
-  *
-  * The match data block is taken from mdata when that is set. Otherwise a block
-  * is created from the pattern for this call and released on every exit path.
-  * The compiled code is never modified or freed here.
-  *
-  * Returns the number of matches found.
-  */
- #if __cplusplus >= 201103L
- template<typename Char_T, template<typename...> class Map>
- jpcre2::SIZE_T jpcre2::select<Char_T, Map>::RegexMatch::match() {
- #else
- template<typename Char_T>
- jpcre2::SIZE_T jpcre2::select<Char_T>::RegexMatch::match() {
- #endif
-     // Without a compiled pattern there is nothing to match against.
-     if (!re || re->code == 0)
-         return 0;
-     // Frees a match data block created by this call on every return path
-     // (including the early returns taken when a substring helper fails).
-     // A block supplied through mdata is not owned and is never freed here.
-     struct MatchDataGuard {
-         MatchDataGuard(MatchData* b, bool o) : block(b), owned(o) {}
-         ~MatchDataGuard() {
-             if (owned)
-                 Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::match_data_free(block);
-         }
-     private:
-         MatchData* block;
-         bool owned;
-         MatchDataGuard(const MatchDataGuard&);            // not copyable
-         MatchDataGuard& operator=(const MatchDataGuard&); // not assignable
-     };
-     Pcre2Sptr subject = (Pcre2Sptr) m_subject_ptr->c_str();
-     const SIZE_T subject_length = m_subject_ptr->length();
-     Pcre2Sptr name_table = 0;
-     int namecount = 0;
-     int name_entry_size = 0;
-     SIZE_T count = 0;
-     // Results of a previous call must not leak into this one.
-     if (vec_num) vec_num->clear();
-     if (vec_nas) vec_nas->clear();
-     if (vec_ntn) vec_ntn->clear();
-     if (vec_soff) vec_soff->clear();
-     if (vec_eoff) vec_eoff->clear();
-     /* Creating the block from the pattern ensures that it is exactly the right
-        size for the number of capturing parentheses in the pattern. */
-     MatchData* match_data = mdata;
-     bool mdc = false; // true when the match data block was created here
-     if (!match_data) {
-         match_data = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::match_data_create_from_pattern(re->code, 0);
-         mdc = true;
-     }
-     MatchDataGuard guard(match_data, mdc);
-     int rc = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::match( re->code,        /* the compiled pattern */
-                                                             subject,         /* the subject string */
-                                                             subject_length,  /* the length of the subject */
-                                                             _start_offset,   /* offset in the subject to start at */
-                                                             match_opts,      /* PCRE2 match options */
-                                                             match_data,      /* block for storing the result */
-                                                             mcontext);       /* match context */
-     if (rc < 0) {
-         // "No match" is an ordinary outcome; any other failure is reported.
-         if (rc != PCRE2_ERROR_NOMATCH)
-             error_number = rc;
-         return count;
-     }
-     ++count;
-     /* Match succeeded. Get a pointer to the output vector, where string offsets
-        are stored. */
-     PCRE2_SIZE* ovector = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::get_ovector_pointer(match_data);
-     /* rc == 0 means the output vector was not big enough for all captures. This
-        should not happen with a block created from the pattern, but can with a
-        block supplied through mdata. Record it and use what the vector holds. */
-     if (rc == 0) {
-         error_number = (int)ERROR::INSUFFICIENT_OVECTOR;
-         rc = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::get_ovector_count(match_data);
-     }
-     // The whole match spans [ovector[0], ovector[1]).
-     if (vec_soff) vec_soff->push_back(ovector[0]);
-     if (vec_eoff) vec_eoff->push_back(ovector[1]);
-     // Numbered substrings are only collected when vec_num is set.
-     if (vec_num && !getNumberedSubstrings(rc, subject, ovector))
-         return count;
-     // Named substrings are collected when either vec_nas or vec_ntn is set.
-     if (vec_nas || vec_ntn) {
-         /* First extract the count of named parentheses from the pattern. */
-         (void) Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::pattern_info( re->code,
-                                                                      PCRE2_INFO_NAMECOUNT,
-                                                                      &namecount);
-         if (namecount > 0) {
-             /* Before the substrings can be accessed, extract the table for
-                translating names to numbers and the size of each entry in it. */
-             (void) Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::pattern_info( re->code,
-                                                                          PCRE2_INFO_NAMETABLE,
-                                                                          &name_table);
-             (void) Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::pattern_info( re->code,
-                                                                          PCRE2_INFO_NAMEENTRYSIZE,
-                                                                          &name_entry_size);
-         }
-         // Called even when the pattern has no named groups, so that the named
-         // vectors still get one (possibly empty) map for this match.
-         if (!getNamedSubstrings(namecount, name_entry_size, name_table, subject, ovector))
-             return count;
-     }
-     // Without FIND_ALL a single match is all that was asked for.
-     if ((jpcre2_match_opts & FIND_ALL) == 0)
-         return count;
-     /*
-      * FIND_ALL: keep searching for further matches, like /g in Perl. Empty
-      * matches make this tricky:
-      *
-      *  - After a non-empty match the next search starts at its end.
-      *  - After an empty match that would loop forever, so the next attempt is
-      *    made at the same position with PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED.
-      *    If it succeeds a non-empty alternative was found there; if it fails
-      *    the position is advanced by one character and the search resumes.
-      *  - "One character" is two code units when the position is at a CRLF and
-      *    CRLF is a valid newline, and a whole UTF-8/UTF-16 sequence in UTF mode.
-      */
-     /* Find the options with which the regex was compiled and extract the UTF
-        state. */
-     Uint option_bits = 0;
-     (void) Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::pattern_info(re->code, PCRE2_INFO_ALLOPTIONS, &option_bits);
-     const bool utf = (option_bits & PCRE2_UTF) != 0;
-     /* Find the newline convention and see whether CRLF is a valid newline
-        sequence. */
-     Uint newline = 0;
-     (void) Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::pattern_info(re->code, PCRE2_INFO_NEWLINE, &newline);
-     const bool crlf_is_newline = newline == PCRE2_NEWLINE_ANY
-                               || newline == PCRE2_NEWLINE_CRLF
-                               || newline == PCRE2_NEWLINE_ANYCRLF;
-     /* We got the first match. Now loop for second and subsequent matches. */
-     for (;;) {
-         Uint options = match_opts;
-         const PCRE2_SIZE start_offset = ovector[1]; /* Start at end of previous match */
-         // Tracked with an explicit flag rather than by testing `options == 0`:
-         // match_opts may itself be non-zero, and then a plain "no more
-         // matches" result would never terminate the loop.
-         bool retry_after_empty = false;
-         /* If the previous match was for an empty string, we are finished if we
-            are at the end of the subject. Otherwise, run another match at the
-            same point to see if a non-empty match can be found. */
-         if (ovector[0] == ovector[1]) {
-             if (ovector[0] == subject_length)
-                 break;
-             options |= PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
-             retry_after_empty = true;
-         }
-         rc = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::match( re->code,        /* the compiled pattern */
-                                                             subject,         /* the subject string */
-                                                             subject_length,  /* the length of the subject */
-                                                             start_offset,    /* starting offset in the subject */
-                                                             options,         /* options */
-                                                             match_data,      /* block for storing the result */
-                                                             mcontext);       /* match context */
-         if (rc == PCRE2_ERROR_NOMATCH) {
-             /* An ordinary search that finds nothing means all matches have been
-                found. */
-             if (!retry_after_empty)
-                 break;
-             /* Otherwise no non-empty match exists where the previous empty match
-                was. Do what Perl does: advance by one character and continue. The
-                new position is stored as the "end of previous match", which is
-                picked up at the top of the loop. */
-             ovector[1] = start_offset + 1; /* Advance one code unit */
-             if (crlf_is_newline &&                       /* If CRLF is newline & */
-                 start_offset + 1 < subject_length &&     /* we are at CRLF, */
-                 subject[start_offset] == '\r' && subject[start_offset + 1] == '\n')
-                 ovector[1] += 1;                         /* Advance by one more. */
-             else if (utf) {
-                 /* Skip the rest of a UTF-8 or UTF-16 character; nothing to skip
-                    for UTF-32. */
-                 while (ovector[1] < subject_length) {
-                     if (sizeof( Char_T ) * CHAR_BIT == 8 && (subject[ovector[1]] & 0xc0) != 0x80) break;
-                     else if (sizeof( Char_T ) * CHAR_BIT == 16 && (subject[ovector[1]] & 0xfc00) != 0xdc00) break;
-                     else if (sizeof( Char_T ) * CHAR_BIT == 32) break; //must be else if
-                     ovector[1] += 1;
-                 }
-             }
-             continue; /* Go round the loop again */
-         }
-         /* Other matching errors are not recoverable. */
-         if (rc < 0) {
-             error_number = rc;
-             return count;
-         }
-         /* Match succeeded. */
-         ++count;
-         if (rc == 0) {
-             /* The match succeeded, but the output vector wasn't big enough. */
-             error_number = (int)ERROR::INSUFFICIENT_OVECTOR;
-             rc = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::get_ovector_count(match_data);
-         }
-         // The whole match spans [ovector[0], ovector[1]).
-         if (vec_soff) vec_soff->push_back(ovector[0]);
-         if (vec_eoff) vec_eoff->push_back(ovector[1]);
-         /* As before, get substrings stored in the output vector by number, and
-            then any named substrings. */
-         if (vec_num && !getNumberedSubstrings(rc, subject, ovector))
-             return count;
-         // Must be called whether or not the pattern has named substrings.
-         if ((vec_nas || vec_ntn) &&
-             !getNamedSubstrings(namecount, name_entry_size, name_table, subject, ovector))
-             return count;
-     } /* End of loop to find second and subsequent matches */
-     return count;
- }
- #undef JPCRE2_VECTOR_DATA_ASSERT
- #undef JPCRE2_UNUSED
- //some macro documentation for doxygen
- #ifdef __DOXYGEN__
- #ifndef JPCRE2_USE_FUNCTION_POINTER_CALLBACK
- #define JPCRE2_USE_FUNCTION_POINTER_CALLBACK
- #endif
- #ifndef JPCRE2_NDEBUG
- #define JPCRE2_NDEBUG
- #endif
- ///@def JPCRE2_USE_FUNCTION_POINTER_CALLBACK
- ///Use a function pointer in all cases for the MatchEvaluatorCallback function.
- ///By default, a function pointer is used for the callback in MatchEvaluator with a `<C++11` compiler, but with a
- ///`>=C++11` compiler `std::function` is used instead of a function pointer.
- ///If this macro is defined before including jpcre2.hpp, a function pointer will be used in all cases.
- ///If you are using lambda functions with captures, stick with `std::function`; on the other hand, if
- ///you are using older compilers, you might want to use a function pointer instead.
- ///
- ///For example, with gcc-4.7, `std::function` will give compile error in C++11 mode, in such cases where full C++11
- ///support is not available, use function pointer.
- ///@def JPCRE2_ASSERT(cond, msg)
- ///Macro to call `jpcre2::jassert()` with file path and line number.
- ///When `NDEBUG` or `JPCRE2_NDEBUG` is defined before including this header, this macro will
- ///be defined as `((void)0)` thus eliminating this assertion.
- ///@param cond condition (boolean)
- ///@param msg message
- ///@def JPCRE2_NDEBUG
- ///Macro to remove debug codes.
- ///Using this macro is discouraged even in production mode but provided for completeness.
- ///You should not use this macro to bypass any error in your program.
- ///Define this macro before including this header if you want to remove debug codes included in this library.
- ///
- ///Using the standard `NDEBUG` macro will have the same effect,
- ///but it is recommended that you use `JPCRE2_NDEBUG` to strip out debug codes specifically for this library.
- #endif
- #endif
|