lexer.cpp 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010
  1. /** BEGIN COPYRIGHT BLOCK
  2. * This Program is free software; you can redistribute it and/or modify it under
  3. * the terms of the GNU General Public License as published by the Free Software
  4. * Foundation; version 2 of the License.
  5. *
  6. * This Program is distributed in the hope that it will be useful, but WITHOUT
  7. * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  8. * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  9. *
  10. * You should have received a copy of the GNU General Public License along with
  11. * this Program; if not, write to the Free Software Foundation, Inc., 59 Temple
  12. * Place, Suite 330, Boston, MA 02111-1307 USA.
  13. *
  14. * In addition, as a special exception, Red Hat, Inc. gives You the additional
  15. * right to link the code of this Program with code not covered under the GNU
  16. * General Public License ("Non-GPL Code") and to distribute linked combinations
  17. * including the two, subject to the limitations in this paragraph. Non-GPL Code
  18. * permitted under this exception must only link to the code of this Program
  19. * through those well defined interfaces identified in the file named EXCEPTION
  20. * found in the source code files (the "Approved Interfaces"). The files of
  21. * Non-GPL Code may instantiate templates or use macros or inline functions from
  22. * the Approved Interfaces without causing the resulting work to be covered by
  23. * the GNU General Public License. Only Red Hat, Inc. may make changes or
  24. * additions to the list of Approved Interfaces. You must obey the GNU General
  25. * Public License in all respects for all of the Program code and other code used
  26. * in conjunction with the Program except the Non-GPL Code covered by this
  27. * exception. If you modify this file, you may extend this exception to your
  28. * version of the file, but you are not obligated to do so. If you do not wish to
  29. * provide this exception without modification, you must delete this exception
  30. * statement from your version and license this file solely under the GPL without
  31. * exception.
  32. *
  33. *
  34. * Copyright (C) 2001 Sun Microsystems, Inc. Used by permission.
  35. * Copyright (C) 2005 Red Hat, Inc.
  36. * All rights reserved.
  37. * END COPYRIGHT BLOCK **/
  38. /*
  39. * Description (lexer.c)
  40. *
  41. * This module provides functions to assist parsers in lexical
  42. * analysis. The idea is to provide a slightly higher-level
  43. * interface than that of ctype.h.
  44. */
  45. #include "netsite.h"
  46. #include "prlog.h"
  47. #include "lexer_pvt.h"
  48. #include "base/lexer.h"
  49. /*
  50. * Description (lex_class_check)
  51. *
  52. * This function checks whether a given character belongs to one or
  53. * specified character classes.
  54. *
  55. * Arguments:
  56. *
  57. * chtab - character class table pointer
  58. * code - character code to be tested
  59. * cbits - bit mask of character classes
  60. *
  61. * Returns:
  62. *
  63. * The return value is zero if the code is not in any of the character
  64. * classes. It is non-zero, if the code is in at least one of the
  65. * classes.
  66. */
  67. NSAPI_PUBLIC
  68. int lex_class_check(void * chtab, char code, unsigned long cbits)
  69. {
  70. LEXClassTab_t * lct; /* character class table pointer */
  71. unsigned char * bp; /* bit vector pointer */
  72. int rv = 0; /* return value */
  73. int i; /* loop index */
  74. lct = (LEXClassTab_t *)chtab;
  75. bp = lct->lct_bv + code * lct->lct_bvbytes;
  76. for (i = 0; i < lct->lct_bvbytes; ++i) {
  77. if (*bp++ & cbits) {
  78. rv = 1;
  79. break;
  80. }
  81. cbits >>= 8;
  82. }
  83. return rv;
  84. }
  85. /*
  86. * Description (lex_class_create)
  87. *
  88. * This function creates a new character class table. A
  89. * character class table is used to map a character code to a
  90. * set of character classes. The mapping for a given character
  91. * is expressed as a bit vector, where each bit indicates the
  92. * membership of that character in one of the character classes.
  93. *
  94. * Arguments:
  95. *
  96. * classc - the number of character classes being defined
  97. * classv - pointers to null-terminated strings containing
  98. * the character codes in each character class
  99. * pchtab - indicates where to store a returned handle for
  100. * the character class table
  101. *
  102. * Returns:
  103. *
  104. * If successful, the return value is the number of character
  105. * classes specified (classc), and a handle for the created table
  106. * is returned through pchtab.
  107. *
  108. * Usage Notes:
  109. *
  110. * Null (\000) can never be in any character classes, since it
  111. * marks the end of the classv[] strings.
  112. *
  113. * classv[] can included NULL pointers, in which case bits will be
  114. * allocated for corresponding empty character classes.
  115. */
  116. NSAPI_PUBLIC
  117. int lex_class_create(int classc, char * classv[], void **pchtab)
  118. {
  119. int ncodes = 128; /* number of character encodings */
  120. int bvbytes; /* bytes per bit vector */
  121. LEXClassTab_t * ct; /* class table pointer */
  122. unsigned char * bp; /* bit vector pointer */
  123. char * cp; /* class string pointer */
  124. int bitmask; /* class bit mask */
  125. int bnum; /* byte number in bit vector */
  126. int ci; /* character index */
  127. int i; /* class index */
  128. /* Get number of bytes per bit vector */
  129. PR_ASSERT(classc > 0);
  130. bvbytes = (classc + 7) >> 3;
  131. /* Allocate the character class table */
  132. ct = (LEXClassTab_t *)calloc(1, sizeof(LEXClassTab_t) + ncodes * bvbytes);
  133. if (ct == NULL) {
  134. /* Error - insufficient memory */
  135. return LEXERR_MALLOC;
  136. }
  137. /* Initialize the class table */
  138. ct->lct_classc = classc;
  139. ct->lct_bvbytes = bvbytes;
  140. ct->lct_bv = (unsigned char *)(ct + 1);
  141. /* Initialize the bit vectors */
  142. for (i = 0; i < classc; ++i) {
  143. cp = classv[i];
  144. if (cp != NULL) {
  145. bitmask = 1 << (i & 7);
  146. bnum = i >> 7;
  147. while ((ci = *cp++) != 0) {
  148. bp = ct->lct_bv + ci + bnum;
  149. *bp |= bitmask;
  150. }
  151. }
  152. }
  153. /* Return pointer to table */
  154. PR_ASSERT(pchtab != NULL);
  155. *pchtab = (void *)ct;
  156. return classc;
  157. }
  158. NSAPI_PUBLIC
  159. void lex_class_destroy(void * chtab)
  160. {
  161. FREE((void *)chtab);
  162. }
  163. NSAPI_PUBLIC
  164. LEXStream_t * lex_stream_create(LEXStreamGet_t strmget, void * strmid,
  165. char * buf, int buflen)
  166. {
  167. LEXStream_t * lst; /* stream structure pointer */
  168. /* Allocate the stream structure */
  169. lst = (LEXStream_t *)MALLOC(sizeof(LEXStream_t));
  170. if (lst == NULL) {
  171. /* Error - insufficient memory */
  172. return 0;
  173. }
  174. lst->lst_strmid = strmid;
  175. lst->lst_get = strmget;
  176. /*
  177. * Allocate a buffer for the stream if there's a positive length
  178. * but a NULL buffer pointer.
  179. */
  180. if ((buflen > 0) && (buf == NULL)) {
  181. buf = (char *)MALLOC(buflen);
  182. if (buf == NULL) {
  183. FREE((void *)lst);
  184. return 0;
  185. }
  186. /* Also initialize the current position and residual length */
  187. lst->lst_cp = buf;
  188. lst->lst_len = 0;
  189. lst->lst_flags = LST_FREEBUF;
  190. }
  191. lst->lst_buf = buf;
  192. lst->lst_buflen = buflen;
  193. return lst;
  194. }
  195. NSAPI_PUBLIC
  196. void lex_stream_destroy(LEXStream_t * lst)
  197. {
  198. if ((lst->lst_flags & LST_FREEBUF) && (lst->lst_buf != NULL)) {
  199. FREE(lst->lst_buf);
  200. }
  201. FREE((void *)lst);
  202. }
  203. /*
  204. * Description (lex_token_new)
  205. *
  206. * This function creates a new token object. A token object is
  207. * used to accumulate text in an associated buffer. If the
  208. * 'growlen' argument is specified as a value that is greater
  209. * than zero, then the token buffer will be reallocated as
  210. * necessary to accomodate more text. The initial size of
  211. * the token buffer is given by 'initlen', which may be zero,
  212. * and should be zero if lex_token_setbuf() is used.
  213. *
  214. * The token object is allocated from the memory pool given
  215. * by the 'pool' argument. The default pool for the current
  216. * thread is used if 'pool' is null.
  217. *
  218. * Arguments:
  219. *
  220. * pool - handle for memory pool to be used
  221. * initlen - initial length of token buffer
  222. * growlen - amount to grow a full token buffer
  223. * token - pointer to returned token handle
  224. *
  225. * Returns:
  226. *
  227. * If successful, the function return value is zero and a handle
  228. * for the new token is returned via 'token'. Otherwise a negative
  229. * error code is returned.
  230. */
  231. NSAPI_PUBLIC
  232. int lex_token_new(pool_handle_t * pool, int initlen, int growlen, void **token)
  233. {
  234. LEXToken_t * lt; /* new token pointer */
  235. /* Allocate the token structure */
  236. if (pool) {
  237. lt = (LEXToken_t *)pool_calloc(pool, 1, sizeof(LEXToken_t));
  238. }
  239. else {
  240. lt = (LEXToken_t *)CALLOC(sizeof(LEXToken_t));
  241. }
  242. if (lt == NULL) {
  243. /* Error - insufficient memory */
  244. return LEXERR_MALLOC;
  245. }
  246. /* Save the memory pool handle for future allocations */
  247. lt->lt_mempool = pool;
  248. /* Allocate the initial token buffer if initlen > 0 */
  249. if (initlen > 0) {
  250. if (pool) {
  251. lt->lt_buf = (char *)pool_malloc(pool, initlen);
  252. }
  253. else {
  254. lt->lt_buf = (char *)MALLOC(initlen);
  255. }
  256. if (lt->lt_buf == NULL) {
  257. /* Error - insufficient memory */
  258. if (pool) {
  259. pool_free(pool, (void *)lt);
  260. }
  261. else {
  262. FREE((void *)lt);
  263. }
  264. return LEXERR_MALLOC;
  265. }
  266. lt->lt_initlen = initlen;
  267. lt->lt_buflen = initlen;
  268. lt->lt_buf[0] = 0;
  269. }
  270. if (growlen > 0) lt->lt_inclen = growlen;
  271. PR_ASSERT(token != NULL);
  272. *token = (void *)lt;
  273. return 0;
  274. }
  275. /*
  276. * Description (lex_token_start)
  277. *
  278. * This function discards any current contents of the token buffer
  279. * associated with a specified token object, so that any new data
  280. * appended to the token will start at the beginning of the token
  281. * buffer. If there is no token buffer currently associated with
  282. * the token, and the 'initlen' value specified to lex_token_new()
  283. * was greater than zero, then a new token buffer is allocated.
  284. * This function enables a token and optionally its token buffer
  285. * to be reused.
  286. *
  287. * Arguments:
  288. *
  289. * token - handle for token object
  290. *
  291. * Returns:
  292. *
  293. * If successful, the function return value is zero. Otherwise
  294. * a negative error code is returned.
  295. */
  296. NSAPI_PUBLIC int
  297. lex_token_start(void * token)
  298. {
  299. LEXToken_t * lt = (LEXToken_t *)token; /* token pointer */
  300. /* Do we need to allocate a token buffer? */
  301. if ((lt->lt_buf == NULL) && (lt->lt_initlen > 0)) {
  302. /* Allocate the initial token buffer */
  303. if (lt->lt_mempool) {
  304. lt->lt_buf = (char *)pool_malloc(lt->lt_mempool, lt->lt_initlen);
  305. }
  306. else {
  307. lt->lt_buf = (char *)MALLOC(lt->lt_initlen);
  308. }
  309. if (lt->lt_buf == NULL) {
  310. /* Error - insufficient memory */
  311. return LEXERR_MALLOC;
  312. }
  313. lt->lt_buflen = lt->lt_initlen;
  314. }
  315. lt->lt_len = 0;
  316. lt->lt_buf[0] = 0;
  317. return 0;
  318. }
  319. /*
  320. * Description (lex_token_info)
  321. *
  322. * This function returns information about the token buffer currently
  323. * associated with a token object. This includes a pointer to the
  324. * token data, if any, the current length of the token data, and the
  325. * current size of the token buffer.
  326. *
  327. * Arguments:
  328. *
  329. * token - handle for token object
  330. * tdatalen - pointer to returned token data length
  331. * (may be null)
  332. * tbufflen - pointer to returned token buffer length
  333. * (may be null)
  334. *
  335. * Returns:
  336. *
  337. * The function return value is a pointer to the beginning of the
  338. * token data, or null if there is no token buffer associated with
  339. * the token. The token data length and token buffer length are
  340. * returned via 'tdatalen' and 'tbufflen', respectively.
  341. */
  342. NSAPI_PUBLIC
  343. char * lex_token_info(void * token, int * tdatalen, int * tbufflen)
  344. {
  345. LEXToken_t * lt = (LEXToken_t *)token; /* token pointer */
  346. if (tdatalen) *tdatalen = lt->lt_len;
  347. if (tbufflen) *tbufflen = lt->lt_buflen;
  348. return lt->lt_buf;
  349. }
  350. /*
  351. * Description (lex_token)
  352. *
  353. * This function returns a pointer to the current token buffer, if any.
  354. * If the length of the token is also needed, use lex_token_info().
  355. * This function would normally be used when the token is a
  356. * null-terminated string. See also lex_token_take().
  357. *
  358. * Arguments:
  359. *
  360. * token - handle for token object
  361. *
  362. * Returns:
  363. *
  364. * A pointer to the beginning of the current token is returned.
  365. * The pointer is null if no token buffer is currently associated
  366. * with the token object.
  367. */
  368. NSAPI_PUBLIC
  369. char * lex_token(void * token)
  370. {
  371. LEXToken_t * lt = (LEXToken_t *)token; /* token pointer */
  372. return lt->lt_buf;
  373. }
  374. /*
  375. * Description (lex_token_destroy)
  376. *
  377. * This function destroys a specified token object. The memory
  378. * associated with the token object and its token buffer, if any,
  379. * is freed to whence it came. Note that token objects can be
  380. * associated with a memory pool, and destroyed implicitly when
  381. * the pool is destroyed via pool_destroy().
  382. *
  383. * Arguments:
  384. *
  385. * token - handle for token object
  386. */
  387. NSAPI_PUBLIC
  388. void lex_token_destroy(void * token)
  389. {
  390. LEXToken_t * lt = (LEXToken_t *)token; /* token pointer */
  391. if (lt) {
  392. if (lt->lt_mempool) {
  393. if (lt->lt_buf) {
  394. pool_free(lt->lt_mempool, (void *)(lt->lt_buf));
  395. }
  396. pool_free(lt->lt_mempool, (void *)lt);
  397. }
  398. else {
  399. if (lt->lt_buf) {
  400. FREE(lt->lt_buf);
  401. }
  402. FREE(lt);
  403. }
  404. }
  405. }
  406. /*
  407. * Description (lex_token_get)
  408. *
  409. * This function returns a pointer to the current token buffer,
  410. * leaving the token with no associated token buffer. The caller
  411. * assumes ownership of the returned token buffer. The length
  412. * of the token data and the length of the token buffer are returned
  413. * if requested. Note that lex_token_take() performs a similar
  414. * operation.
  415. *
  416. * Arguments:
  417. *
  418. * token - handle for token object
  419. * tdatalen - pointer to returned token data length
  420. * (may be null)
  421. * tbufflen - pointer to returned token buffer length
  422. * (may be null)
  423. *
  424. * Returns:
  425. *
  426. * The function return value is a pointer to the beginning of the
  427. * token data, or null if there is no token buffer associated with
  428. * the token. The token data length and token buffer length are
  429. * returned via 'tdatalen' and 'tbufflen', respectively.
  430. */
  431. NSAPI_PUBLIC
  432. char * lex_token_get(void * token, int * tdatalen, int * tbufflen)
  433. {
  434. LEXToken_t * lt = (LEXToken_t *)token; /* token pointer */
  435. char * tokenstr;
  436. tokenstr = lt->lt_buf;
  437. if (tdatalen) *tdatalen = lt->lt_len;
  438. if (tbufflen) *tbufflen = lt->lt_buflen;
  439. lt->lt_buf = NULL;
  440. lt->lt_buflen = 0;
  441. lt->lt_len = 0;
  442. return tokenstr;
  443. }
  444. /*
  445. * Description (lex_token_take)
  446. *
  447. * This function returns a pointer to the current token buffer,
  448. * leaving the token with no associated token buffer. The caller
  449. * assumes ownership of the returned token buffer. Note that
  450. * lex_token_get() performs a similar operation, but returns more
  451. * information.
  452. *
  453. * Arguments:
  454. *
  455. * token - handle for token object
  456. *
  457. * Returns:
  458. *
  459. * A pointer to the beginning of the current token is returned.
  460. * The pointer is null if no token buffer is currently associated
  461. * with the token object.
  462. */
  463. NSAPI_PUBLIC
  464. char * lex_token_take(void * token)
  465. {
  466. LEXToken_t * lt = (LEXToken_t *)token; /* token pointer */
  467. char * tokenstr;
  468. tokenstr = lt->lt_buf;
  469. lt->lt_buf = NULL;
  470. lt->lt_buflen = 0;
  471. lt->lt_len = 0;
  472. return tokenstr;
  473. }
  474. /*
  475. * Description (lex_token_append)
  476. *
  477. * This function appends data to the end of a token. If 'growlen'
  478. * was specified as a greater-than-zero value for lex_token_new(),
  479. * then the token buffer may be reallocated to accomodate the
  480. * new data if necessary. A null byte is maintained in the token
  481. * buffer following the token data, but it is not included in the
  482. * token data length.
  483. *
  484. * Arguments:
  485. *
  486. * token - handle for token object
  487. * nbytes - number of bytes of new data
  488. * src - pointer to new data
  489. *
  490. * Returns:
  491. *
  492. * If successful, the function return value is the new length of
  493. * the token data. Otherwise a negative error code is returned.
  494. */
  495. NSAPI_PUBLIC
  496. int lex_token_append(void * token, int nbytes, char * src)
  497. {
  498. LEXToken_t * lt = (LEXToken_t *)token; /* token pointer */
  499. int bufsize;
  500. int length;
  501. PR_ASSERT(nbytes >= 0);
  502. PR_ASSERT((src != NULL) || (nbytes == 0));
  503. if (nbytes > 0) {
  504. bufsize = lt->lt_buflen;
  505. length = lt->lt_len + nbytes;
  506. if (length >= bufsize) {
  507. while (length >= bufsize) {
  508. bufsize += lt->lt_inclen;
  509. }
  510. if (lt->lt_mempool) {
  511. if (lt->lt_buf) {
  512. lt->lt_buf = (char *)pool_realloc(lt->lt_mempool,
  513. lt->lt_buf, bufsize);
  514. }
  515. else {
  516. lt->lt_buf = (char *)pool_malloc(lt->lt_mempool, bufsize);
  517. }
  518. }
  519. else {
  520. if (lt->lt_buf) {
  521. lt->lt_buf = (char *)REALLOC(lt->lt_buf, bufsize);
  522. }
  523. else {
  524. lt->lt_buf = (char *)MALLOC(bufsize);
  525. }
  526. }
  527. }
  528. if (lt->lt_buf) {
  529. memcpy((void *)(lt->lt_buf + lt->lt_len), (void *)src, nbytes);
  530. lt->lt_buf[length] = 0;
  531. lt->lt_len = length;
  532. lt->lt_buflen = bufsize;
  533. }
  534. else {
  535. /* Error - insufficient memory */
  536. return LEXERR_MALLOC;
  537. }
  538. }
  539. return lt->lt_len;
  540. }
  541. NSAPI_PUBLIC
  542. int lex_next_char(LEXStream_t * lst, void * chtab, unsigned long cbits)
  543. {
  544. LEXClassTab_t * lct; /* character class table pointer */
  545. unsigned char * bp; /* bit vector pointer */
  546. unsigned long bitmask; /* class bit mask temporary */
  547. int rv; /* return value */
  548. int i; /* loop index */
  549. lct = (LEXClassTab_t *)chtab;
  550. /* Go get more stream data if none left in the buffer */
  551. if (lst->lst_len <= 0) {
  552. rv = (*lst->lst_get)(lst);
  553. if (rv <= 0) {
  554. return rv;
  555. }
  556. }
  557. /* Get the next character from the buffer */
  558. rv = *lst->lst_cp;
  559. bitmask = cbits;
  560. bp = lct->lct_bv + rv * lct->lct_bvbytes;
  561. for (i = 0; i < lct->lct_bvbytes; ++i) {
  562. if (*bp++ & bitmask) {
  563. /* Update the buffer pointer and length */
  564. lst->lst_cp += 1;
  565. lst->lst_len -= 1;
  566. break;
  567. }
  568. bitmask >>= 8;
  569. }
  570. return rv;
  571. }
  572. NSAPI_PUBLIC
  573. int lex_scan_over(LEXStream_t * lst, void * chtab, unsigned long cbits,
  574. void * token)
  575. {
  576. LEXClassTab_t * lct; /* character class table pointer */
  577. char * cp; /* current pointer in stream buffer */
  578. unsigned char * bp; /* bit vector pointer */
  579. unsigned long bitmask; /* class bit mask temporary */
  580. int cv = 0; /* current character value */
  581. int rv = 0; /* return value */
  582. int slen; /* token segment length */
  583. int done = 0; /* done indication */
  584. int i; /* loop index */
  585. lct = (LEXClassTab_t *)chtab;
  586. while (!done) {
  587. /* Go get more stream data if none left in the buffer */
  588. if (lst->lst_len <= 0) {
  589. rv = (*lst->lst_get)(lst);
  590. if (rv <= 0) {
  591. return rv;
  592. }
  593. }
  594. slen = 0;
  595. cp = lst->lst_cp;
  596. while (slen < lst->lst_len) {
  597. cv = *cp;
  598. bitmask = cbits;
  599. bp = lct->lct_bv + cv * lct->lct_bvbytes;
  600. for (i = 0; i < lct->lct_bvbytes; ++i) {
  601. if (*bp++ & bitmask) goto more_token;
  602. bitmask >>= 8;
  603. }
  604. done = 1;
  605. break;
  606. more_token:
  607. slen += 1;
  608. cp += 1;
  609. }
  610. /* If the current segment is not empty, append it to the token */
  611. if (slen > 0) {
  612. rv = lex_token_append(token, slen, lst->lst_cp);
  613. if (rv < 0) break;
  614. /* Update the stream buffer pointer and length */
  615. lst->lst_cp += slen;
  616. lst->lst_len -= slen;
  617. }
  618. }
  619. return ((rv < 0) ? rv : cv);
  620. }
  621. /*
  622. * Description (lex_scan_string)
  623. *
  624. * This function parses a quoted string into the specified token.
  625. * The current character in the LEX stream is taken to be the
  626. * beginning quote character. The quote character may be included
  627. * in the string by preceding it with a '\'. Any newline
  628. * characters to be included in the string must also be preceded
  629. * by '\'. The string is terminated by another occurrence of the
  630. * quote character, or an unquoted newline, or EOF.
  631. *
  632. * Arguments:
  633. *
  634. * lst - pointer to LEX stream structure
  635. * token - handle for token
  636. * flags - bit flags (unused - must be zero)
  637. *
  638. * Returns:
  639. *
  640. * The terminating character is returned, or zero if EOF. The
  641. * string is returned in the token, without the beginning and
  642. * ending quote characters. An error is indicated by a negative
  643. * return value.
  644. */
  645. NSAPI_PUBLIC
  646. int lex_scan_string(LEXStream_t * lst, void * token, int flags)
  647. {
  648. char * cp; /* current pointer in stream buffer */
  649. int cv; /* current character value */
  650. int rv; /* return value */
  651. int slen; /* token segment length */
  652. int done = 0; /* done indication */
  653. int cquote = 0; /* character quote indication */
  654. int qchar = -1; /* quote character */
  655. while (!done) {
  656. /* Go get more stream data if none left in the buffer */
  657. if (lst->lst_len <= 0) {
  658. rv = (*lst->lst_get)(lst);
  659. if (rv <= 0) {
  660. return rv;
  661. }
  662. }
  663. slen = 0;
  664. cp = lst->lst_cp;
  665. while (slen < lst->lst_len) {
  666. /* Get the next character */
  667. cv = *cp;
  668. /* Pick up the quote character if we don't have it yet */
  669. if (qchar < 0) {
  670. qchar = cv;
  671. /* Don't include it in the string */
  672. lst->lst_cp += 1;
  673. lst->lst_len -= 1;
  674. cp += 1;
  675. continue;
  676. }
  677. /* cquote is 1 if the last character was '\' */
  678. if (cquote == 0) {
  679. /* Is this a string terminator? */
  680. if ((cv == qchar) || (cv == '\n')) {
  681. /* Append whatever we have to this point */
  682. if (slen > 0) goto append_it;
  683. /*
  684. * If the terminator is the expected quote character,
  685. * just skip it. If it's anything else, leave it as
  686. * the current character.
  687. */
  688. if (cv == qchar) {
  689. lst->lst_cp += 1;
  690. lst->lst_len -= 1;
  691. }
  692. done = 1;
  693. goto append_it;
  694. }
  695. /* Got the character quote character? */
  696. if (cv == '\\') {
  697. /* Append anything we have so far first */
  698. if (slen > 0) goto append_it;
  699. /* Then skip the character */
  700. cquote = 1;
  701. lst->lst_cp += 1;
  702. lst->lst_len -= 1;
  703. cp += 1;
  704. continue;
  705. }
  706. }
  707. else {
  708. /* Include any character following '\' */
  709. cquote = 0;
  710. }
  711. /* Include this character in the string */
  712. slen += 1;
  713. cp += 1;
  714. }
  715. append_it:
  716. /* If the current segment is not empty, append it to the token */
  717. if (slen > 0) {
  718. rv = lex_token_append(token, slen, lst->lst_cp);
  719. if (rv < 0) break;
  720. /* Update the stream buffer pointer and length */
  721. lst->lst_cp += slen;
  722. lst->lst_len -= slen;
  723. }
  724. }
  725. return ((rv < 0) ? rv : cv);
  726. }
  727. NSAPI_PUBLIC
  728. int lex_scan_to(LEXStream_t * lst, void * chtab, unsigned long cbits,
  729. void * token)
  730. {
  731. LEXClassTab_t * lct; /* character class table pointer */
  732. unsigned char * bp; /* bit vector pointer */
  733. char * cp; /* current pointer in stream buffer */
  734. unsigned long bitmask; /* class bit mask temporary */
  735. int cv = 0; /* current character value */
  736. int rv = 0; /* return value */
  737. int slen; /* token segment length */
  738. int done = 0; /* done indication */
  739. int i; /* loop index */
  740. lct = (LEXClassTab_t *)chtab;
  741. while (!done) {
  742. /* Go get more stream data if none left in the buffer */
  743. if (lst->lst_len <= 0) {
  744. rv = (*lst->lst_get)(lst);
  745. if (rv <= 0) {
  746. return rv;
  747. }
  748. }
  749. slen = 0;
  750. cp = lst->lst_cp;
  751. while (slen < lst->lst_len) {
  752. cv = *cp;
  753. bitmask = cbits;
  754. bp = lct->lct_bv + cv * lct->lct_bvbytes;
  755. for (i = 0; i < lct->lct_bvbytes; ++i) {
  756. if (*bp++ & bitmask) {
  757. done = 1;
  758. goto append_it;
  759. }
  760. bitmask >>= 8;
  761. }
  762. slen += 1;
  763. cp += 1;
  764. }
  765. append_it:
  766. /* If the current segment is not empty, append it to the token */
  767. if (slen > 0) {
  768. rv = lex_token_append(token, slen, lst->lst_cp);
  769. if (rv < 0) break;
  770. /* Update the stream buffer pointer and length */
  771. lst->lst_cp += slen;
  772. lst->lst_len -= slen;
  773. }
  774. }
  775. return ((rv < 0) ? rv : cv);
  776. }
  777. NSAPI_PUBLIC
  778. int lex_skip_over(LEXStream_t * lst, void * chtab, unsigned long cbits)
  779. {
  780. LEXClassTab_t * lct; /* character class table pointer */
  781. unsigned char * bp; /* bit vector pointer */
  782. char * cp; /* current pointer in stream buffer */
  783. unsigned long bitmask; /* class bit mask temporary */
  784. int rv = 0; /* return value */
  785. int slen; /* token segment length */
  786. int done = 0; /* done indication */
  787. int i; /* loop index */
  788. lct = (LEXClassTab_t *)chtab;
  789. while (!done) {
  790. /* Go get more stream data if none left in the buffer */
  791. if (lst->lst_len <= 0) {
  792. rv = (*lst->lst_get)(lst);
  793. if (rv <= 0) {
  794. return rv;
  795. }
  796. }
  797. slen = 0;
  798. cp = lst->lst_cp;
  799. while (slen < lst->lst_len) {
  800. rv = *cp;
  801. bitmask = cbits;
  802. bp = lct->lct_bv + rv * lct->lct_bvbytes;
  803. for (i = 0; i < lct->lct_bvbytes; ++i) {
  804. if (*bp++ & bitmask) goto next_ch;
  805. bitmask >>= 8;
  806. }
  807. done = 1;
  808. break;
  809. next_ch:
  810. slen += 1;
  811. cp += 1;
  812. }
  813. if (slen > 0) {
  814. /* Update the stream buffer pointer and length */
  815. lst->lst_cp += slen;
  816. lst->lst_len -= slen;
  817. }
  818. }
  819. return rv;
  820. }
  821. NSAPI_PUBLIC
  822. int lex_skip_to(LEXStream_t * lst, void * chtab, unsigned long cbits)
  823. {
  824. LEXClassTab_t * lct; /* character class table pointer */
  825. unsigned char * bp; /* bit vector pointer */
  826. char * cp; /* current pointer in stream buffer */
  827. unsigned long bitmask; /* class bit mask temporary */
  828. int rv; /* return value */
  829. int slen; /* token segment length */
  830. int done = 0; /* done indication */
  831. int i; /* loop index */
  832. lct = (LEXClassTab_t *)chtab;
  833. while (!done) {
  834. /* Go get more stream data if none left in the buffer */
  835. if (lst->lst_len <= 0) {
  836. rv = (*lst->lst_get)(lst);
  837. if (rv <= 0) {
  838. return rv;
  839. }
  840. }
  841. slen = 0;
  842. cp = lst->lst_cp;
  843. while (slen < lst->lst_len) {
  844. rv = *cp;
  845. bitmask = cbits;
  846. bp = lct->lct_bv + rv * lct->lct_bvbytes;
  847. for (i = 0; i < lct->lct_bvbytes; ++i) {
  848. if (*bp++ & bitmask) {
  849. done = 1;
  850. goto update_it;
  851. }
  852. bitmask >>= 8;
  853. }
  854. slen += 1;
  855. cp += 1;
  856. }
  857. update_it:
  858. /* Update the stream buffer pointer and length */
  859. if (slen > 0) {
  860. lst->lst_cp += slen;
  861. lst->lst_len -= slen;
  862. }
  863. }
  864. return rv;
  865. }