lexer.cpp 27 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015
  1. /** BEGIN COPYRIGHT BLOCK
  2. * This Program is free software; you can redistribute it and/or modify it under
  3. * the terms of the GNU General Public License as published by the Free Software
  4. * Foundation; version 2 of the License.
  5. *
  6. * This Program is distributed in the hope that it will be useful, but WITHOUT
  7. * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  8. * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  9. *
  10. * You should have received a copy of the GNU General Public License along with
  11. * this Program; if not, write to the Free Software Foundation, Inc., 59 Temple
  12. * Place, Suite 330, Boston, MA 02111-1307 USA.
  13. *
  14. * In addition, as a special exception, Red Hat, Inc. gives You the additional
  15. * right to link the code of this Program with code not covered under the GNU
  16. * General Public License ("Non-GPL Code") and to distribute linked combinations
  17. * including the two, subject to the limitations in this paragraph. Non-GPL Code
  18. * permitted under this exception must only link to the code of this Program
  19. * through those well defined interfaces identified in the file named EXCEPTION
  20. * found in the source code files (the "Approved Interfaces"). The files of
  21. * Non-GPL Code may instantiate templates or use macros or inline functions from
  22. * the Approved Interfaces without causing the resulting work to be covered by
  23. * the GNU General Public License. Only Red Hat, Inc. may make changes or
  24. * additions to the list of Approved Interfaces. You must obey the GNU General
  25. * Public License in all respects for all of the Program code and other code used
  26. * in conjunction with the Program except the Non-GPL Code covered by this
  27. * exception. If you modify this file, you may extend this exception to your
  28. * version of the file, but you are not obligated to do so. If you do not wish to
  29. * provide this exception without modification, you must delete this exception
  30. * statement from your version and license this file solely under the GPL without
  31. * exception.
  32. *
  33. *
  34. * Copyright (C) 2001 Sun Microsystems, Inc. Used by permission.
  35. * Copyright (C) 2005 Red Hat, Inc.
  36. * All rights reserved.
  37. * END COPYRIGHT BLOCK **/
  38. #ifdef HAVE_CONFIG_H
  39. # include <config.h>
  40. #endif
  41. /*
  42. * Description (lexer.c)
  43. *
  44. * This module provides functions to assist parsers in lexical
  45. * analysis. The idea is to provide a slightly higher-level
  46. * interface than that of ctype.h.
  47. */
  48. #include "netsite.h"
  49. #include "prlog.h"
  50. #include "lexer_pvt.h"
  51. #include "base/lexer.h"
  52. /*
  53. * Description (lex_class_check)
  54. *
  55. * This function checks whether a given character belongs to one or
  56. * specified character classes.
  57. *
  58. * Arguments:
  59. *
  60. * chtab - character class table pointer
  61. * code - character code to be tested
  62. * cbits - bit mask of character classes
  63. *
  64. * Returns:
  65. *
  66. * The return value is zero if the code is not in any of the character
  67. * classes. It is non-zero, if the code is in at least one of the
  68. * classes.
  69. */
  70. NSAPI_PUBLIC
  71. int lex_class_check(void * chtab, char code, unsigned long cbits)
  72. {
  73. LEXClassTab_t * lct; /* character class table pointer */
  74. unsigned char * bp; /* bit vector pointer */
  75. int rv = 0; /* return value */
  76. int i; /* loop index */
  77. lct = (LEXClassTab_t *)chtab;
  78. bp = lct->lct_bv + code * lct->lct_bvbytes;
  79. for (i = 0; i < lct->lct_bvbytes; ++i) {
  80. if (*bp++ & cbits) {
  81. rv = 1;
  82. break;
  83. }
  84. cbits >>= 8;
  85. }
  86. return rv;
  87. }
  88. /*
  89. * Description (lex_class_create)
  90. *
  91. * This function creates a new character class table. A
  92. * character class table is used to map a character code to a
  93. * set of character classes. The mapping for a given character
  94. * is expressed as a bit vector, where each bit indicates the
  95. * membership of that character in one of the character classes.
  96. *
  97. * Arguments:
  98. *
  99. * classc - the number of character classes being defined
  100. * classv - pointers to null-terminated strings containing
  101. * the character codes in each character class
  102. * pchtab - indicates where to store a returned handle for
  103. * the character class table
  104. *
  105. * Returns:
  106. *
  107. * If successful, the return value is the number of character
  108. * classes specified (classc), and a handle for the created table
  109. * is returned through pchtab.
  110. *
  111. * Usage Notes:
  112. *
  113. * Null (\000) can never be in any character classes, since it
  114. * marks the end of the classv[] strings.
  115. *
  116. * classv[] can included NULL pointers, in which case bits will be
  117. * allocated for corresponding empty character classes.
  118. */
  119. NSAPI_PUBLIC
  120. int lex_class_create(int classc, char * classv[], void **pchtab)
  121. {
  122. int ncodes = 128; /* number of character encodings */
  123. int bvbytes; /* bytes per bit vector */
  124. LEXClassTab_t * ct; /* class table pointer */
  125. unsigned char * bp; /* bit vector pointer */
  126. char * cp; /* class string pointer */
  127. int bitmask; /* class bit mask */
  128. int bnum; /* byte number in bit vector */
  129. int ci; /* character index */
  130. int i; /* class index */
  131. /* Get number of bytes per bit vector */
  132. PR_ASSERT(classc > 0);
  133. bvbytes = (classc + 7) >> 3;
  134. /* Allocate the character class table */
  135. ct = (LEXClassTab_t *)calloc(1, sizeof(LEXClassTab_t) + ncodes * bvbytes);
  136. if (ct == NULL) {
  137. /* Error - insufficient memory */
  138. return LEXERR_MALLOC;
  139. }
  140. /* Initialize the class table */
  141. ct->lct_classc = classc;
  142. ct->lct_bvbytes = bvbytes;
  143. ct->lct_bv = (unsigned char *)(ct + 1);
  144. /* Initialize the bit vectors */
  145. for (i = 0; i < classc; ++i) {
  146. cp = classv[i];
  147. if (cp != NULL) {
  148. bitmask = 1 << (i & 7);
  149. bnum = i >> 7;
  150. while ((ci = *cp++) != 0) {
  151. bp = ct->lct_bv + ci + bnum;
  152. *bp |= bitmask;
  153. }
  154. }
  155. }
  156. /* Return pointer to table */
  157. PR_ASSERT(pchtab != NULL);
  158. *pchtab = (void *)ct;
  159. return classc;
  160. }
  161. NSAPI_PUBLIC
  162. void lex_class_destroy(void * chtab)
  163. {
  164. FREE((void *)chtab);
  165. }
  166. NSAPI_PUBLIC
  167. LEXStream_t * lex_stream_create(LEXStreamGet_t strmget, void * strmid,
  168. char * buf, int buflen)
  169. {
  170. LEXStream_t * lst; /* stream structure pointer */
  171. /* Allocate the stream structure */
  172. lst = (LEXStream_t *)MALLOC(sizeof(LEXStream_t));
  173. if (lst == NULL) {
  174. /* Error - insufficient memory */
  175. return 0;
  176. }
  177. lst->lst_strmid = strmid;
  178. lst->lst_get = strmget;
  179. /*
  180. * Allocate a buffer for the stream if there's a positive length
  181. * but a NULL buffer pointer.
  182. */
  183. if ((buflen > 0) && (buf == NULL)) {
  184. buf = (char *)MALLOC(buflen);
  185. if (buf == NULL) {
  186. FREE((void *)lst);
  187. return 0;
  188. }
  189. /* Also initialize the current position and residual length */
  190. lst->lst_cp = buf;
  191. lst->lst_len = 0;
  192. lst->lst_flags = LST_FREEBUF;
  193. }
  194. lst->lst_buf = buf;
  195. lst->lst_buflen = buflen;
  196. return lst;
  197. }
  198. NSAPI_PUBLIC
  199. void lex_stream_destroy(LEXStream_t * lst)
  200. {
  201. if ((lst->lst_flags & LST_FREEBUF) && (lst->lst_buf != NULL)) {
  202. FREE(lst->lst_buf);
  203. }
  204. FREE((void *)lst);
  205. }
  206. /*
  207. * Description (lex_token_new)
  208. *
  209. * This function creates a new token object. A token object is
  210. * used to accumulate text in an associated buffer. If the
  211. * 'growlen' argument is specified as a value that is greater
  212. * than zero, then the token buffer will be reallocated as
  213. * necessary to accomodate more text. The initial size of
  214. * the token buffer is given by 'initlen', which may be zero,
  215. * and should be zero if lex_token_setbuf() is used.
  216. *
  217. * The token object is allocated from the memory pool given
  218. * by the 'pool' argument. The default pool for the current
  219. * thread is used if 'pool' is null.
  220. *
  221. * Arguments:
  222. *
  223. * pool - handle for memory pool to be used
  224. * initlen - initial length of token buffer
  225. * growlen - amount to grow a full token buffer
  226. * token - pointer to returned token handle
  227. *
  228. * Returns:
  229. *
  230. * If successful, the function return value is zero and a handle
  231. * for the new token is returned via 'token'. Otherwise a negative
  232. * error code is returned.
  233. */
  234. NSAPI_PUBLIC
  235. int lex_token_new(pool_handle_t * pool, int initlen, int growlen, void **token)
  236. {
  237. LEXToken_t * lt; /* new token pointer */
  238. /* Allocate the token structure */
  239. if (pool) {
  240. lt = (LEXToken_t *)pool_calloc(pool, 1, sizeof(LEXToken_t));
  241. }
  242. else {
  243. lt = (LEXToken_t *)CALLOC(sizeof(LEXToken_t));
  244. }
  245. if (lt == NULL) {
  246. /* Error - insufficient memory */
  247. return LEXERR_MALLOC;
  248. }
  249. /* Save the memory pool handle for future allocations */
  250. lt->lt_mempool = pool;
  251. /* Allocate the initial token buffer if initlen > 0 */
  252. if (initlen > 0) {
  253. if (pool) {
  254. lt->lt_buf = (char *)pool_malloc(pool, initlen);
  255. }
  256. else {
  257. lt->lt_buf = (char *)MALLOC(initlen);
  258. }
  259. if (lt->lt_buf == NULL) {
  260. /* Error - insufficient memory */
  261. if (pool) {
  262. pool_free(pool, (void *)lt);
  263. }
  264. else {
  265. FREE((void *)lt);
  266. }
  267. return LEXERR_MALLOC;
  268. }
  269. lt->lt_initlen = initlen;
  270. lt->lt_buflen = initlen;
  271. lt->lt_buf[0] = 0;
  272. }
  273. if (growlen > 0) lt->lt_inclen = growlen;
  274. PR_ASSERT(token != NULL);
  275. *token = (void *)lt;
  276. return 0;
  277. }
  278. /*
  279. * Description (lex_token_start)
  280. *
  281. * This function discards any current contents of the token buffer
  282. * associated with a specified token object, so that any new data
  283. * appended to the token will start at the beginning of the token
  284. * buffer. If there is no token buffer currently associated with
  285. * the token, and the 'initlen' value specified to lex_token_new()
  286. * was greater than zero, then a new token buffer is allocated.
  287. * This function enables a token and optionally its token buffer
  288. * to be reused.
  289. *
  290. * Arguments:
  291. *
  292. * token - handle for token object
  293. *
  294. * Returns:
  295. *
  296. * If successful, the function return value is zero. Otherwise
  297. * a negative error code is returned.
  298. */
  299. NSAPI_PUBLIC int
  300. lex_token_start(void * token)
  301. {
  302. LEXToken_t * lt = (LEXToken_t *)token; /* token pointer */
  303. /* Do we need to allocate a token buffer? */
  304. if ((lt->lt_buf == NULL) && (lt->lt_initlen > 0)) {
  305. /* Allocate the initial token buffer */
  306. if (lt->lt_mempool) {
  307. lt->lt_buf = (char *)pool_malloc(lt->lt_mempool, lt->lt_initlen);
  308. }
  309. else {
  310. lt->lt_buf = (char *)MALLOC(lt->lt_initlen);
  311. }
  312. if (lt->lt_buf == NULL) {
  313. /* Error - insufficient memory */
  314. return LEXERR_MALLOC;
  315. }
  316. lt->lt_buflen = lt->lt_initlen;
  317. }
  318. lt->lt_len = 0;
  319. lt->lt_buf[0] = 0;
  320. return 0;
  321. }
  322. /*
  323. * Description (lex_token_info)
  324. *
  325. * This function returns information about the token buffer currently
  326. * associated with a token object. This includes a pointer to the
  327. * token data, if any, the current length of the token data, and the
  328. * current size of the token buffer.
  329. *
  330. * Arguments:
  331. *
  332. * token - handle for token object
  333. * tdatalen - pointer to returned token data length
  334. * (may be null)
  335. * tbufflen - pointer to returned token buffer length
  336. * (may be null)
  337. *
  338. * Returns:
  339. *
  340. * The function return value is a pointer to the beginning of the
  341. * token data, or null if there is no token buffer associated with
  342. * the token. The token data length and token buffer length are
  343. * returned via 'tdatalen' and 'tbufflen', respectively.
  344. */
  345. NSAPI_PUBLIC
  346. char * lex_token_info(void * token, int * tdatalen, int * tbufflen)
  347. {
  348. LEXToken_t * lt = (LEXToken_t *)token; /* token pointer */
  349. if (tdatalen) *tdatalen = lt->lt_len;
  350. if (tbufflen) *tbufflen = lt->lt_buflen;
  351. return lt->lt_buf;
  352. }
  353. /*
  354. * Description (lex_token)
  355. *
  356. * This function returns a pointer to the current token buffer, if any.
  357. * If the length of the token is also needed, use lex_token_info().
  358. * This function would normally be used when the token is a
  359. * null-terminated string. See also lex_token_take().
  360. *
  361. * Arguments:
  362. *
  363. * token - handle for token object
  364. *
  365. * Returns:
  366. *
  367. * A pointer to the beginning of the current token is returned.
  368. * The pointer is null if no token buffer is currently associated
  369. * with the token object.
  370. */
  371. NSAPI_PUBLIC
  372. char * lex_token(void * token)
  373. {
  374. LEXToken_t * lt = (LEXToken_t *)token; /* token pointer */
  375. return lt->lt_buf;
  376. }
  377. /*
  378. * Description (lex_token_destroy)
  379. *
  380. * This function destroys a specified token object. The memory
  381. * associated with the token object and its token buffer, if any,
  382. * is freed to whence it came. Note that token objects can be
  383. * associated with a memory pool, and destroyed implicitly when
  384. * the pool is destroyed via pool_destroy().
  385. *
  386. * Arguments:
  387. *
  388. * token - handle for token object
  389. */
  390. NSAPI_PUBLIC
  391. void lex_token_destroy(void * token)
  392. {
  393. LEXToken_t * lt = (LEXToken_t *)token; /* token pointer */
  394. if (lt) {
  395. if (lt->lt_mempool) {
  396. if (lt->lt_buf) {
  397. pool_free(lt->lt_mempool, (void *)(lt->lt_buf));
  398. }
  399. pool_free(lt->lt_mempool, (void *)lt);
  400. }
  401. else {
  402. if (lt->lt_buf) {
  403. FREE(lt->lt_buf);
  404. }
  405. FREE(lt);
  406. }
  407. }
  408. }
  409. /*
  410. * Description (lex_token_get)
  411. *
  412. * This function returns a pointer to the current token buffer,
  413. * leaving the token with no associated token buffer. The caller
  414. * assumes ownership of the returned token buffer. The length
  415. * of the token data and the length of the token buffer are returned
  416. * if requested. Note that lex_token_take() performs a similar
  417. * operation.
  418. *
  419. * Arguments:
  420. *
  421. * token - handle for token object
  422. * tdatalen - pointer to returned token data length
  423. * (may be null)
  424. * tbufflen - pointer to returned token buffer length
  425. * (may be null)
  426. *
  427. * Returns:
  428. *
  429. * The function return value is a pointer to the beginning of the
  430. * token data, or null if there is no token buffer associated with
  431. * the token. The token data length and token buffer length are
  432. * returned via 'tdatalen' and 'tbufflen', respectively.
  433. */
  434. NSAPI_PUBLIC
  435. char * lex_token_get(void * token, int * tdatalen, int * tbufflen)
  436. {
  437. LEXToken_t * lt = (LEXToken_t *)token; /* token pointer */
  438. char * tokenstr;
  439. tokenstr = lt->lt_buf;
  440. if (tdatalen) *tdatalen = lt->lt_len;
  441. if (tbufflen) *tbufflen = lt->lt_buflen;
  442. lt->lt_buf = NULL;
  443. lt->lt_buflen = 0;
  444. lt->lt_len = 0;
  445. return tokenstr;
  446. }
  447. /*
  448. * Description (lex_token_take)
  449. *
  450. * This function returns a pointer to the current token buffer,
  451. * leaving the token with no associated token buffer. The caller
  452. * assumes ownership of the returned token buffer. Note that
  453. * lex_token_get() performs a similar operation, but returns more
  454. * information.
  455. *
  456. * Arguments:
  457. *
  458. * token - handle for token object
  459. *
  460. * Returns:
  461. *
  462. * A pointer to the beginning of the current token is returned.
  463. * The pointer is null if no token buffer is currently associated
  464. * with the token object.
  465. */
  466. NSAPI_PUBLIC
  467. char * lex_token_take(void * token)
  468. {
  469. LEXToken_t * lt = (LEXToken_t *)token; /* token pointer */
  470. char * tokenstr;
  471. tokenstr = lt->lt_buf;
  472. lt->lt_buf = NULL;
  473. lt->lt_buflen = 0;
  474. lt->lt_len = 0;
  475. return tokenstr;
  476. }
  477. /*
  478. * Description (lex_token_append)
  479. *
  480. * This function appends data to the end of a token. If 'growlen'
  481. * was specified as a greater-than-zero value for lex_token_new(),
  482. * then the token buffer may be reallocated to accomodate the
  483. * new data if necessary. A null byte is maintained in the token
  484. * buffer following the token data, but it is not included in the
  485. * token data length.
  486. *
  487. * Arguments:
  488. *
  489. * token - handle for token object
  490. * nbytes - number of bytes of new data
  491. * src - pointer to new data
  492. *
  493. * Returns:
  494. *
  495. * If successful, the function return value is the new length of
  496. * the token data. Otherwise a negative error code is returned.
  497. */
  498. NSAPI_PUBLIC
  499. int lex_token_append(void * token, int nbytes, char * src)
  500. {
  501. LEXToken_t * lt = (LEXToken_t *)token; /* token pointer */
  502. int bufsize;
  503. int length;
  504. PR_ASSERT(nbytes >= 0);
  505. PR_ASSERT((src != NULL) || (nbytes == 0));
  506. if (nbytes > 0) {
  507. bufsize = lt->lt_buflen;
  508. length = lt->lt_len + nbytes;
  509. if (length >= bufsize) {
  510. while (length >= bufsize) {
  511. bufsize += lt->lt_inclen;
  512. }
  513. if (lt->lt_mempool) {
  514. if (lt->lt_buf) {
  515. lt->lt_buf = (char *)pool_realloc(lt->lt_mempool,
  516. lt->lt_buf, bufsize);
  517. }
  518. else {
  519. lt->lt_buf = (char *)pool_malloc(lt->lt_mempool, bufsize);
  520. }
  521. }
  522. else {
  523. if (lt->lt_buf) {
  524. lt->lt_buf = (char *)REALLOC(lt->lt_buf, bufsize);
  525. }
  526. else {
  527. lt->lt_buf = (char *)MALLOC(bufsize);
  528. }
  529. }
  530. }
  531. if (lt->lt_buf) {
  532. memcpy((void *)(lt->lt_buf + lt->lt_len), (void *)src, nbytes);
  533. lt->lt_buf[length] = 0;
  534. lt->lt_len = length;
  535. lt->lt_buflen = bufsize;
  536. }
  537. else {
  538. /* Error - insufficient memory */
  539. return LEXERR_MALLOC;
  540. }
  541. }
  542. return lt->lt_len;
  543. }
  544. NSAPI_PUBLIC
  545. int lex_next_char(LEXStream_t * lst, void * chtab, unsigned long cbits)
  546. {
  547. LEXClassTab_t * lct; /* character class table pointer */
  548. unsigned char * bp; /* bit vector pointer */
  549. unsigned long bitmask; /* class bit mask temporary */
  550. int rv; /* return value */
  551. int i; /* loop index */
  552. lct = (LEXClassTab_t *)chtab;
  553. /* Go get more stream data if none left in the buffer */
  554. if (lst->lst_len <= 0) {
  555. rv = (*lst->lst_get)(lst);
  556. if (rv <= 0) {
  557. return rv;
  558. }
  559. }
  560. /* Get the next character from the buffer */
  561. rv = *lst->lst_cp;
  562. bitmask = cbits;
  563. bp = lct->lct_bv + rv * lct->lct_bvbytes;
  564. for (i = 0; i < lct->lct_bvbytes; ++i) {
  565. if (*bp++ & bitmask) {
  566. /* Update the buffer pointer and length */
  567. lst->lst_cp += 1;
  568. lst->lst_len -= 1;
  569. break;
  570. }
  571. bitmask >>= 8;
  572. }
  573. return rv;
  574. }
  575. NSAPI_PUBLIC
  576. int lex_scan_over(LEXStream_t * lst, void * chtab, unsigned long cbits,
  577. void * token)
  578. {
  579. LEXClassTab_t * lct; /* character class table pointer */
  580. char * cp; /* current pointer in stream buffer */
  581. unsigned char * bp; /* bit vector pointer */
  582. unsigned long bitmask; /* class bit mask temporary */
  583. int cv = 0; /* current character value */
  584. int rv = 0; /* return value */
  585. int slen; /* token segment length */
  586. int done = 0; /* done indication */
  587. int i; /* loop index */
  588. lct = (LEXClassTab_t *)chtab;
  589. while (!done) {
  590. /* Go get more stream data if none left in the buffer */
  591. if (lst->lst_len <= 0) {
  592. rv = (*lst->lst_get)(lst);
  593. if (rv <= 0) {
  594. return rv;
  595. }
  596. }
  597. slen = 0;
  598. cp = lst->lst_cp;
  599. while (slen < lst->lst_len) {
  600. cv = *cp;
  601. bitmask = cbits;
  602. bp = lct->lct_bv + cv * lct->lct_bvbytes;
  603. for (i = 0; i < lct->lct_bvbytes; ++i) {
  604. if (*bp++ & bitmask) goto more_token;
  605. bitmask >>= 8;
  606. }
  607. done = 1;
  608. break;
  609. more_token:
  610. slen += 1;
  611. cp += 1;
  612. }
  613. /* If the current segment is not empty, append it to the token */
  614. if (slen > 0) {
  615. rv = lex_token_append(token, slen, lst->lst_cp);
  616. if (rv < 0) break;
  617. /* Update the stream buffer pointer and length */
  618. lst->lst_cp += slen;
  619. lst->lst_len -= slen;
  620. }
  621. }
  622. return ((rv < 0) ? rv : cv);
  623. }
  624. /*
  625. * Description (lex_scan_string)
  626. *
  627. * This function parses a quoted string into the specified token.
  628. * The current character in the LEX stream is taken to be the
  629. * beginning quote character. The quote character may be included
  630. * in the string by preceding it with a '\'. Any newline
  631. * characters to be included in the string must also be preceded
  632. * by '\'. The string is terminated by another occurrence of the
  633. * quote character, or an unquoted newline, or EOF.
  634. *
  635. * Arguments:
  636. *
  637. * lst - pointer to LEX stream structure
  638. * token - handle for token
  639. * flags - bit flags (unused - must be zero)
  640. *
  641. * Returns:
  642. *
  643. * The terminating character is returned, or zero if EOF. The
  644. * string is returned in the token, without the beginning and
  645. * ending quote characters. An error is indicated by a negative
  646. * return value.
  647. */
  648. NSAPI_PUBLIC
  649. int lex_scan_string(LEXStream_t * lst, void * token, int flags)
  650. {
  651. char * cp; /* current pointer in stream buffer */
  652. int cv; /* current character value */
  653. int rv; /* return value */
  654. int slen; /* token segment length */
  655. int done = 0; /* done indication */
  656. int cquote = 0; /* character quote indication */
  657. int qchar = -1; /* quote character */
  658. while (!done) {
  659. /* Go get more stream data if none left in the buffer */
  660. if (lst->lst_len <= 0) {
  661. rv = (*lst->lst_get)(lst);
  662. if (rv <= 0) {
  663. return rv;
  664. }
  665. }
  666. slen = 0;
  667. cp = lst->lst_cp;
  668. while (slen < lst->lst_len) {
  669. /* Get the next character */
  670. cv = *cp;
  671. /* Pick up the quote character if we don't have it yet */
  672. if (qchar < 0) {
  673. qchar = cv;
  674. /* Don't include it in the string */
  675. lst->lst_cp += 1;
  676. lst->lst_len -= 1;
  677. cp += 1;
  678. continue;
  679. }
  680. /* cquote is 1 if the last character was '\' */
  681. if (cquote == 0) {
  682. /* Is this a string terminator? */
  683. if ((cv == qchar) || (cv == '\n')) {
  684. /* Append whatever we have to this point */
  685. if (slen > 0) goto append_it;
  686. /*
  687. * If the terminator is the expected quote character,
  688. * just skip it. If it's anything else, leave it as
  689. * the current character.
  690. */
  691. if (cv == qchar) {
  692. lst->lst_cp += 1;
  693. lst->lst_len -= 1;
  694. }
  695. done = 1;
  696. goto append_it;
  697. }
  698. /* Got the character quote character? */
  699. if (cv == '\\') {
  700. /* Append anything we have so far first */
  701. if (slen > 0) goto append_it;
  702. /* Then skip the character */
  703. cquote = 1;
  704. lst->lst_cp += 1;
  705. lst->lst_len -= 1;
  706. cp += 1;
  707. continue;
  708. }
  709. }
  710. else {
  711. /* Include any character following '\' */
  712. cquote = 0;
  713. }
  714. /* Include this character in the string */
  715. slen += 1;
  716. cp += 1;
  717. }
  718. append_it:
  719. /* If the current segment is not empty, append it to the token */
  720. if (slen > 0) {
  721. rv = lex_token_append(token, slen, lst->lst_cp);
  722. if (rv < 0) break;
  723. /* Update the stream buffer pointer and length */
  724. lst->lst_cp += slen;
  725. lst->lst_len -= slen;
  726. }
  727. }
  728. return ((rv < 0) ? rv : cv);
  729. }
  730. NSAPI_PUBLIC
  731. int lex_scan_to(LEXStream_t * lst, void * chtab, unsigned long cbits,
  732. void * token)
  733. {
  734. LEXClassTab_t * lct; /* character class table pointer */
  735. unsigned char * bp; /* bit vector pointer */
  736. char * cp; /* current pointer in stream buffer */
  737. unsigned long bitmask; /* class bit mask temporary */
  738. int cv = 0; /* current character value */
  739. int rv = 0; /* return value */
  740. int slen; /* token segment length */
  741. int done = 0; /* done indication */
  742. int i; /* loop index */
  743. lct = (LEXClassTab_t *)chtab;
  744. while (!done) {
  745. /* Go get more stream data if none left in the buffer */
  746. if (lst->lst_len <= 0) {
  747. rv = (*lst->lst_get)(lst);
  748. if (rv <= 0) {
  749. return rv;
  750. }
  751. }
  752. slen = 0;
  753. cp = lst->lst_cp;
  754. while (slen < lst->lst_len) {
  755. cv = *cp;
  756. bitmask = cbits;
  757. bp = lct->lct_bv + cv * lct->lct_bvbytes;
  758. for (i = 0; i < lct->lct_bvbytes; ++i) {
  759. if (*bp++ & bitmask) {
  760. done = 1;
  761. goto append_it;
  762. }
  763. bitmask >>= 8;
  764. }
  765. slen += 1;
  766. cp += 1;
  767. }
  768. append_it:
  769. /* If the current segment is not empty, append it to the token */
  770. if (slen > 0) {
  771. rv = lex_token_append(token, slen, lst->lst_cp);
  772. if (rv < 0) break;
  773. /* Update the stream buffer pointer and length */
  774. lst->lst_cp += slen;
  775. lst->lst_len -= slen;
  776. }
  777. }
  778. return ((rv < 0) ? rv : cv);
  779. }
  780. NSAPI_PUBLIC
  781. int lex_skip_over(LEXStream_t * lst, void * chtab, unsigned long cbits)
  782. {
  783. LEXClassTab_t * lct; /* character class table pointer */
  784. unsigned char * bp; /* bit vector pointer */
  785. char * cp; /* current pointer in stream buffer */
  786. unsigned long bitmask; /* class bit mask temporary */
  787. int rv = 0; /* return value */
  788. int slen; /* token segment length */
  789. int done = 0; /* done indication */
  790. int i; /* loop index */
  791. lct = (LEXClassTab_t *)chtab;
  792. while (!done) {
  793. /* Go get more stream data if none left in the buffer */
  794. if (lst->lst_len <= 0) {
  795. rv = (*lst->lst_get)(lst);
  796. if (rv <= 0) {
  797. return rv;
  798. }
  799. }
  800. slen = 0;
  801. cp = lst->lst_cp;
  802. while (slen < lst->lst_len) {
  803. rv = *cp;
  804. bitmask = cbits;
  805. bp = lct->lct_bv + rv * lct->lct_bvbytes;
  806. for (i = 0; i < lct->lct_bvbytes; ++i) {
  807. if (*bp++ & bitmask) goto next_ch;
  808. bitmask >>= 8;
  809. }
  810. done = 1;
  811. break;
  812. next_ch:
  813. slen += 1;
  814. cp += 1;
  815. }
  816. if (slen > 0) {
  817. /* Update the stream buffer pointer and length */
  818. lst->lst_cp += slen;
  819. lst->lst_len -= slen;
  820. }
  821. }
  822. return rv;
  823. }
  824. NSAPI_PUBLIC
  825. int lex_skip_to(LEXStream_t * lst, void * chtab, unsigned long cbits)
  826. {
  827. LEXClassTab_t * lct; /* character class table pointer */
  828. unsigned char * bp; /* bit vector pointer */
  829. char * cp; /* current pointer in stream buffer */
  830. unsigned long bitmask; /* class bit mask temporary */
  831. int rv; /* return value */
  832. int slen; /* token segment length */
  833. int done = 0; /* done indication */
  834. int i; /* loop index */
  835. lct = (LEXClassTab_t *)chtab;
  836. while (!done) {
  837. /* Go get more stream data if none left in the buffer */
  838. if (lst->lst_len <= 0) {
  839. rv = (*lst->lst_get)(lst);
  840. if (rv <= 0) {
  841. return rv;
  842. }
  843. }
  844. slen = 0;
  845. cp = lst->lst_cp;
  846. while (slen < lst->lst_len) {
  847. rv = *cp;
  848. bitmask = cbits;
  849. bp = lct->lct_bv + rv * lct->lct_bvbytes;
  850. for (i = 0; i < lct->lct_bvbytes; ++i) {
  851. if (*bp++ & bitmask) {
  852. done = 1;
  853. goto update_it;
  854. }
  855. bitmask >>= 8;
  856. }
  857. slen += 1;
  858. cp += 1;
  859. }
  860. update_it:
  861. /* Update the stream buffer pointer and length */
  862. if (slen > 0) {
  863. lst->lst_cp += slen;
  864. lst->lst_len -= slen;
  865. }
  866. }
  867. return rv;
  868. }