1
0

lexer.h 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273
  1. /*
  2. * Copyright (c) 2023 Lain Bailey <[email protected]>
  3. *
  4. * Permission to use, copy, modify, and distribute this software for any
  5. * purpose with or without fee is hereby granted, provided that the above
  6. * copyright notice and this permission notice appear in all copies.
  7. *
  8. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  10. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  11. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  12. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  13. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  14. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  15. */
  16. #pragma once
  17. #include "c99defs.h"
  18. #include "dstr.h"
  19. #include "darray.h"
  20. #ifdef __cplusplus
  21. extern "C" {
  22. #endif
  23. /* ------------------------------------------------------------------------- */
  24. /* string reference (string segment within an already existing array) */
  /* Non-owning view of `len` characters that start at `array`. */
  struct strref {
      const char *array; /* first character of the segment; not freed by any helper here */
      size_t len;        /* number of characters in the segment */
  };

  /* Resets `dst` to a NULL array with zero length. */
  static inline void strref_clear(struct strref *dst)
  {
      dst->len = 0;
      dst->array = NULL;
  }

  /* Points `dst` at the `len` characters beginning at `array`. */
  static inline void strref_set(struct strref *dst, const char *array, size_t len)
  {
      dst->len = len;
      dst->array = array;
  }

  /* Makes `dst` refer to the same segment as `src`. */
  static inline void strref_copy(struct strref *dst, const struct strref *src)
  {
      *dst = *src;
  }

  /*
   * Extends `dst` by `t`.
   *
   * When `dst` has no array yet it simply becomes a copy of `t`.  Otherwise
   * only the length grows by `t->len`; `t->array` is not examined.
   */
  static inline void strref_add(struct strref *dst, const struct strref *t)
  {
      if (dst->array) {
          dst->len += t->len;
          return;
      }

      strref_copy(dst, t);
  }

  /*
   * Returns true when `str` is NULL, has a NULL array, has zero length, or
   * its first character is the NUL terminator.
   */
  static inline bool strref_is_empty(const struct strref *str)
  {
      if (str == NULL || str->array == NULL)
          return true;

      return str->len == 0 || str->array[0] == '\0';
  }
  55. EXPORT int strref_cmp(const struct strref *str1, const char *str2);
  56. EXPORT int strref_cmpi(const struct strref *str1, const char *str2);
  57. EXPORT int strref_cmp_strref(const struct strref *str1, const struct strref *str2);
  58. EXPORT int strref_cmpi_strref(const struct strref *str1, const struct strref *str2);
  59. /* ------------------------------------------------------------------------- */
  60. EXPORT bool valid_int_str(const char *str, size_t n);
  61. EXPORT bool valid_float_str(const char *str, size_t n);
  62. static inline bool valid_int_strref(const struct strref *str)
  63. {
  64. return valid_int_str(str->array, str->len);
  65. }
  66. static inline bool valid_float_strref(const struct strref *str)
  67. {
  68. return valid_float_str(str->array, str->len);
  69. }
  /* True for space, carriage return, tab and line feed; false otherwise. */
  static inline bool is_whitespace(char ch)
  {
      switch (ch) {
      case ' ':
      case '\r':
      case '\t':
      case '\n':
          return true;
      default:
          return false;
      }
  }
  /* True for carriage return or line feed; false otherwise. */
  static inline bool is_newline(char ch)
  {
      switch (ch) {
      case '\r':
      case '\n':
          return true;
      default:
          return false;
      }
  }
  /* True for a space or a horizontal tab; false otherwise. */
  static inline bool is_space_or_tab(const char ch)
  {
      if (ch == '\t')
          return true;

      return ch == ' ';
  }
  /*
   * True when the two characters form a two-character line break, i.e.
   * "\r\n" or "\n\r".  Two identical newline characters are not a pair.
   */
  static inline bool is_newline_pair(char ch1, char ch2)
  {
      if (ch1 == '\r')
          return ch2 == '\n';
      if (ch1 == '\n')
          return ch2 == '\r';

      return false;
  }
  /*
   * Returns how many characters the line break at the start of `array`
   * occupies: 2 for "\r\n" or "\n\r", 1 for a lone '\r' or '\n', and 0 when
   * `array` does not start with a newline character.
   *
   * `array` must be a NUL-terminated string; the second character is only
   * read after the first was found to be a (non-NUL) newline character.
   */
  static inline int newline_size(const char *array)
  {
      const char first = array[0];
      char second;

      if (first != '\r' && first != '\n')
          return 0;

      second = array[1];
      if (second != first && (second == '\r' || second == '\n'))
          return 2;

      return 1;
  }
  94. /* ------------------------------------------------------------------------- */
  95. /*
  96. * A "base" token is one of four things:
  97. * 1.) A sequence of alpha characters
  98. * 2.) A sequence of numeric characters
  99. * 3.) A single whitespace character if whitespace is not ignored
  100. * 4.) A single character that does not fall into the above 3 categories
  101. */
  /* Category of a base token; values are consecutive starting at zero. */
  enum base_token_type {
      BASETOKEN_NONE = 0,       /* zero value, e.g. after base_token_clear() */
      BASETOKEN_ALPHA = 1,      /* sequence of alpha characters */
      BASETOKEN_DIGIT = 2,      /* sequence of numeric characters */
      BASETOKEN_WHITESPACE = 3, /* single whitespace character */
      BASETOKEN_OTHER = 4,      /* single character of any other kind */
  };
  109. struct base_token {
  110. struct strref text;
  111. enum base_token_type type;
  112. bool passed_whitespace;
  113. };
  114. static inline void base_token_clear(struct base_token *t)
  115. {
  116. memset(t, 0, sizeof(struct base_token));
  117. }
  118. static inline void base_token_copy(struct base_token *dst, struct base_token *src)
  119. {
  120. memcpy(dst, src, sizeof(struct base_token));
  121. }
  122. /* ------------------------------------------------------------------------- */
  /* Severity values stored in error_item.level. */
  #define LEX_ERROR 0
  #define LEX_WARNING 1

  /* One diagnostic: message text, source position and severity. */
  struct error_item {
      char *error;      /* message; released with bfree() by error_item_free() */
      const char *file; /* file name; never freed by the helpers in this header */
      uint32_t row;
      uint32_t column;
      int level;        /* LEX_ERROR or LEX_WARNING */
  };

  /* Zero-fills every byte of `*ei`. */
  static inline void error_item_init(struct error_item *ei)
  {
      memset(ei, 0, sizeof *ei);
  }
  135. static inline void error_item_free(struct error_item *ei)
  136. {
  137. bfree(ei->error);
  138. error_item_init(ei);
  139. }
  140. static inline void error_item_array_free(struct error_item *array, size_t num)
  141. {
  142. size_t i;
  143. for (i = 0; i < num; i++)
  144. error_item_free(array + i);
  145. }
  146. /* ------------------------------------------------------------------------- */
  147. struct error_data {
  148. DARRAY(struct error_item) errors;
  149. };
  150. static inline void error_data_init(struct error_data *data)
  151. {
  152. da_init(data->errors);
  153. }
  154. static inline void error_data_free(struct error_data *data)
  155. {
  156. error_item_array_free(data->errors.array, data->errors.num);
  157. da_free(data->errors);
  158. }
  159. static inline const struct error_item *error_data_item(struct error_data *ed, size_t idx)
  160. {
  161. return ed->errors.array + idx;
  162. }
  163. EXPORT char *error_data_buildstring(struct error_data *ed);
  164. EXPORT void error_data_add(struct error_data *ed, const char *file, uint32_t row, uint32_t column, const char *msg,
  165. int level);
  166. static inline size_t error_data_type_count(struct error_data *ed, int type)
  167. {
  168. size_t count = 0, i;
  169. for (i = 0; i < ed->errors.num; i++) {
  170. if (ed->errors.array[i].level == type)
  171. count++;
  172. }
  173. return count;
  174. }
  175. static inline bool error_data_has_errors(struct error_data *ed)
  176. {
  177. size_t i;
  178. for (i = 0; i < ed->errors.num; i++)
  179. if (ed->errors.array[i].level == LEX_ERROR)
  180. return true;
  181. return false;
  182. }
  183. /* ------------------------------------------------------------------------- */
  /* Lexer state: the text being scanned and the current read position. */
  struct lexer {
      char *text;         /* buffer released with bfree() by lexer_free() */
      const char *offset; /* current position; the start helpers set it to `text` */
  };

  /* Zero-fills every byte of `*lex`; nothing is freed. */
  static inline void lexer_init(struct lexer *lex)
  {
      memset(lex, 0, sizeof *lex);
  }
  192. static inline void lexer_free(struct lexer *lex)
  193. {
  194. bfree(lex->text);
  195. lexer_init(lex);
  196. }
  197. static inline void lexer_start(struct lexer *lex, const char *text)
  198. {
  199. lexer_free(lex);
  200. lex->text = bstrdup(text);
  201. lex->offset = lex->text;
  202. }
  203. static inline void lexer_start_move(struct lexer *lex, char *text)
  204. {
  205. lexer_free(lex);
  206. lex->text = text;
  207. lex->offset = lex->text;
  208. }
  209. static inline void lexer_reset(struct lexer *lex)
  210. {
  211. lex->offset = lex->text;
  212. }
  /* Whitespace-handling mode passed to lexer_getbasetoken(). */
  enum ignore_whitespace {
      PARSE_WHITESPACE = 0,
      IGNORE_WHITESPACE = 1
  };
  214. EXPORT bool lexer_getbasetoken(struct lexer *lex, struct base_token *t, enum ignore_whitespace iws);
  215. EXPORT void lexer_getstroffset(const struct lexer *lex, const char *str, uint32_t *row, uint32_t *col);
  216. #ifdef __cplusplus
  217. }
  218. #endif