lexer.c 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311
  1. /*
  2. * Copyright (c) 2013 Hugh Bailey <[email protected]>
  3. *
  4. * Permission to use, copy, modify, and distribute this software for any
  5. * purpose with or without fee is hereby granted, provided that the above
  6. * copyright notice and this permission notice appear in all copies.
  7. *
  8. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  10. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  11. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  12. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  13. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  14. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  15. */
  16. #include <ctype.h>
  17. #include "lexer.h"
  18. static const char *astrblank = "";
  19. int strref_cmp(const struct strref *str1, const char *str2)
  20. {
  21. size_t i = 0;
  22. if (strref_is_empty(str1))
  23. return (!str2 || !*str2) ? 0 : -1;
  24. if (!str2)
  25. str2 = astrblank;
  26. do {
  27. char ch1, ch2;
  28. ch1 = (i < str1->len) ? str1->array[i] : 0;
  29. ch2 = *str2;
  30. if (ch1 < ch2)
  31. return -1;
  32. else if (ch1 > ch2)
  33. return 1;
  34. } while (i++ < str1->len && *str2++);
  35. return 0;
  36. }
  37. int strref_cmpi(const struct strref *str1, const char *str2)
  38. {
  39. size_t i = 0;
  40. if (strref_is_empty(str1))
  41. return (!str2 || !*str2) ? 0 : -1;
  42. if (!str2)
  43. str2 = astrblank;
  44. do {
  45. char ch1, ch2;
  46. ch1 = (i < str1->len) ? (char)toupper(str1->array[i]) : 0;
  47. ch2 = (char)toupper(*str2);
  48. if (ch1 < ch2)
  49. return -1;
  50. else if (ch1 > ch2)
  51. return 1;
  52. } while (i++ < str1->len && *str2++);
  53. return 0;
  54. }
  55. int strref_cmp_strref(const struct strref *str1, const struct strref *str2)
  56. {
  57. size_t i = 0;
  58. if (strref_is_empty(str1))
  59. return strref_is_empty(str2) ? 0 : -1;
  60. if (strref_is_empty(str2))
  61. return -1;
  62. do {
  63. char ch1, ch2;
  64. ch1 = (i < str1->len) ? str1->array[i] : 0;
  65. ch2 = (i < str2->len) ? str2->array[i] : 0;
  66. if (ch1 < ch2)
  67. return -1;
  68. else if (ch1 > ch2)
  69. return 1;
  70. i++;
  71. } while (i <= str1->len && i <= str2->len);
  72. return 0;
  73. }
  74. int strref_cmpi_strref(const struct strref *str1, const struct strref *str2)
  75. {
  76. size_t i = 0;
  77. if (strref_is_empty(str1))
  78. return strref_is_empty(str2) ? 0 : -1;
  79. if (strref_is_empty(str2))
  80. return -1;
  81. do {
  82. char ch1, ch2;
  83. ch1 = (i < str1->len) ? (char)toupper(str1->array[i]) : 0;
  84. ch2 = (i < str2->len) ? (char)toupper(str2->array[i]) : 0;
  85. if (ch1 < ch2)
  86. return -1;
  87. else if (ch1 > ch2)
  88. return 1;
  89. i++;
  90. } while (i <= str1->len && i <= str2->len);
  91. return 0;
  92. }
  93. /* ------------------------------------------------------------------------- */
  94. bool valid_int_str(const char *str, size_t n)
  95. {
  96. bool found_num = false;
  97. if (!str)
  98. return false;
  99. if (!*str)
  100. return false;
  101. if (!n)
  102. n = strlen(str);
  103. if (*str == '-' || *str == '+')
  104. ++str;
  105. do {
  106. if (*str > '9' || *str < '0')
  107. return false;
  108. found_num = true;
  109. } while (*++str && --n);
  110. return found_num;
  111. }
  112. bool valid_float_str(const char *str, size_t n)
  113. {
  114. bool found_num = false;
  115. bool found_exp = false;
  116. bool found_dec = false;
  117. if (!str)
  118. return false;
  119. if (!*str)
  120. return false;
  121. if (!n)
  122. n = strlen(str);
  123. if (*str == '-' || *str == '+')
  124. ++str;
  125. do {
  126. if (*str == '.') {
  127. if (found_dec || found_exp || !found_num)
  128. return false;
  129. found_dec = true;
  130. } else if (*str == 'e') {
  131. if (found_exp || !found_num)
  132. return false;
  133. found_exp = true;
  134. found_num = false;
  135. } else if (*str == '-' || *str == '+') {
  136. if (!found_exp || !found_num)
  137. return false;
  138. } else if (*str > '9' || *str < '0') {
  139. return false;
  140. } else {
  141. found_num = true;
  142. }
  143. } while (*++str && --n);
  144. return found_num;
  145. }
  146. /* ------------------------------------------------------------------------- */
  147. void error_data_add(struct error_data *data, const char *file, uint32_t row,
  148. uint32_t column, const char *msg, int level)
  149. {
  150. struct error_item item;
  151. if (!data)
  152. return;
  153. item.file = file;
  154. item.row = row;
  155. item.column = column;
  156. item.level = level;
  157. item.error = bstrdup(msg);
  158. da_push_back(data->errors, &item);
  159. }
  160. char *error_data_buildstring(struct error_data *ed)
  161. {
  162. struct dstr str;
  163. struct error_item *items = ed->errors.array;
  164. size_t i;
  165. dstr_init(&str);
  166. for (i = 0; i < ed->errors.num; i++) {
  167. struct error_item *item = items + i;
  168. dstr_catf(&str, "%s (%u, %u): %s\n", item->file, item->row,
  169. item->column, item->error);
  170. }
  171. return str.array;
  172. }
  173. /* ------------------------------------------------------------------------- */
  174. static inline enum base_token_type get_char_token_type(const char ch)
  175. {
  176. if (is_whitespace(ch))
  177. return BASETOKEN_WHITESPACE;
  178. else if (ch >= '0' && ch <= '9')
  179. return BASETOKEN_DIGIT;
  180. else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))
  181. return BASETOKEN_ALPHA;
  182. return BASETOKEN_OTHER;
  183. }
  184. bool lexer_getbasetoken(struct lexer *lex, struct base_token *token,
  185. enum ignore_whitespace iws)
  186. {
  187. const char *offset = lex->offset;
  188. const char *token_start = NULL;
  189. enum base_token_type type = BASETOKEN_NONE;
  190. bool ignore_whitespace = (iws == IGNORE_WHITESPACE);
  191. if (!offset)
  192. return false;
  193. while (*offset != 0) {
  194. char ch = *(offset++);
  195. enum base_token_type new_type = get_char_token_type(ch);
  196. if (type == BASETOKEN_NONE) {
  197. if (new_type == BASETOKEN_WHITESPACE &&
  198. ignore_whitespace)
  199. continue;
  200. token_start = offset - 1;
  201. type = new_type;
  202. if (type != BASETOKEN_DIGIT &&
  203. type != BASETOKEN_ALPHA) {
  204. if (is_newline(ch) &&
  205. is_newline_pair(ch, *offset)) {
  206. offset++;
  207. }
  208. break;
  209. }
  210. } else if (type != new_type) {
  211. offset--;
  212. break;
  213. }
  214. }
  215. lex->offset = offset;
  216. if (token_start && offset > token_start) {
  217. strref_set(&token->text, token_start, offset - token_start);
  218. token->type = type;
  219. return true;
  220. }
  221. return false;
  222. }
  223. void lexer_getstroffset(const struct lexer *lex, const char *str, uint32_t *row,
  224. uint32_t *col)
  225. {
  226. uint32_t cur_col = 1, cur_row = 1;
  227. const char *text = lex->text;
  228. if (!str)
  229. return;
  230. while (text < str) {
  231. if (is_newline(*text)) {
  232. text += newline_size(text) - 1;
  233. cur_col = 1;
  234. cur_row++;
  235. } else {
  236. cur_col++;
  237. }
  238. text++;
  239. }
  240. *row = cur_row;
  241. *col = cur_col;
  242. }