cf-lexer.c 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264
  1. /*
  2. * Copyright (c) 2023 Lain Bailey <[email protected]>
  3. *
  4. * Permission to use, copy, modify, and distribute this software for any
  5. * purpose with or without fee is hereby granted, provided that the above
  6. * copyright notice and this permission notice appear in all copies.
  7. *
  8. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  10. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  11. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  12. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  13. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  14. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  15. */
  16. #include <ctype.h>
  17. #include <stdio.h>
  18. #include "platform.h"
  19. #include "cf-lexer.h"
  /*
   * Decodes a single escape sequence.
   *
   * On entry *p_src points at the character immediately after the backslash
   * and *p_dst at the next free output byte.  At most one byte is written and
   * both cursors are advanced past what was consumed/produced.
   *
   * Handled sequences:
   *   - simple escapes: \' \" \? \\ \a \b \f \n \r \t \v
   *   - octal escapes of one to three digits (this covers \0)
   *   - \x or \X followed by one or two hex digits; with no hex digit the
   *     escape is consumed and nothing is written
   *   - any other character is consumed and produces no output
   *
   * If *p_src points at the terminating NUL (a dangling backslash), nothing is
   * consumed or written, so the caller never steps past the terminator.
   */
  static inline void cf_convert_from_escape_literal(char **p_dst, const char **p_src)
  {
      char *dst = *p_dst;
      const char *src = *p_src;
      const char ch = *src;

      /* dangling backslash at end of input: stay on the terminator */
      if (ch == '\0')
          return;
      src++;

      switch (ch) {
      case '\'':
          *(dst++) = '\'';
          break;
      case '\"':
          *(dst++) = '\"';
          break;
      case '\?':
          *(dst++) = '\?';
          break;
      case '\\':
          *(dst++) = '\\';
          break;
      case 'a':
          *(dst++) = '\a';
          break;
      case 'b':
          *(dst++) = '\b';
          break;
      case 'f':
          *(dst++) = '\f';
          break;
      case 'n':
          *(dst++) = '\n';
          break;
      case 'r':
          *(dst++) = '\r';
          break;
      case 't':
          *(dst++) = '\t';
          break;
      case 'v':
          *(dst++) = '\v';
          break;

      /* hex: consume only the digits that are really there (max two) */
      case 'X':
      case 'x': {
          unsigned int value = 0;
          int digits = 0;

          while (digits < 2 && isxdigit((unsigned char)*src)) {
              const char c = *src++;
              unsigned int nibble;

              if (c >= '0' && c <= '9')
                  nibble = (unsigned int)(c - '0');
              else if (c >= 'a' && c <= 'f')
                  nibble = (unsigned int)(c - 'a') + 10;
              else
                  nibble = (unsigned int)(c - 'A') + 10;

              value = value * 16 + nibble;
              digits++;
          }
          if (digits)
              *(dst++) = (char)value;
          break;
      }

      /* oct: the digit that selected this case is part of the value */
      default:
          if (ch >= '0' && ch <= '7') {
              unsigned int value = (unsigned int)(ch - '0');
              int digits = 1;

              while (digits < 3 && *src >= '0' && *src <= '7') {
                  value = value * 8 + (unsigned int)(*src++ - '0');
                  digits++;
              }
              *(dst++) = (char)value;
          }
          /* TODO: 'u' / 'U' universal character names are not handled */
          break;
      }

      *p_dst = dst;
      *p_src = src;
  }
  /*
   * Converts a quoted string/character literal into a newly allocated,
   * NUL-terminated string with the surrounding quotes removed and escape
   * sequences decoded.
   *
   * literal: text starting with ' or " and ending with the same quote
   * count:   number of characters in the literal, or 0 to use strlen(literal)
   *
   * Returns NULL if the literal is NULL, shorter than two characters, or not
   * enclosed in matching quote characters.  Otherwise returns a buffer
   * obtained from bzalloc().
   */
  char *cf_literal_to_str(const char *literal, size_t count)
  {
      const char *temp_src;
      const char *end;
      char *str, *temp_dst;

      if (!literal)
          return NULL;

      if (!count)
          count = strlen(literal);

      if (count < 2)
          return NULL;
      if (literal[0] != literal[count - 1])
          return NULL;
      if (literal[0] != '\"' && literal[0] != '\'')
          return NULL;

      /*
       * Strip leading and trailing quote characters: at most count - 2
       * payload bytes plus the terminator.
       */
      str = bzalloc(count - 1);
      temp_src = literal + 1;
      end = literal + count - 1; /* position of the closing quote */
      temp_dst = str;

      /*
       * Bound the loop by source position rather than by iteration count:
       * an escape consumes several source characters in one iteration, so
       * counting iterations would run on into the closing quote.  Every
       * iteration consumes at least one source character and writes at most
       * one byte, so the output cannot overflow.
       */
      while (temp_src < end && *temp_src) {
          if (*temp_src == '\\') {
              temp_src++;
              cf_convert_from_escape_literal(&temp_dst, &temp_src);
          } else {
              *(temp_dst++) = *(temp_src++);
          }
      }

      *temp_dst = 0;
      return str;
  }
  106. static bool cf_is_token_break(struct base_token *start_token, const struct base_token *token)
  107. {
  108. switch (start_token->type) {
  109. case BASETOKEN_ALPHA:
  110. if (token->type == BASETOKEN_OTHER || token->type == BASETOKEN_WHITESPACE)
  111. return true;
  112. break;
  113. case BASETOKEN_DIGIT:
  114. if (token->type == BASETOKEN_WHITESPACE ||
  115. (token->type == BASETOKEN_OTHER && *token->text.array != '.'))
  116. return true;
  117. break;
  118. case BASETOKEN_WHITESPACE:
  119. /* lump all non-newline whitespace together when possible */
  120. if (is_space_or_tab(*start_token->text.array) && is_space_or_tab(*token->text.array))
  121. break;
  122. return true;
  123. case BASETOKEN_OTHER:
  124. if (*start_token->text.array == '.' && token->type == BASETOKEN_DIGIT) {
  125. start_token->type = BASETOKEN_DIGIT;
  126. break;
  127. }
  128. /* Falls through. */
  129. case BASETOKEN_NONE:
  130. return true;
  131. }
  132. return false;
  133. }
  /* True when `array` starts with a backslash directly followed by a newline character. */
  static inline bool cf_is_splice(const char *array)
  {
      if (*array != '\\')
          return false;

      return is_newline(array[1]);
  }
  /* Advances *parray past every consecutive backslash-newline pair it currently points at. */
  static inline void cf_pass_any_splices(const char **parray)
  {
      const char *cursor = *parray;

      while (cf_is_splice(cursor)) {
          /* one for the backslash, plus the size of the newline sequence */
          cursor += 1 + newline_size(cursor + 1);
      }

      *parray = cursor;
  }
  /*
   * True when `array` begins a line or block comment, i.e. a '/' followed
   * (after any line splices) by '/' or '*'.
   */
  static inline bool cf_is_comment(const char *array)
  {
      const char *cursor;

      if (*array != '/')
          return false;

      cursor = array + 1;
      cf_pass_any_splices(&cursor);

      return *cursor == '*' || *cursor == '/';
  }
  152. static bool cf_lexer_process_comment(struct cf_lexer *lex, struct cf_token *out_token)
  153. {
  154. const char *offset;
  155. if (!cf_is_comment(out_token->unmerged_str.array))
  156. return false;
  157. offset = lex->base_lexer.offset;
  158. cf_pass_any_splices(&offset);
  159. strcpy(lex->write_offset++, " ");
  160. out_token->str.len = 1;
  161. if (*offset == '/') {
  162. while (*++offset && !is_newline(*offset))
  163. cf_pass_any_splices(&offset);
  164. } else if (*offset == '*') {
  165. bool was_star = false;
  166. lex->unexpected_eof = true;
  167. while (*++offset) {
  168. cf_pass_any_splices(&offset);
  169. if (was_star && *offset == '/') {
  170. offset++;
  171. lex->unexpected_eof = false;
  172. break;
  173. } else {
  174. was_star = (*offset == '*');
  175. }
  176. }
  177. }
  178. out_token->unmerged_str.len += (size_t)(offset - out_token->unmerged_str.array);
  179. out_token->type = CFTOKEN_SPACETAB;
  180. lex->base_lexer.offset = offset;
  181. return true;
  182. }
  183. static inline void cf_lexer_write_strref(struct cf_lexer *lex, const struct strref *ref)
  184. {
  185. strncpy(lex->write_offset, ref->array, ref->len);
  186. lex->write_offset[ref->len] = 0;
  187. lex->write_offset += ref->len;
  188. }
  189. static bool cf_lexer_is_include(struct cf_lexer *lex)
  190. {
  191. bool found_include_import = false;
  192. bool found_preprocessor = false;
  193. size_t i;
  194. for (i = lex->tokens.num; i > 0; i--) {
  195. struct cf_token *token = lex->tokens.array + (i - 1);
  196. if (is_space_or_tab(*token->str.array))
  197. continue;
  198. if (!found_include_import) {
  199. if (strref_cmp(&token->str, "include") != 0 && strref_cmp(&token->str, "import") != 0)
  200. break;
  201. found_include_import = true;
  202. } else if (!found_preprocessor) {
  203. if (*token->str.array != '#')
  204. break;
  205. found_preprocessor = true;
  206. } else {
  207. return is_newline(*token->str.array);
  208. }
  209. }
  210. /* if starting line */
  211. return found_preprocessor && found_include_import;
  212. }
  213. static void cf_lexer_getstrtoken(struct cf_lexer *lex, struct cf_token *out_token, char delimiter,
  214. bool allow_escaped_delimiters)
  215. {
  216. const char *offset = lex->base_lexer.offset;
  217. bool escaped = false;
  218. out_token->unmerged_str.len++;
  219. out_token->str.len++;
  220. cf_lexer_write_strref(lex, &out_token->unmerged_str);
  221. while (*offset) {
  222. cf_pass_any_splices(&offset);
  223. if (*offset == delimiter) {
  224. if (!escaped) {
  225. *lex->write_offset++ = *offset;
  226. out_token->str.len++;
  227. offset++;
  228. break;
  229. }
  230. } else if (is_newline(*offset)) {
  231. break;
  232. }
  233. *lex->write_offset++ = *offset;
  234. out_token->str.len++;
  235. escaped = (allow_escaped_delimiters && *offset == '\\');
  236. offset++;
  237. }
  238. *lex->write_offset = 0;
  239. out_token->unmerged_str.len += (size_t)(offset - out_token->unmerged_str.array);
  240. out_token->type = CFTOKEN_STRING;
  241. lex->base_lexer.offset = offset;
  242. }
  243. static bool cf_lexer_process_string(struct cf_lexer *lex, struct cf_token *out_token)
  244. {
  245. char ch = *out_token->unmerged_str.array;
  246. if (ch == '<' && cf_lexer_is_include(lex)) {
  247. cf_lexer_getstrtoken(lex, out_token, '>', false);
  248. return true;
  249. } else if (ch == '"' || ch == '\'') {
  250. cf_lexer_getstrtoken(lex, out_token, ch, !cf_lexer_is_include(lex));
  251. return true;
  252. }
  253. return false;
  254. }
  255. static inline enum cf_token_type cf_get_token_type(const struct cf_token *token, const struct base_token *start_token)
  256. {
  257. switch (start_token->type) {
  258. case BASETOKEN_ALPHA:
  259. return CFTOKEN_NAME;
  260. case BASETOKEN_DIGIT:
  261. return CFTOKEN_NUM;
  262. case BASETOKEN_WHITESPACE:
  263. if (is_newline(*token->str.array))
  264. return CFTOKEN_NEWLINE;
  265. else
  266. return CFTOKEN_SPACETAB;
  267. case BASETOKEN_NONE:
  268. case BASETOKEN_OTHER:
  269. break;
  270. }
  271. return CFTOKEN_OTHER;
  272. }
  273. static bool cf_lexer_nexttoken(struct cf_lexer *lex, struct cf_token *out_token)
  274. {
  275. struct base_token token, start_token;
  276. bool wrote_data = false;
  277. base_token_clear(&token);
  278. base_token_clear(&start_token);
  279. cf_token_clear(out_token);
  280. while (lexer_getbasetoken(&lex->base_lexer, &token, PARSE_WHITESPACE)) {
  281. /* reclassify underscore as alpha for alnum tokens */
  282. if (*token.text.array == '_')
  283. token.type = BASETOKEN_ALPHA;
  284. /* ignore escaped newlines to merge spliced lines */
  285. if (cf_is_splice(token.text.array)) {
  286. lex->base_lexer.offset += newline_size(token.text.array + 1);
  287. continue;
  288. }
  289. if (!wrote_data) {
  290. out_token->unmerged_str.array = token.text.array;
  291. out_token->str.array = lex->write_offset;
  292. /* if comment then output a space */
  293. if (cf_lexer_process_comment(lex, out_token))
  294. return true;
  295. /* process string tokens if any */
  296. if (cf_lexer_process_string(lex, out_token))
  297. return true;
  298. base_token_copy(&start_token, &token);
  299. wrote_data = true;
  300. } else if (cf_is_token_break(&start_token, &token)) {
  301. lex->base_lexer.offset -= token.text.len;
  302. break;
  303. }
  304. /* write token to CF lexer to account for splicing/comments */
  305. cf_lexer_write_strref(lex, &token.text);
  306. out_token->str.len += token.text.len;
  307. }
  308. if (wrote_data) {
  309. out_token->unmerged_str.len = (size_t)(lex->base_lexer.offset - out_token->unmerged_str.array);
  310. out_token->type = cf_get_token_type(out_token, &start_token);
  311. }
  312. return wrote_data;
  313. }
  314. void cf_lexer_init(struct cf_lexer *lex)
  315. {
  316. lexer_init(&lex->base_lexer);
  317. da_init(lex->tokens);
  318. lex->file = NULL;
  319. lex->reformatted = NULL;
  320. lex->write_offset = NULL;
  321. lex->unexpected_eof = false;
  322. }
  323. void cf_lexer_free(struct cf_lexer *lex)
  324. {
  325. bfree(lex->file);
  326. bfree(lex->reformatted);
  327. lexer_free(&lex->base_lexer);
  328. da_free(lex->tokens);
  329. lex->file = NULL;
  330. lex->reformatted = NULL;
  331. lex->write_offset = NULL;
  332. lex->unexpected_eof = false;
  333. }
  334. bool cf_lexer_lex(struct cf_lexer *lex, const char *str, const char *file)
  335. {
  336. struct cf_token token;
  337. struct cf_token *last_token = NULL;
  338. cf_lexer_free(lex);
  339. if (!str || !*str)
  340. return false;
  341. if (file)
  342. lex->file = bstrdup(file);
  343. lexer_start(&lex->base_lexer, str);
  344. cf_token_clear(&token);
  345. lex->reformatted = bmalloc(strlen(str) + 1);
  346. lex->reformatted[0] = 0;
  347. lex->write_offset = lex->reformatted;
  348. while (cf_lexer_nexttoken(lex, &token)) {
  349. if (last_token && is_space_or_tab(*last_token->str.array) && is_space_or_tab(*token.str.array)) {
  350. cf_token_add(last_token, &token);
  351. continue;
  352. }
  353. token.lex = lex;
  354. last_token = da_push_back_new(lex->tokens);
  355. memcpy(last_token, &token, sizeof(struct cf_token));
  356. }
  357. cf_token_clear(&token);
  358. token.str.array = lex->write_offset;
  359. token.unmerged_str.array = lex->base_lexer.offset;
  360. token.lex = lex;
  361. da_push_back(lex->tokens, &token);
  362. return !lex->unexpected_eof;
  363. }
  364. /* ------------------------------------------------------------------------- */
  365. struct macro_param {
  366. struct cf_token name;
  367. cf_token_array_t tokens;
  368. };
  369. static inline void macro_param_init(struct macro_param *param)
  370. {
  371. cf_token_clear(&param->name);
  372. da_init(param->tokens);
  373. }
  374. static inline void macro_param_free(struct macro_param *param)
  375. {
  376. cf_token_clear(&param->name);
  377. da_free(param->tokens);
  378. }
  379. /* ------------------------------------------------------------------------- */
  380. struct macro_params {
  381. DARRAY(struct macro_param) params;
  382. };
  383. static inline void macro_params_init(struct macro_params *params)
  384. {
  385. da_init(params->params);
  386. }
  387. static inline void macro_params_free(struct macro_params *params)
  388. {
  389. size_t i;
  390. for (i = 0; i < params->params.num; i++)
  391. macro_param_free(params->params.array + i);
  392. da_free(params->params);
  393. }
  394. static inline struct macro_param *get_macro_param(const struct macro_params *params, const struct strref *name)
  395. {
  396. size_t i;
  397. if (!params)
  398. return NULL;
  399. for (i = 0; i < params->params.num; i++) {
  400. struct macro_param *param = params->params.array + i;
  401. if (strref_cmp_strref(&param->name.str, name) == 0)
  402. return param;
  403. }
  404. return NULL;
  405. }
  406. /* ------------------------------------------------------------------------- */
  /* Forward declarations: the directive handlers below call these before their definitions. */
  static bool cf_preprocessor(struct cf_preprocessor *pp, bool if_block, struct cf_token **p_cur_token);
  static void cf_preprocess_tokens(struct cf_preprocessor *pp, bool if_block, struct cf_token **p_cur_token);
  409. static inline bool go_to_newline(struct cf_token **p_cur_token)
  410. {
  411. struct cf_token *cur_token = *p_cur_token;
  412. while (cur_token->type != CFTOKEN_NEWLINE && cur_token->type != CFTOKEN_NONE)
  413. cur_token++;
  414. *p_cur_token = cur_token;
  415. return cur_token->type != CFTOKEN_NONE;
  416. }
  417. static inline bool next_token(struct cf_token **p_cur_token, bool preprocessor)
  418. {
  419. struct cf_token *cur_token = *p_cur_token;
  420. if (cur_token->type != CFTOKEN_NONE)
  421. cur_token++;
  422. /* if preprocessor, stop at newline */
  423. while (cur_token->type == CFTOKEN_SPACETAB && (preprocessor || cur_token->type == CFTOKEN_NEWLINE))
  424. cur_token++;
  425. *p_cur_token = cur_token;
  426. return cur_token->type != CFTOKEN_NONE;
  427. }
  428. static inline void cf_gettokenoffset(struct cf_preprocessor *pp, const struct cf_token *token, uint32_t *row,
  429. uint32_t *col)
  430. {
  431. lexer_getstroffset(&pp->lex->base_lexer, token->unmerged_str.array, row, col);
  432. }
  433. static void cf_addew(struct cf_preprocessor *pp, const struct cf_token *token, const char *message, int error_level,
  434. const char *val1, const char *val2, const char *val3)
  435. {
  436. uint32_t row, col;
  437. cf_gettokenoffset(pp, token, &row, &col);
  438. if (!val1 && !val2 && !val3) {
  439. error_data_add(pp->ed, token->lex->file, row, col, message, error_level);
  440. } else {
  441. struct dstr formatted;
  442. dstr_init(&formatted);
  443. dstr_safe_printf(&formatted, message, val1, val2, val3, NULL);
  444. error_data_add(pp->ed, token->lex->file, row, col, formatted.array, error_level);
  445. dstr_free(&formatted);
  446. }
  447. }
  448. static inline void cf_adderror(struct cf_preprocessor *pp, const struct cf_token *token, const char *error,
  449. const char *val1, const char *val2, const char *val3)
  450. {
  451. cf_addew(pp, token, error, LEX_ERROR, val1, val2, val3);
  452. }
  453. static inline void cf_addwarning(struct cf_preprocessor *pp, const struct cf_token *token, const char *warning,
  454. const char *val1, const char *val2, const char *val3)
  455. {
  456. cf_addew(pp, token, warning, LEX_WARNING, val1, val2, val3);
  457. }
  /* Records an "Expected <expecting>" error at `token`. */
  static inline void cf_adderror_expecting(struct cf_preprocessor *pp, const struct cf_token *token,
                                           const char *expecting)
  {
      static const char *const format = "Expected $1";

      cf_adderror(pp, token, format, expecting, NULL, NULL);
  }
  /* Records the error for trailing tokens after a preprocessor directive. */
  static inline void cf_adderror_expected_newline(struct cf_preprocessor *pp, const struct cf_token *token)
  {
      static const char *const text = "Unexpected token after preprocessor, expected newline";

      cf_adderror(pp, token, text, NULL, NULL, NULL);
  }
  /* Records the error for input that ends inside a conditional block. */
  static inline void cf_adderror_unexpected_endif_eof(struct cf_preprocessor *pp, const struct cf_token *token)
  {
      static const char *const text = "Unexpected end of file before #endif";

      cf_adderror(pp, token, text, NULL, NULL, NULL);
  }
  /* Records a plain "Unexpected end of file" error at `token`. */
  static inline void cf_adderror_unexpected_eof(struct cf_preprocessor *pp, const struct cf_token *token)
  {
      static const char *const text = "Unexpected end of file";

      cf_adderror(pp, token, text, NULL, NULL, NULL);
  }
  478. static inline void insert_path(struct cf_preprocessor *pp, struct dstr *str_file)
  479. {
  480. const char *file;
  481. const char *slash;
  482. if (pp && pp->lex && pp->lex->file) {
  483. file = pp->lex->file;
  484. slash = strrchr(file, '/');
  485. if (slash) {
  486. struct dstr path = {0};
  487. dstr_ncopy(&path, file, slash - file + 1);
  488. dstr_insert_dstr(str_file, 0, &path);
  489. dstr_free(&path);
  490. }
  491. }
  492. }
  493. static void cf_include_file(struct cf_preprocessor *pp, const struct cf_token *file_token)
  494. {
  495. struct cf_lexer new_lex;
  496. struct dstr str_file;
  497. FILE *file;
  498. char *file_data;
  499. struct cf_token *tokens;
  500. size_t i;
  501. dstr_init(&str_file);
  502. dstr_copy_strref(&str_file, &file_token->str);
  503. dstr_mid(&str_file, &str_file, 1, str_file.len - 2);
  504. insert_path(pp, &str_file);
  505. /* if dependency already exists, run preprocessor on it */
  506. for (i = 0; i < pp->dependencies.num; i++) {
  507. struct cf_lexer *dep = pp->dependencies.array + i;
  508. if (strcmp(dep->file, str_file.array) == 0) {
  509. tokens = cf_lexer_get_tokens(dep);
  510. cf_preprocess_tokens(pp, false, &tokens);
  511. goto exit;
  512. }
  513. }
  514. file = os_fopen(str_file.array, "rb");
  515. if (!file) {
  516. cf_adderror(pp, file_token, "Could not open file '$1'", file_token->str.array, NULL, NULL);
  517. goto exit;
  518. }
  519. os_fread_utf8(file, &file_data);
  520. fclose(file);
  521. cf_lexer_init(&new_lex);
  522. cf_lexer_lex(&new_lex, file_data, str_file.array);
  523. tokens = cf_lexer_get_tokens(&new_lex);
  524. cf_preprocess_tokens(pp, false, &tokens);
  525. bfree(file_data);
  526. da_push_back(pp->dependencies, &new_lex);
  527. exit:
  528. dstr_free(&str_file);
  529. }
  530. static inline bool is_sys_include(struct strref *ref)
  531. {
  532. return ref->len >= 2 && ref->array[0] == '<' && ref->array[ref->len - 1] == '>';
  533. }
  534. static inline bool is_loc_include(struct strref *ref)
  535. {
  536. return ref->len >= 2 && ref->array[0] == '"' && ref->array[ref->len - 1] == '"';
  537. }
  538. static void cf_preprocess_include(struct cf_preprocessor *pp, struct cf_token **p_cur_token)
  539. {
  540. struct cf_token *cur_token = *p_cur_token;
  541. if (pp->ignore_state) {
  542. go_to_newline(p_cur_token);
  543. return;
  544. }
  545. next_token(&cur_token, true);
  546. if (cur_token->type != CFTOKEN_STRING) {
  547. cf_adderror_expecting(pp, cur_token, "string");
  548. go_to_newline(&cur_token);
  549. goto exit;
  550. }
  551. if (is_sys_include(&cur_token->str)) {
  552. /* TODO */
  553. } else if (is_loc_include(&cur_token->str)) {
  554. if (!pp->ignore_state)
  555. cf_include_file(pp, cur_token);
  556. } else {
  557. cf_adderror(pp, cur_token, "Invalid or incomplete string", NULL, NULL, NULL);
  558. go_to_newline(&cur_token);
  559. goto exit;
  560. }
  561. cur_token++;
  562. exit:
  563. *p_cur_token = cur_token;
  564. }
  565. static bool cf_preprocess_macro_params(struct cf_preprocessor *pp, struct cf_def *def, struct cf_token **p_cur_token)
  566. {
  567. struct cf_token *cur_token = *p_cur_token;
  568. bool success = false;
  569. def->macro = true;
  570. do {
  571. next_token(&cur_token, true);
  572. if (cur_token->type != CFTOKEN_NAME) {
  573. cf_adderror_expecting(pp, cur_token, "identifier");
  574. go_to_newline(&cur_token);
  575. goto exit;
  576. }
  577. cf_def_addparam(def, cur_token);
  578. next_token(&cur_token, true);
  579. if (cur_token->type != CFTOKEN_OTHER ||
  580. (*cur_token->str.array != ',' && *cur_token->str.array != ')')) {
  581. cf_adderror_expecting(pp, cur_token, "',' or ')'");
  582. go_to_newline(&cur_token);
  583. goto exit;
  584. }
  585. } while (*cur_token->str.array != ')');
  586. /* ended properly, now go to first define token (or newline) */
  587. next_token(&cur_token, true);
  588. success = true;
  589. exit:
  590. *p_cur_token = cur_token;
  591. return success;
  592. }
  593. #define INVALID_INDEX ((size_t)-1)
  594. static inline size_t cf_preprocess_get_def_idx(struct cf_preprocessor *pp, const struct strref *def_name)
  595. {
  596. struct cf_def *array = pp->defines.array;
  597. size_t i;
  598. for (i = 0; i < pp->defines.num; i++) {
  599. struct cf_def *cur_def = array + i;
  600. if (strref_cmp_strref(&cur_def->name.str, def_name) == 0)
  601. return i;
  602. }
  603. return INVALID_INDEX;
  604. }
  605. static inline struct cf_def *cf_preprocess_get_def(struct cf_preprocessor *pp, const struct strref *def_name)
  606. {
  607. size_t idx = cf_preprocess_get_def_idx(pp, def_name);
  608. if (idx == INVALID_INDEX)
  609. return NULL;
  610. return pp->defines.array + idx;
  611. }
  612. static char space_filler[2] = " ";
  613. static inline void append_space(struct cf_preprocessor *pp, cf_token_array_t *tokens, const struct cf_token *base)
  614. {
  615. struct cf_token token;
  616. strref_set(&token.str, space_filler, 1);
  617. token.type = CFTOKEN_SPACETAB;
  618. if (base) {
  619. token.lex = base->lex;
  620. strref_copy(&token.unmerged_str, &base->unmerged_str);
  621. } else {
  622. token.lex = pp->lex;
  623. strref_copy(&token.unmerged_str, &token.str);
  624. }
  625. da_push_back(*tokens, &token);
  626. }
  627. static inline void append_end_token(cf_token_array_t *tokens)
  628. {
  629. struct cf_token end;
  630. cf_token_clear(&end);
  631. da_push_back(*tokens, &end);
  632. }
  633. static void cf_preprocess_define(struct cf_preprocessor *pp, struct cf_token **p_cur_token)
  634. {
  635. struct cf_token *cur_token = *p_cur_token;
  636. struct cf_def def;
  637. if (pp->ignore_state) {
  638. go_to_newline(p_cur_token);
  639. return;
  640. }
  641. cf_def_init(&def);
  642. next_token(&cur_token, true);
  643. if (cur_token->type != CFTOKEN_NAME) {
  644. cf_adderror_expecting(pp, cur_token, "identifier");
  645. go_to_newline(&cur_token);
  646. goto exit;
  647. }
  648. append_space(pp, &def.tokens, NULL);
  649. cf_token_copy(&def.name, cur_token);
  650. if (!next_token(&cur_token, true))
  651. goto complete;
  652. /* process macro */
  653. if (*cur_token->str.array == '(') {
  654. if (!cf_preprocess_macro_params(pp, &def, &cur_token))
  655. goto error;
  656. }
  657. while (cur_token->type != CFTOKEN_NEWLINE && cur_token->type != CFTOKEN_NONE)
  658. cf_def_addtoken(&def, cur_token++);
  659. complete:
  660. append_end_token(&def.tokens);
  661. append_space(pp, &def.tokens, NULL);
  662. da_push_back(pp->defines, &def);
  663. goto exit;
  664. error:
  665. cf_def_free(&def);
  666. exit:
  667. *p_cur_token = cur_token;
  668. }
  669. static inline void cf_preprocess_remove_def_strref(struct cf_preprocessor *pp, const struct strref *ref)
  670. {
  671. size_t def_idx = cf_preprocess_get_def_idx(pp, ref);
  672. if (def_idx != INVALID_INDEX) {
  673. struct cf_def *array = pp->defines.array;
  674. cf_def_free(array + def_idx);
  675. da_erase(pp->defines, def_idx);
  676. }
  677. }
  678. static void cf_preprocess_undef(struct cf_preprocessor *pp, struct cf_token **p_cur_token)
  679. {
  680. struct cf_token *cur_token = *p_cur_token;
  681. if (pp->ignore_state) {
  682. go_to_newline(p_cur_token);
  683. return;
  684. }
  685. next_token(&cur_token, true);
  686. if (cur_token->type != CFTOKEN_NAME) {
  687. cf_adderror_expecting(pp, cur_token, "identifier");
  688. go_to_newline(&cur_token);
  689. goto exit;
  690. }
  691. cf_preprocess_remove_def_strref(pp, &cur_token->str);
  692. cur_token++;
  693. exit:
  694. *p_cur_token = cur_token;
  695. }
  696. /* Processes an #ifdef/#ifndef/#if/#else/#elif sub block recursively */
  697. static inline bool cf_preprocess_subblock(struct cf_preprocessor *pp, bool ignore, struct cf_token **p_cur_token)
  698. {
  699. bool eof;
  700. if (!next_token(p_cur_token, true))
  701. return false;
  702. if (!pp->ignore_state) {
  703. pp->ignore_state = ignore;
  704. cf_preprocess_tokens(pp, true, p_cur_token);
  705. pp->ignore_state = false;
  706. } else {
  707. cf_preprocess_tokens(pp, true, p_cur_token);
  708. }
  709. eof = ((*p_cur_token)->type == CFTOKEN_NONE);
  710. if (eof)
  711. cf_adderror_unexpected_endif_eof(pp, *p_cur_token);
  712. return !eof;
  713. }
  714. static void cf_preprocess_ifdef(struct cf_preprocessor *pp, bool ifnot, struct cf_token **p_cur_token)
  715. {
  716. struct cf_token *cur_token = *p_cur_token;
  717. struct cf_def *def;
  718. bool is_true;
  719. next_token(&cur_token, true);
  720. if (cur_token->type != CFTOKEN_NAME) {
  721. cf_adderror_expecting(pp, cur_token, "identifier");
  722. go_to_newline(&cur_token);
  723. goto exit;
  724. }
  725. def = cf_preprocess_get_def(pp, &cur_token->str);
  726. is_true = (def == NULL) == ifnot;
  727. if (!cf_preprocess_subblock(pp, !is_true, &cur_token))
  728. goto exit;
  729. if (strref_cmp(&cur_token->str, "else") == 0) {
  730. if (!cf_preprocess_subblock(pp, is_true, &cur_token))
  731. goto exit;
  732. /*} else if (strref_cmp(&cur_token->str, "elif") == 0) {*/
  733. }
  734. cur_token++;
  735. exit:
  736. *p_cur_token = cur_token;
  737. }
  738. static bool cf_preprocessor(struct cf_preprocessor *pp, bool if_block, struct cf_token **p_cur_token)
  739. {
  740. struct cf_token *cur_token = *p_cur_token;
  741. if (strref_cmp(&cur_token->str, "include") == 0) {
  742. cf_preprocess_include(pp, p_cur_token);
  743. } else if (strref_cmp(&cur_token->str, "define") == 0) {
  744. cf_preprocess_define(pp, p_cur_token);
  745. } else if (strref_cmp(&cur_token->str, "undef") == 0) {
  746. cf_preprocess_undef(pp, p_cur_token);
  747. } else if (strref_cmp(&cur_token->str, "ifdef") == 0) {
  748. cf_preprocess_ifdef(pp, false, p_cur_token);
  749. } else if (strref_cmp(&cur_token->str, "ifndef") == 0) {
  750. cf_preprocess_ifdef(pp, true, p_cur_token);
  751. /*} else if (strref_cmp(&cur_token->str, "if") == 0) {
  752. TODO;*/
  753. } else if (strref_cmp(&cur_token->str, "else") == 0 ||
  754. /*strref_cmp(&cur_token->str, "elif") == 0 ||*/
  755. strref_cmp(&cur_token->str, "endif") == 0) {
  756. if (!if_block) {
  757. struct dstr name;
  758. dstr_init_copy_strref(&name, &cur_token->str);
  759. cf_adderror(pp, cur_token,
  760. "#$1 outside of "
  761. "#if/#ifdef/#ifndef block",
  762. name.array, NULL, NULL);
  763. dstr_free(&name);
  764. (*p_cur_token)++;
  765. return true;
  766. }
  767. return false;
  768. } else if (cur_token->type != CFTOKEN_NEWLINE && cur_token->type != CFTOKEN_NONE) {
  769. /*
  770. * TODO: language-specific preprocessor stuff should be sent to
  771. * handler of some sort
  772. */
  773. (*p_cur_token)++;
  774. }
  775. return true;
  776. }
  777. static void cf_preprocess_addtoken(struct cf_preprocessor *pp, cf_token_array_t *dst, struct cf_token **p_cur_token,
  778. const struct cf_token *base, const struct macro_params *params);
  779. /*
  780. * collects tokens for a macro parameter
  781. *
  782. * note that it is important to make sure that any usage of function calls
  783. * within a macro parameter is preserved, example MACRO(func(1, 2), 3), do not
  784. * let it stop on the comma at "1,"
  785. */
  786. static void cf_preprocess_save_macro_param(struct cf_preprocessor *pp, struct cf_token **p_cur_token,
  787. struct macro_param *param, const struct cf_token *base,
  788. const struct macro_params *cur_params)
  789. {
  790. struct cf_token *cur_token = *p_cur_token;
  791. int brace_count = 0;
  792. append_space(pp, &param->tokens, base);
  793. while (cur_token->type != CFTOKEN_NONE) {
  794. if (*cur_token->str.array == '(') {
  795. brace_count++;
  796. } else if (*cur_token->str.array == ')') {
  797. if (brace_count)
  798. brace_count--;
  799. else
  800. break;
  801. } else if (*cur_token->str.array == ',') {
  802. if (!brace_count)
  803. break;
  804. }
  805. cf_preprocess_addtoken(pp, &param->tokens, &cur_token, base, cur_params);
  806. }
  807. if (cur_token->type == CFTOKEN_NONE)
  808. cf_adderror_unexpected_eof(pp, cur_token);
  809. append_space(pp, &param->tokens, base);
  810. append_end_token(&param->tokens);
  811. *p_cur_token = cur_token;
  812. }
  813. static inline bool param_is_whitespace(const struct macro_param *param)
  814. {
  815. struct cf_token *array = param->tokens.array;
  816. size_t i;
  817. for (i = 0; i < param->tokens.num; i++)
  818. if (array[i].type != CFTOKEN_NONE && array[i].type != CFTOKEN_SPACETAB &&
  819. array[i].type != CFTOKEN_NEWLINE)
  820. return false;
  821. return true;
  822. }
  823. /* collects parameter tokens of a used macro and stores them for the unwrap */
  824. static void cf_preprocess_save_macro_params(struct cf_preprocessor *pp, struct cf_token **p_cur_token,
  825. const struct cf_def *def, const struct cf_token *base,
  826. const struct macro_params *cur_params, struct macro_params *dst)
  827. {
  828. struct cf_token *cur_token = *p_cur_token;
  829. size_t count = 0;
  830. next_token(&cur_token, false);
  831. if (cur_token->type != CFTOKEN_OTHER || *cur_token->str.array != '(') {
  832. cf_adderror_expecting(pp, cur_token, "'('");
  833. goto exit;
  834. }
  835. do {
  836. struct macro_param param;
  837. macro_param_init(&param);
  838. cur_token++;
  839. count++;
  840. cf_preprocess_save_macro_param(pp, &cur_token, &param, base, cur_params);
  841. if (cur_token->type != CFTOKEN_OTHER ||
  842. (*cur_token->str.array != ',' && *cur_token->str.array != ')')) {
  843. macro_param_free(&param);
  844. cf_adderror_expecting(pp, cur_token, "',' or ')'");
  845. goto exit;
  846. }
  847. if (param_is_whitespace(&param)) {
  848. /* if 0-param macro, ignore first entry */
  849. if (count == 1 && !def->params.num && *cur_token->str.array == ')') {
  850. macro_param_free(&param);
  851. break;
  852. }
  853. }
  854. if (count <= def->params.num) {
  855. cf_token_copy(&param.name, cf_def_getparam(def, count - 1));
  856. da_push_back(dst->params, &param);
  857. } else {
  858. macro_param_free(&param);
  859. }
  860. } while (*cur_token->str.array != ')');
  861. if (count != def->params.num)
  862. cf_adderror(pp, cur_token, "Mismatching number of macro parameters", NULL, NULL, NULL);
  863. exit:
  864. *p_cur_token = cur_token;
  865. }
  866. static inline void cf_preprocess_unwrap_param(struct cf_preprocessor *pp, cf_token_array_t *dst,
  867. struct cf_token **p_cur_token, const struct cf_token *base,
  868. const struct macro_param *param)
  869. {
  870. struct cf_token *cur_token = *p_cur_token;
  871. struct cf_token *cur_param_token = param->tokens.array;
  872. while (cur_param_token->type != CFTOKEN_NONE)
  873. cf_preprocess_addtoken(pp, dst, &cur_param_token, base, NULL);
  874. cur_token++;
  875. *p_cur_token = cur_token;
  876. }
  877. static inline void cf_preprocess_unwrap_define(struct cf_preprocessor *pp, cf_token_array_t *dst,
  878. struct cf_token **p_cur_token, const struct cf_token *base,
  879. const struct cf_def *def, const struct macro_params *cur_params)
  880. {
  881. struct cf_token *cur_token = *p_cur_token;
  882. struct macro_params new_params;
  883. struct cf_token *cur_def_token = def->tokens.array;
  884. macro_params_init(&new_params);
  885. if (def->macro)
  886. cf_preprocess_save_macro_params(pp, &cur_token, def, base, cur_params, &new_params);
  887. while (cur_def_token->type != CFTOKEN_NONE)
  888. cf_preprocess_addtoken(pp, dst, &cur_def_token, base, &new_params);
  889. macro_params_free(&new_params);
  890. cur_token++;
  891. *p_cur_token = cur_token;
  892. }
  893. static void cf_preprocess_addtoken(struct cf_preprocessor *pp, cf_token_array_t *dst, struct cf_token **p_cur_token,
  894. const struct cf_token *base, const struct macro_params *params)
  895. {
  896. struct cf_token *cur_token = *p_cur_token;
  897. if (pp->ignore_state)
  898. goto ignore;
  899. if (!base)
  900. base = cur_token;
  901. if (cur_token->type == CFTOKEN_NAME) {
  902. struct cf_def *def;
  903. struct macro_param *param;
  904. param = get_macro_param(params, &cur_token->str);
  905. if (param) {
  906. cf_preprocess_unwrap_param(pp, dst, &cur_token, base, param);
  907. goto exit;
  908. }
  909. def = cf_preprocess_get_def(pp, &cur_token->str);
  910. if (def) {
  911. cf_preprocess_unwrap_define(pp, dst, &cur_token, base, def, params);
  912. goto exit;
  913. }
  914. }
  915. da_push_back(*dst, cur_token);
  916. ignore:
  917. cur_token++;
  918. exit:
  919. *p_cur_token = cur_token;
  920. }
  921. static void cf_preprocess_tokens(struct cf_preprocessor *pp, bool if_block, struct cf_token **p_cur_token)
  922. {
  923. bool newline = true;
  924. bool preprocessor_line = if_block;
  925. struct cf_token *cur_token = *p_cur_token;
  926. while (cur_token->type != CFTOKEN_NONE) {
  927. if (cur_token->type != CFTOKEN_SPACETAB && cur_token->type != CFTOKEN_NEWLINE) {
  928. if (preprocessor_line) {
  929. cf_adderror_expected_newline(pp, cur_token);
  930. if (!go_to_newline(&cur_token))
  931. break;
  932. }
  933. if (newline && *cur_token->str.array == '#') {
  934. next_token(&cur_token, true);
  935. preprocessor_line = true;
  936. if (!cf_preprocessor(pp, if_block, &cur_token))
  937. break;
  938. continue;
  939. }
  940. newline = false;
  941. }
  942. if (cur_token->type == CFTOKEN_NEWLINE) {
  943. newline = true;
  944. preprocessor_line = false;
  945. } else if (cur_token->type == CFTOKEN_NONE) {
  946. break;
  947. }
  948. cf_preprocess_addtoken(pp, &pp->tokens, &cur_token, NULL, NULL);
  949. }
  950. *p_cur_token = cur_token;
  951. }
  952. void cf_preprocessor_init(struct cf_preprocessor *pp)
  953. {
  954. da_init(pp->defines);
  955. da_init(pp->sys_include_dirs);
  956. da_init(pp->dependencies);
  957. da_init(pp->tokens);
  958. pp->lex = NULL;
  959. pp->ed = NULL;
  960. pp->ignore_state = false;
  961. }
  962. void cf_preprocessor_free(struct cf_preprocessor *pp)
  963. {
  964. struct cf_lexer *dependencies = pp->dependencies.array;
  965. char **sys_include_dirs = pp->sys_include_dirs.array;
  966. struct cf_def *defs = pp->defines.array;
  967. size_t i;
  968. for (i = 0; i < pp->defines.num; i++)
  969. cf_def_free(defs + i);
  970. for (i = 0; i < pp->sys_include_dirs.num; i++)
  971. bfree(sys_include_dirs[i]);
  972. for (i = 0; i < pp->dependencies.num; i++)
  973. cf_lexer_free(dependencies + i);
  974. da_free(pp->defines);
  975. da_free(pp->sys_include_dirs);
  976. da_free(pp->dependencies);
  977. da_free(pp->tokens);
  978. pp->lex = NULL;
  979. pp->ed = NULL;
  980. pp->ignore_state = false;
  981. }
  982. bool cf_preprocess(struct cf_preprocessor *pp, struct cf_lexer *lex, struct error_data *ed)
  983. {
  984. struct cf_token *token = cf_lexer_get_tokens(lex);
  985. if (!token)
  986. return false;
  987. pp->ed = ed;
  988. pp->lex = lex;
  989. cf_preprocess_tokens(pp, false, &token);
  990. da_push_back(pp->tokens, token);
  991. return !lex->unexpected_eof;
  992. }
  993. void cf_preprocessor_add_def(struct cf_preprocessor *pp, struct cf_def *def)
  994. {
  995. struct cf_def *existing = cf_preprocess_get_def(pp, &def->name.str);
  996. if (existing) {
  997. struct dstr name;
  998. dstr_init_copy_strref(&name, &def->name.str);
  999. cf_addwarning(pp, &def->name, "Token $1 already defined", name.array, NULL, NULL);
  1000. cf_addwarning(pp, &existing->name, "Previous definition of $1 is here", name.array, NULL, NULL);
  1001. cf_def_free(existing);
  1002. memcpy(existing, def, sizeof(struct cf_def));
  1003. } else {
  1004. da_push_back(pp->defines, def);
  1005. }
  1006. }
  1007. void cf_preprocessor_remove_def(struct cf_preprocessor *pp, const char *def_name)
  1008. {
  1009. struct strref ref;
  1010. ref.array = def_name;
  1011. ref.len = strlen(def_name);
  1012. cf_preprocess_remove_def_strref(pp, &ref);
  1013. }