/* cf-lexer.c */
/*
 * Copyright (c) 2013 Hugh Bailey <[email protected]>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
  16. #include <ctype.h>
  17. #include <stdio.h>
  18. #include "platform.h"
  19. #include "cf-lexer.h"
  20. static inline void cf_convert_from_escape_literal(char **p_dst,
  21. const char **p_src)
  22. {
  23. char *dst = *p_dst;
  24. const char *src = *p_src;
  25. switch (*(src++)) {
  26. case '\'':
  27. *(dst++) = '\'';
  28. break;
  29. case '\"':
  30. *(dst++) = '\"';
  31. break;
  32. case '\?':
  33. *(dst++) = '\?';
  34. break;
  35. case '\\':
  36. *(dst++) = '\\';
  37. break;
  38. case '0':
  39. *(dst++) = '\0';
  40. break;
  41. case 'a':
  42. *(dst++) = '\a';
  43. break;
  44. case 'b':
  45. *(dst++) = '\b';
  46. break;
  47. case 'f':
  48. *(dst++) = '\f';
  49. break;
  50. case 'n':
  51. *(dst++) = '\n';
  52. break;
  53. case 'r':
  54. *(dst++) = '\r';
  55. break;
  56. case 't':
  57. *(dst++) = '\t';
  58. break;
  59. case 'v':
  60. *(dst++) = '\v';
  61. break;
  62. /* hex */
  63. case 'X':
  64. case 'x':
  65. *(dst++) = (char)strtoul(src, NULL, 16);
  66. src += 2;
  67. break;
  68. /* oct */
  69. default:
  70. if (isdigit(*src)) {
  71. *(dst++) = (char)strtoul(src, NULL, 8);
  72. src += 3;
  73. }
  74. /* case 'u':
  75. case 'U': */
  76. }
  77. *p_dst = dst;
  78. *p_src = src;
  79. }
  80. char *cf_literal_to_str(const char *literal, size_t count)
  81. {
  82. const char *temp_src;
  83. char *str, *temp_dst;
  84. if (!count)
  85. count = strlen(literal);
  86. if (count < 2)
  87. return NULL;
  88. if (literal[0] != literal[count - 1])
  89. return NULL;
  90. if (literal[0] != '\"' && literal[0] != '\'')
  91. return NULL;
  92. /* strip leading and trailing quote characters */
  93. str = bzalloc(--count);
  94. temp_src = literal + 1;
  95. temp_dst = str;
  96. while (*temp_src && --count > 0) {
  97. if (*temp_src == '\\') {
  98. temp_src++;
  99. cf_convert_from_escape_literal(&temp_dst, &temp_src);
  100. } else {
  101. *(temp_dst++) = *(temp_src++);
  102. }
  103. }
  104. *temp_dst = 0;
  105. return str;
  106. }
  107. static bool cf_is_token_break(struct base_token *start_token,
  108. const struct base_token *token)
  109. {
  110. switch (start_token->type) {
  111. case BASETOKEN_ALPHA:
  112. if (token->type == BASETOKEN_OTHER ||
  113. token->type == BASETOKEN_WHITESPACE)
  114. return true;
  115. break;
  116. case BASETOKEN_DIGIT:
  117. if (token->type == BASETOKEN_WHITESPACE ||
  118. (token->type == BASETOKEN_OTHER &&
  119. *token->text.array != '.'))
  120. return true;
  121. break;
  122. case BASETOKEN_WHITESPACE:
  123. /* lump all non-newline whitespace together when possible */
  124. if (is_space_or_tab(*start_token->text.array) &&
  125. is_space_or_tab(*token->text.array))
  126. break;
  127. return true;
  128. case BASETOKEN_OTHER:
  129. if (*start_token->text.array == '.' &&
  130. token->type == BASETOKEN_DIGIT) {
  131. start_token->type = BASETOKEN_DIGIT;
  132. break;
  133. }
  134. /* Falls through. */
  135. case BASETOKEN_NONE:
  136. return true;
  137. }
  138. return false;
  139. }
  140. static inline bool cf_is_splice(const char *array)
  141. {
  142. return (*array == '\\' && is_newline(array[1]));
  143. }
  144. static inline void cf_pass_any_splices(const char **parray)
  145. {
  146. while (cf_is_splice(*parray))
  147. *parray += 1 + newline_size((*parray) + 1);
  148. }
  149. static inline bool cf_is_comment(const char *array)
  150. {
  151. const char *offset = array;
  152. if (*offset++ == '/') {
  153. cf_pass_any_splices(&offset);
  154. return (*offset == '*' || *offset == '/');
  155. }
  156. return false;
  157. }
  158. static bool cf_lexer_process_comment(struct cf_lexer *lex,
  159. struct cf_token *out_token)
  160. {
  161. const char *offset;
  162. if (!cf_is_comment(out_token->unmerged_str.array))
  163. return false;
  164. offset = lex->base_lexer.offset;
  165. cf_pass_any_splices(&offset);
  166. strcpy(lex->write_offset++, " ");
  167. out_token->str.len = 1;
  168. if (*offset == '/') {
  169. while (*++offset && !is_newline(*offset))
  170. cf_pass_any_splices(&offset);
  171. } else if (*offset == '*') {
  172. bool was_star = false;
  173. lex->unexpected_eof = true;
  174. while (*++offset) {
  175. cf_pass_any_splices(&offset);
  176. if (was_star && *offset == '/') {
  177. offset++;
  178. lex->unexpected_eof = false;
  179. break;
  180. } else {
  181. was_star = (*offset == '*');
  182. }
  183. }
  184. }
  185. out_token->unmerged_str.len +=
  186. (size_t)(offset - out_token->unmerged_str.array);
  187. out_token->type = CFTOKEN_SPACETAB;
  188. lex->base_lexer.offset = offset;
  189. return true;
  190. }
  191. static inline void cf_lexer_write_strref(struct cf_lexer *lex,
  192. const struct strref *ref)
  193. {
  194. strncpy(lex->write_offset, ref->array, ref->len);
  195. lex->write_offset[ref->len] = 0;
  196. lex->write_offset += ref->len;
  197. }
  198. static bool cf_lexer_is_include(struct cf_lexer *lex)
  199. {
  200. bool found_include_import = false;
  201. bool found_preprocessor = false;
  202. size_t i;
  203. for (i = lex->tokens.num; i > 0; i--) {
  204. struct cf_token *token = lex->tokens.array + (i - 1);
  205. if (is_space_or_tab(*token->str.array))
  206. continue;
  207. if (!found_include_import) {
  208. if (strref_cmp(&token->str, "include") != 0 &&
  209. strref_cmp(&token->str, "import") != 0)
  210. break;
  211. found_include_import = true;
  212. } else if (!found_preprocessor) {
  213. if (*token->str.array != '#')
  214. break;
  215. found_preprocessor = true;
  216. } else {
  217. return is_newline(*token->str.array);
  218. }
  219. }
  220. /* if starting line */
  221. return found_preprocessor && found_include_import;
  222. }
  223. static void cf_lexer_getstrtoken(struct cf_lexer *lex,
  224. struct cf_token *out_token, char delimiter,
  225. bool allow_escaped_delimiters)
  226. {
  227. const char *offset = lex->base_lexer.offset;
  228. bool escaped = false;
  229. out_token->unmerged_str.len++;
  230. out_token->str.len++;
  231. cf_lexer_write_strref(lex, &out_token->unmerged_str);
  232. while (*offset) {
  233. cf_pass_any_splices(&offset);
  234. if (*offset == delimiter) {
  235. if (!escaped) {
  236. *lex->write_offset++ = *offset;
  237. out_token->str.len++;
  238. offset++;
  239. break;
  240. }
  241. } else if (is_newline(*offset)) {
  242. break;
  243. }
  244. *lex->write_offset++ = *offset;
  245. out_token->str.len++;
  246. escaped = (allow_escaped_delimiters && *offset == '\\');
  247. offset++;
  248. }
  249. *lex->write_offset = 0;
  250. out_token->unmerged_str.len +=
  251. (size_t)(offset - out_token->unmerged_str.array);
  252. out_token->type = CFTOKEN_STRING;
  253. lex->base_lexer.offset = offset;
  254. }
  255. static bool cf_lexer_process_string(struct cf_lexer *lex,
  256. struct cf_token *out_token)
  257. {
  258. char ch = *out_token->unmerged_str.array;
  259. if (ch == '<' && cf_lexer_is_include(lex)) {
  260. cf_lexer_getstrtoken(lex, out_token, '>', false);
  261. return true;
  262. } else if (ch == '"' || ch == '\'') {
  263. cf_lexer_getstrtoken(lex, out_token, ch,
  264. !cf_lexer_is_include(lex));
  265. return true;
  266. }
  267. return false;
  268. }
  269. static inline enum cf_token_type
  270. cf_get_token_type(const struct cf_token *token,
  271. const struct base_token *start_token)
  272. {
  273. switch (start_token->type) {
  274. case BASETOKEN_ALPHA:
  275. return CFTOKEN_NAME;
  276. case BASETOKEN_DIGIT:
  277. return CFTOKEN_NUM;
  278. case BASETOKEN_WHITESPACE:
  279. if (is_newline(*token->str.array))
  280. return CFTOKEN_NEWLINE;
  281. else
  282. return CFTOKEN_SPACETAB;
  283. case BASETOKEN_NONE:
  284. case BASETOKEN_OTHER:
  285. break;
  286. }
  287. return CFTOKEN_OTHER;
  288. }
  289. static bool cf_lexer_nexttoken(struct cf_lexer *lex, struct cf_token *out_token)
  290. {
  291. struct base_token token, start_token;
  292. bool wrote_data = false;
  293. base_token_clear(&token);
  294. base_token_clear(&start_token);
  295. cf_token_clear(out_token);
  296. while (lexer_getbasetoken(&lex->base_lexer, &token, PARSE_WHITESPACE)) {
  297. /* reclassify underscore as alpha for alnum tokens */
  298. if (*token.text.array == '_')
  299. token.type = BASETOKEN_ALPHA;
  300. /* ignore escaped newlines to merge spliced lines */
  301. if (cf_is_splice(token.text.array)) {
  302. lex->base_lexer.offset +=
  303. newline_size(token.text.array + 1);
  304. continue;
  305. }
  306. if (!wrote_data) {
  307. out_token->unmerged_str.array = token.text.array;
  308. out_token->str.array = lex->write_offset;
  309. /* if comment then output a space */
  310. if (cf_lexer_process_comment(lex, out_token))
  311. return true;
  312. /* process string tokens if any */
  313. if (cf_lexer_process_string(lex, out_token))
  314. return true;
  315. base_token_copy(&start_token, &token);
  316. wrote_data = true;
  317. } else if (cf_is_token_break(&start_token, &token)) {
  318. lex->base_lexer.offset -= token.text.len;
  319. break;
  320. }
  321. /* write token to CF lexer to account for splicing/comments */
  322. cf_lexer_write_strref(lex, &token.text);
  323. out_token->str.len += token.text.len;
  324. }
  325. if (wrote_data) {
  326. out_token->unmerged_str.len = (size_t)(
  327. lex->base_lexer.offset - out_token->unmerged_str.array);
  328. out_token->type = cf_get_token_type(out_token, &start_token);
  329. }
  330. return wrote_data;
  331. }
  332. void cf_lexer_init(struct cf_lexer *lex)
  333. {
  334. lexer_init(&lex->base_lexer);
  335. da_init(lex->tokens);
  336. lex->file = NULL;
  337. lex->reformatted = NULL;
  338. lex->write_offset = NULL;
  339. lex->unexpected_eof = false;
  340. }
  341. void cf_lexer_free(struct cf_lexer *lex)
  342. {
  343. bfree(lex->file);
  344. bfree(lex->reformatted);
  345. lexer_free(&lex->base_lexer);
  346. da_free(lex->tokens);
  347. lex->file = NULL;
  348. lex->reformatted = NULL;
  349. lex->write_offset = NULL;
  350. lex->unexpected_eof = false;
  351. }
  352. bool cf_lexer_lex(struct cf_lexer *lex, const char *str, const char *file)
  353. {
  354. struct cf_token token;
  355. struct cf_token *last_token = NULL;
  356. cf_lexer_free(lex);
  357. if (!str || !*str)
  358. return false;
  359. if (file)
  360. lex->file = bstrdup(file);
  361. lexer_start(&lex->base_lexer, str);
  362. cf_token_clear(&token);
  363. lex->reformatted = bmalloc(strlen(str) + 1);
  364. lex->reformatted[0] = 0;
  365. lex->write_offset = lex->reformatted;
  366. while (cf_lexer_nexttoken(lex, &token)) {
  367. if (last_token && is_space_or_tab(*last_token->str.array) &&
  368. is_space_or_tab(*token.str.array)) {
  369. cf_token_add(last_token, &token);
  370. continue;
  371. }
  372. token.lex = lex;
  373. last_token = da_push_back_new(lex->tokens);
  374. memcpy(last_token, &token, sizeof(struct cf_token));
  375. }
  376. cf_token_clear(&token);
  377. token.str.array = lex->write_offset;
  378. token.unmerged_str.array = lex->base_lexer.offset;
  379. token.lex = lex;
  380. da_push_back(lex->tokens, &token);
  381. return !lex->unexpected_eof;
  382. }
  383. /* ------------------------------------------------------------------------- */
  384. struct macro_param {
  385. struct cf_token name;
  386. DARRAY(struct cf_token) tokens;
  387. };
  388. static inline void macro_param_init(struct macro_param *param)
  389. {
  390. cf_token_clear(&param->name);
  391. da_init(param->tokens);
  392. }
  393. static inline void macro_param_free(struct macro_param *param)
  394. {
  395. cf_token_clear(&param->name);
  396. da_free(param->tokens);
  397. }
  398. /* ------------------------------------------------------------------------- */
  399. struct macro_params {
  400. DARRAY(struct macro_param) params;
  401. };
  402. static inline void macro_params_init(struct macro_params *params)
  403. {
  404. da_init(params->params);
  405. }
  406. static inline void macro_params_free(struct macro_params *params)
  407. {
  408. size_t i;
  409. for (i = 0; i < params->params.num; i++)
  410. macro_param_free(params->params.array + i);
  411. da_free(params->params);
  412. }
  413. static inline struct macro_param *
  414. get_macro_param(const struct macro_params *params, const struct strref *name)
  415. {
  416. size_t i;
  417. if (!params)
  418. return NULL;
  419. for (i = 0; i < params->params.num; i++) {
  420. struct macro_param *param = params->params.array + i;
  421. if (strref_cmp_strref(&param->name.str, name) == 0)
  422. return param;
  423. }
  424. return NULL;
  425. }
  426. /* ------------------------------------------------------------------------- */
  427. static bool cf_preprocessor(struct cf_preprocessor *pp, bool if_block,
  428. struct cf_token **p_cur_token);
  429. static void cf_preprocess_tokens(struct cf_preprocessor *pp, bool if_block,
  430. struct cf_token **p_cur_token);
  431. static inline bool go_to_newline(struct cf_token **p_cur_token)
  432. {
  433. struct cf_token *cur_token = *p_cur_token;
  434. while (cur_token->type != CFTOKEN_NEWLINE &&
  435. cur_token->type != CFTOKEN_NONE)
  436. cur_token++;
  437. *p_cur_token = cur_token;
  438. return cur_token->type != CFTOKEN_NONE;
  439. }
  440. static inline bool next_token(struct cf_token **p_cur_token, bool preprocessor)
  441. {
  442. struct cf_token *cur_token = *p_cur_token;
  443. if (cur_token->type != CFTOKEN_NONE)
  444. cur_token++;
  445. /* if preprocessor, stop at newline */
  446. while (cur_token->type == CFTOKEN_SPACETAB &&
  447. (preprocessor || cur_token->type == CFTOKEN_NEWLINE))
  448. cur_token++;
  449. *p_cur_token = cur_token;
  450. return cur_token->type != CFTOKEN_NONE;
  451. }
  452. static inline void cf_gettokenoffset(struct cf_preprocessor *pp,
  453. const struct cf_token *token,
  454. uint32_t *row, uint32_t *col)
  455. {
  456. lexer_getstroffset(&pp->lex->base_lexer, token->unmerged_str.array, row,
  457. col);
  458. }
  459. static void cf_addew(struct cf_preprocessor *pp, const struct cf_token *token,
  460. const char *message, int error_level, const char *val1,
  461. const char *val2, const char *val3)
  462. {
  463. uint32_t row, col;
  464. cf_gettokenoffset(pp, token, &row, &col);
  465. if (!val1 && !val2 && !val3) {
  466. error_data_add(pp->ed, token->lex->file, row, col, message,
  467. error_level);
  468. } else {
  469. struct dstr formatted;
  470. dstr_init(&formatted);
  471. dstr_safe_printf(&formatted, message, val1, val2, val3, NULL);
  472. error_data_add(pp->ed, token->lex->file, row, col,
  473. formatted.array, error_level);
  474. dstr_free(&formatted);
  475. }
  476. }
  477. static inline void cf_adderror(struct cf_preprocessor *pp,
  478. const struct cf_token *token, const char *error,
  479. const char *val1, const char *val2,
  480. const char *val3)
  481. {
  482. cf_addew(pp, token, error, LEX_ERROR, val1, val2, val3);
  483. }
  484. static inline void cf_addwarning(struct cf_preprocessor *pp,
  485. const struct cf_token *token,
  486. const char *warning, const char *val1,
  487. const char *val2, const char *val3)
  488. {
  489. cf_addew(pp, token, warning, LEX_WARNING, val1, val2, val3);
  490. }
  491. static inline void cf_adderror_expecting(struct cf_preprocessor *pp,
  492. const struct cf_token *token,
  493. const char *expecting)
  494. {
  495. cf_adderror(pp, token, "Expected $1", expecting, NULL, NULL);
  496. }
  497. static inline void cf_adderror_expected_newline(struct cf_preprocessor *pp,
  498. const struct cf_token *token)
  499. {
  500. cf_adderror(pp, token,
  501. "Unexpected token after preprocessor, expected "
  502. "newline",
  503. NULL, NULL, NULL);
  504. }
  505. static inline void
  506. cf_adderror_unexpected_endif_eof(struct cf_preprocessor *pp,
  507. const struct cf_token *token)
  508. {
  509. cf_adderror(pp, token, "Unexpected end of file before #endif", NULL,
  510. NULL, NULL);
  511. }
  512. static inline void cf_adderror_unexpected_eof(struct cf_preprocessor *pp,
  513. const struct cf_token *token)
  514. {
  515. cf_adderror(pp, token, "Unexpected end of file", NULL, NULL, NULL);
  516. }
  517. static inline void insert_path(struct cf_preprocessor *pp,
  518. struct dstr *str_file)
  519. {
  520. const char *file;
  521. const char *slash;
  522. if (pp && pp->lex && pp->lex->file) {
  523. file = pp->lex->file;
  524. slash = strrchr(file, '/');
  525. if (slash) {
  526. struct dstr path = {0};
  527. dstr_ncopy(&path, file, slash - file + 1);
  528. dstr_insert_dstr(str_file, 0, &path);
  529. dstr_free(&path);
  530. }
  531. }
  532. }
  533. static void cf_include_file(struct cf_preprocessor *pp,
  534. const struct cf_token *file_token)
  535. {
  536. struct cf_lexer new_lex;
  537. struct dstr str_file;
  538. FILE *file;
  539. char *file_data;
  540. struct cf_token *tokens;
  541. size_t i;
  542. dstr_init(&str_file);
  543. dstr_copy_strref(&str_file, &file_token->str);
  544. dstr_mid(&str_file, &str_file, 1, str_file.len - 2);
  545. insert_path(pp, &str_file);
  546. /* if dependency already exists, run preprocessor on it */
  547. for (i = 0; i < pp->dependencies.num; i++) {
  548. struct cf_lexer *dep = pp->dependencies.array + i;
  549. if (strcmp(dep->file, str_file.array) == 0) {
  550. tokens = cf_lexer_get_tokens(dep);
  551. cf_preprocess_tokens(pp, false, &tokens);
  552. goto exit;
  553. }
  554. }
  555. file = os_fopen(str_file.array, "rb");
  556. if (!file) {
  557. cf_adderror(pp, file_token, "Could not open file '$1'",
  558. file_token->str.array, NULL, NULL);
  559. goto exit;
  560. }
  561. os_fread_utf8(file, &file_data);
  562. fclose(file);
  563. cf_lexer_init(&new_lex);
  564. cf_lexer_lex(&new_lex, file_data, str_file.array);
  565. tokens = cf_lexer_get_tokens(&new_lex);
  566. cf_preprocess_tokens(pp, false, &tokens);
  567. bfree(file_data);
  568. da_push_back(pp->dependencies, &new_lex);
  569. exit:
  570. dstr_free(&str_file);
  571. }
  572. static inline bool is_sys_include(struct strref *ref)
  573. {
  574. return ref->len >= 2 && ref->array[0] == '<' &&
  575. ref->array[ref->len - 1] == '>';
  576. }
  577. static inline bool is_loc_include(struct strref *ref)
  578. {
  579. return ref->len >= 2 && ref->array[0] == '"' &&
  580. ref->array[ref->len - 1] == '"';
  581. }
  582. static void cf_preprocess_include(struct cf_preprocessor *pp,
  583. struct cf_token **p_cur_token)
  584. {
  585. struct cf_token *cur_token = *p_cur_token;
  586. if (pp->ignore_state) {
  587. go_to_newline(p_cur_token);
  588. return;
  589. }
  590. next_token(&cur_token, true);
  591. if (cur_token->type != CFTOKEN_STRING) {
  592. cf_adderror_expecting(pp, cur_token, "string");
  593. go_to_newline(&cur_token);
  594. goto exit;
  595. }
  596. if (is_sys_include(&cur_token->str)) {
  597. /* TODO */
  598. } else if (is_loc_include(&cur_token->str)) {
  599. if (!pp->ignore_state)
  600. cf_include_file(pp, cur_token);
  601. } else {
  602. cf_adderror(pp, cur_token, "Invalid or incomplete string", NULL,
  603. NULL, NULL);
  604. go_to_newline(&cur_token);
  605. goto exit;
  606. }
  607. cur_token++;
  608. exit:
  609. *p_cur_token = cur_token;
  610. }
  611. static bool cf_preprocess_macro_params(struct cf_preprocessor *pp,
  612. struct cf_def *def,
  613. struct cf_token **p_cur_token)
  614. {
  615. struct cf_token *cur_token = *p_cur_token;
  616. bool success = false;
  617. def->macro = true;
  618. do {
  619. next_token(&cur_token, true);
  620. if (cur_token->type != CFTOKEN_NAME) {
  621. cf_adderror_expecting(pp, cur_token, "identifier");
  622. go_to_newline(&cur_token);
  623. goto exit;
  624. }
  625. cf_def_addparam(def, cur_token);
  626. next_token(&cur_token, true);
  627. if (cur_token->type != CFTOKEN_OTHER ||
  628. (*cur_token->str.array != ',' &&
  629. *cur_token->str.array != ')')) {
  630. cf_adderror_expecting(pp, cur_token, "',' or ')'");
  631. go_to_newline(&cur_token);
  632. goto exit;
  633. }
  634. } while (*cur_token->str.array != ')');
  635. /* ended properly, now go to first define token (or newline) */
  636. next_token(&cur_token, true);
  637. success = true;
  638. exit:
  639. *p_cur_token = cur_token;
  640. return success;
  641. }
  642. #define INVALID_INDEX ((size_t)-1)
  643. static inline size_t cf_preprocess_get_def_idx(struct cf_preprocessor *pp,
  644. const struct strref *def_name)
  645. {
  646. struct cf_def *array = pp->defines.array;
  647. size_t i;
  648. for (i = 0; i < pp->defines.num; i++) {
  649. struct cf_def *cur_def = array + i;
  650. if (strref_cmp_strref(&cur_def->name.str, def_name) == 0)
  651. return i;
  652. }
  653. return INVALID_INDEX;
  654. }
  655. static inline struct cf_def *
  656. cf_preprocess_get_def(struct cf_preprocessor *pp, const struct strref *def_name)
  657. {
  658. size_t idx = cf_preprocess_get_def_idx(pp, def_name);
  659. if (idx == INVALID_INDEX)
  660. return NULL;
  661. return pp->defines.array + idx;
  662. }
  663. static char space_filler[2] = " ";
  664. static inline void append_space(struct cf_preprocessor *pp,
  665. struct darray *tokens,
  666. const struct cf_token *base)
  667. {
  668. struct cf_token token;
  669. strref_set(&token.str, space_filler, 1);
  670. token.type = CFTOKEN_SPACETAB;
  671. if (base) {
  672. token.lex = base->lex;
  673. strref_copy(&token.unmerged_str, &base->unmerged_str);
  674. } else {
  675. token.lex = pp->lex;
  676. strref_copy(&token.unmerged_str, &token.str);
  677. }
  678. darray_push_back(sizeof(struct cf_token), tokens, &token);
  679. }
  680. static inline void append_end_token(struct darray *tokens)
  681. {
  682. struct cf_token end;
  683. cf_token_clear(&end);
  684. darray_push_back(sizeof(struct cf_token), tokens, &end);
  685. }
  686. static void cf_preprocess_define(struct cf_preprocessor *pp,
  687. struct cf_token **p_cur_token)
  688. {
  689. struct cf_token *cur_token = *p_cur_token;
  690. struct cf_def def;
  691. if (pp->ignore_state) {
  692. go_to_newline(p_cur_token);
  693. return;
  694. }
  695. cf_def_init(&def);
  696. next_token(&cur_token, true);
  697. if (cur_token->type != CFTOKEN_NAME) {
  698. cf_adderror_expecting(pp, cur_token, "identifier");
  699. go_to_newline(&cur_token);
  700. goto exit;
  701. }
  702. append_space(pp, &def.tokens.da, NULL);
  703. cf_token_copy(&def.name, cur_token);
  704. if (!next_token(&cur_token, true))
  705. goto complete;
  706. /* process macro */
  707. if (*cur_token->str.array == '(') {
  708. if (!cf_preprocess_macro_params(pp, &def, &cur_token))
  709. goto error;
  710. }
  711. while (cur_token->type != CFTOKEN_NEWLINE &&
  712. cur_token->type != CFTOKEN_NONE)
  713. cf_def_addtoken(&def, cur_token++);
  714. complete:
  715. append_end_token(&def.tokens.da);
  716. append_space(pp, &def.tokens.da, NULL);
  717. da_push_back(pp->defines, &def);
  718. goto exit;
  719. error:
  720. cf_def_free(&def);
  721. exit:
  722. *p_cur_token = cur_token;
  723. }
  724. static inline void cf_preprocess_remove_def_strref(struct cf_preprocessor *pp,
  725. const struct strref *ref)
  726. {
  727. size_t def_idx = cf_preprocess_get_def_idx(pp, ref);
  728. if (def_idx != INVALID_INDEX) {
  729. struct cf_def *array = pp->defines.array;
  730. cf_def_free(array + def_idx);
  731. da_erase(pp->defines, def_idx);
  732. }
  733. }
  734. static void cf_preprocess_undef(struct cf_preprocessor *pp,
  735. struct cf_token **p_cur_token)
  736. {
  737. struct cf_token *cur_token = *p_cur_token;
  738. if (pp->ignore_state) {
  739. go_to_newline(p_cur_token);
  740. return;
  741. }
  742. next_token(&cur_token, true);
  743. if (cur_token->type != CFTOKEN_NAME) {
  744. cf_adderror_expecting(pp, cur_token, "identifier");
  745. go_to_newline(&cur_token);
  746. goto exit;
  747. }
  748. cf_preprocess_remove_def_strref(pp, &cur_token->str);
  749. cur_token++;
  750. exit:
  751. *p_cur_token = cur_token;
  752. }
  753. /* Processes an #ifdef/#ifndef/#if/#else/#elif sub block recursively */
  754. static inline bool cf_preprocess_subblock(struct cf_preprocessor *pp,
  755. bool ignore,
  756. struct cf_token **p_cur_token)
  757. {
  758. bool eof;
  759. if (!next_token(p_cur_token, true))
  760. return false;
  761. if (!pp->ignore_state) {
  762. pp->ignore_state = ignore;
  763. cf_preprocess_tokens(pp, true, p_cur_token);
  764. pp->ignore_state = false;
  765. } else {
  766. cf_preprocess_tokens(pp, true, p_cur_token);
  767. }
  768. eof = ((*p_cur_token)->type == CFTOKEN_NONE);
  769. if (eof)
  770. cf_adderror_unexpected_endif_eof(pp, *p_cur_token);
  771. return !eof;
  772. }
  773. static void cf_preprocess_ifdef(struct cf_preprocessor *pp, bool ifnot,
  774. struct cf_token **p_cur_token)
  775. {
  776. struct cf_token *cur_token = *p_cur_token;
  777. struct cf_def *def;
  778. bool is_true;
  779. next_token(&cur_token, true);
  780. if (cur_token->type != CFTOKEN_NAME) {
  781. cf_adderror_expecting(pp, cur_token, "identifier");
  782. go_to_newline(&cur_token);
  783. goto exit;
  784. }
  785. def = cf_preprocess_get_def(pp, &cur_token->str);
  786. is_true = (def == NULL) == ifnot;
  787. if (!cf_preprocess_subblock(pp, !is_true, &cur_token))
  788. goto exit;
  789. if (strref_cmp(&cur_token->str, "else") == 0) {
  790. if (!cf_preprocess_subblock(pp, is_true, &cur_token))
  791. goto exit;
  792. /*} else if (strref_cmp(&cur_token->str, "elif") == 0) {*/
  793. }
  794. cur_token++;
  795. exit:
  796. *p_cur_token = cur_token;
  797. }
  798. static bool cf_preprocessor(struct cf_preprocessor *pp, bool if_block,
  799. struct cf_token **p_cur_token)
  800. {
  801. struct cf_token *cur_token = *p_cur_token;
  802. if (strref_cmp(&cur_token->str, "include") == 0) {
  803. cf_preprocess_include(pp, p_cur_token);
  804. } else if (strref_cmp(&cur_token->str, "define") == 0) {
  805. cf_preprocess_define(pp, p_cur_token);
  806. } else if (strref_cmp(&cur_token->str, "undef") == 0) {
  807. cf_preprocess_undef(pp, p_cur_token);
  808. } else if (strref_cmp(&cur_token->str, "ifdef") == 0) {
  809. cf_preprocess_ifdef(pp, false, p_cur_token);
  810. } else if (strref_cmp(&cur_token->str, "ifndef") == 0) {
  811. cf_preprocess_ifdef(pp, true, p_cur_token);
  812. /*} else if (strref_cmp(&cur_token->str, "if") == 0) {
  813. TODO;*/
  814. } else if (strref_cmp(&cur_token->str, "else") == 0 ||
  815. /*strref_cmp(&cur_token->str, "elif") == 0 ||*/
  816. strref_cmp(&cur_token->str, "endif") == 0) {
  817. if (!if_block) {
  818. struct dstr name;
  819. dstr_init_copy_strref(&name, &cur_token->str);
  820. cf_adderror(pp, cur_token,
  821. "#$1 outside of "
  822. "#if/#ifdef/#ifndef block",
  823. name.array, NULL, NULL);
  824. dstr_free(&name);
  825. (*p_cur_token)++;
  826. return true;
  827. }
  828. return false;
  829. } else if (cur_token->type != CFTOKEN_NEWLINE &&
  830. cur_token->type != CFTOKEN_NONE) {
  831. /*
  832. * TODO: language-specific preprocessor stuff should be sent to
  833. * handler of some sort
  834. */
  835. (*p_cur_token)++;
  836. }
  837. return true;
  838. }
  839. static void cf_preprocess_addtoken(struct cf_preprocessor *pp,
  840. struct darray *dst, /* struct cf_token */
  841. struct cf_token **p_cur_token,
  842. const struct cf_token *base,
  843. const struct macro_params *params);
  844. /*
  845. * collects tokens for a macro parameter
  846. *
  847. * note that it is important to make sure that any usage of function calls
  848. * within a macro parameter is preserved, example MACRO(func(1, 2), 3), do not
  849. * let it stop on the comma at "1,"
  850. */
  851. static void cf_preprocess_save_macro_param(
  852. struct cf_preprocessor *pp, struct cf_token **p_cur_token,
  853. struct macro_param *param, const struct cf_token *base,
  854. const struct macro_params *cur_params)
  855. {
  856. struct cf_token *cur_token = *p_cur_token;
  857. int brace_count = 0;
  858. append_space(pp, &param->tokens.da, base);
  859. while (cur_token->type != CFTOKEN_NONE) {
  860. if (*cur_token->str.array == '(') {
  861. brace_count++;
  862. } else if (*cur_token->str.array == ')') {
  863. if (brace_count)
  864. brace_count--;
  865. else
  866. break;
  867. } else if (*cur_token->str.array == ',') {
  868. if (!brace_count)
  869. break;
  870. }
  871. cf_preprocess_addtoken(pp, &param->tokens.da, &cur_token, base,
  872. cur_params);
  873. }
  874. if (cur_token->type == CFTOKEN_NONE)
  875. cf_adderror_unexpected_eof(pp, cur_token);
  876. append_space(pp, &param->tokens.da, base);
  877. append_end_token(&param->tokens.da);
  878. *p_cur_token = cur_token;
  879. }
  880. static inline bool param_is_whitespace(const struct macro_param *param)
  881. {
  882. struct cf_token *array = param->tokens.array;
  883. size_t i;
  884. for (i = 0; i < param->tokens.num; i++)
  885. if (array[i].type != CFTOKEN_NONE &&
  886. array[i].type != CFTOKEN_SPACETAB &&
  887. array[i].type != CFTOKEN_NEWLINE)
  888. return false;
  889. return true;
  890. }
  891. /* collects parameter tokens of a used macro and stores them for the unwrap */
  892. static void cf_preprocess_save_macro_params(
  893. struct cf_preprocessor *pp, struct cf_token **p_cur_token,
  894. const struct cf_def *def, const struct cf_token *base,
  895. const struct macro_params *cur_params, struct macro_params *dst)
  896. {
  897. struct cf_token *cur_token = *p_cur_token;
  898. size_t count = 0;
  899. next_token(&cur_token, false);
  900. if (cur_token->type != CFTOKEN_OTHER || *cur_token->str.array != '(') {
  901. cf_adderror_expecting(pp, cur_token, "'('");
  902. goto exit;
  903. }
  904. do {
  905. struct macro_param param;
  906. macro_param_init(&param);
  907. cur_token++;
  908. count++;
  909. cf_preprocess_save_macro_param(pp, &cur_token, &param, base,
  910. cur_params);
  911. if (cur_token->type != CFTOKEN_OTHER ||
  912. (*cur_token->str.array != ',' &&
  913. *cur_token->str.array != ')')) {
  914. macro_param_free(&param);
  915. cf_adderror_expecting(pp, cur_token, "',' or ')'");
  916. goto exit;
  917. }
  918. if (param_is_whitespace(&param)) {
  919. /* if 0-param macro, ignore first entry */
  920. if (count == 1 && !def->params.num &&
  921. *cur_token->str.array == ')') {
  922. macro_param_free(&param);
  923. break;
  924. }
  925. }
  926. if (count <= def->params.num) {
  927. cf_token_copy(&param.name,
  928. cf_def_getparam(def, count - 1));
  929. da_push_back(dst->params, &param);
  930. } else {
  931. macro_param_free(&param);
  932. }
  933. } while (*cur_token->str.array != ')');
  934. if (count != def->params.num)
  935. cf_adderror(pp, cur_token,
  936. "Mismatching number of macro parameters", NULL,
  937. NULL, NULL);
  938. exit:
  939. *p_cur_token = cur_token;
  940. }
  941. static inline void cf_preprocess_unwrap_param(
  942. struct cf_preprocessor *pp, struct darray *dst, /* struct cf_token */
  943. struct cf_token **p_cur_token, const struct cf_token *base,
  944. const struct macro_param *param)
  945. {
  946. struct cf_token *cur_token = *p_cur_token;
  947. struct cf_token *cur_param_token = param->tokens.array;
  948. while (cur_param_token->type != CFTOKEN_NONE)
  949. cf_preprocess_addtoken(pp, dst, &cur_param_token, base, NULL);
  950. cur_token++;
  951. *p_cur_token = cur_token;
  952. }
  953. static inline void cf_preprocess_unwrap_define(
  954. struct cf_preprocessor *pp, struct darray *dst, /* struct cf_token */
  955. struct cf_token **p_cur_token, const struct cf_token *base,
  956. const struct cf_def *def, const struct macro_params *cur_params)
  957. {
  958. struct cf_token *cur_token = *p_cur_token;
  959. struct macro_params new_params;
  960. struct cf_token *cur_def_token = def->tokens.array;
  961. macro_params_init(&new_params);
  962. if (def->macro)
  963. cf_preprocess_save_macro_params(pp, &cur_token, def, base,
  964. cur_params, &new_params);
  965. while (cur_def_token->type != CFTOKEN_NONE)
  966. cf_preprocess_addtoken(pp, dst, &cur_def_token, base,
  967. &new_params);
  968. macro_params_free(&new_params);
  969. cur_token++;
  970. *p_cur_token = cur_token;
  971. }
  972. static void cf_preprocess_addtoken(struct cf_preprocessor *pp,
  973. struct darray *dst, /* struct cf_token */
  974. struct cf_token **p_cur_token,
  975. const struct cf_token *base,
  976. const struct macro_params *params)
  977. {
  978. struct cf_token *cur_token = *p_cur_token;
  979. if (pp->ignore_state)
  980. goto ignore;
  981. if (!base)
  982. base = cur_token;
  983. if (cur_token->type == CFTOKEN_NAME) {
  984. struct cf_def *def;
  985. struct macro_param *param;
  986. param = get_macro_param(params, &cur_token->str);
  987. if (param) {
  988. cf_preprocess_unwrap_param(pp, dst, &cur_token, base,
  989. param);
  990. goto exit;
  991. }
  992. def = cf_preprocess_get_def(pp, &cur_token->str);
  993. if (def) {
  994. cf_preprocess_unwrap_define(pp, dst, &cur_token, base,
  995. def, params);
  996. goto exit;
  997. }
  998. }
  999. darray_push_back(sizeof(struct cf_token), dst, cur_token);
  1000. ignore:
  1001. cur_token++;
  1002. exit:
  1003. *p_cur_token = cur_token;
  1004. }
  1005. static void cf_preprocess_tokens(struct cf_preprocessor *pp, bool if_block,
  1006. struct cf_token **p_cur_token)
  1007. {
  1008. bool newline = true;
  1009. bool preprocessor_line = if_block;
  1010. struct cf_token *cur_token = *p_cur_token;
  1011. while (cur_token->type != CFTOKEN_NONE) {
  1012. if (cur_token->type != CFTOKEN_SPACETAB &&
  1013. cur_token->type != CFTOKEN_NEWLINE) {
  1014. if (preprocessor_line) {
  1015. cf_adderror_expected_newline(pp, cur_token);
  1016. if (!go_to_newline(&cur_token))
  1017. break;
  1018. }
  1019. if (newline && *cur_token->str.array == '#') {
  1020. next_token(&cur_token, true);
  1021. preprocessor_line = true;
  1022. if (!cf_preprocessor(pp, if_block, &cur_token))
  1023. break;
  1024. continue;
  1025. }
  1026. newline = false;
  1027. }
  1028. if (cur_token->type == CFTOKEN_NEWLINE) {
  1029. newline = true;
  1030. preprocessor_line = false;
  1031. } else if (cur_token->type == CFTOKEN_NONE) {
  1032. break;
  1033. }
  1034. cf_preprocess_addtoken(pp, &pp->tokens.da, &cur_token, NULL,
  1035. NULL);
  1036. }
  1037. *p_cur_token = cur_token;
  1038. }
  1039. void cf_preprocessor_init(struct cf_preprocessor *pp)
  1040. {
  1041. da_init(pp->defines);
  1042. da_init(pp->sys_include_dirs);
  1043. da_init(pp->dependencies);
  1044. da_init(pp->tokens);
  1045. pp->lex = NULL;
  1046. pp->ed = NULL;
  1047. pp->ignore_state = false;
  1048. }
  1049. void cf_preprocessor_free(struct cf_preprocessor *pp)
  1050. {
  1051. struct cf_lexer *dependencies = pp->dependencies.array;
  1052. char **sys_include_dirs = pp->sys_include_dirs.array;
  1053. struct cf_def *defs = pp->defines.array;
  1054. size_t i;
  1055. for (i = 0; i < pp->defines.num; i++)
  1056. cf_def_free(defs + i);
  1057. for (i = 0; i < pp->sys_include_dirs.num; i++)
  1058. bfree(sys_include_dirs[i]);
  1059. for (i = 0; i < pp->dependencies.num; i++)
  1060. cf_lexer_free(dependencies + i);
  1061. da_free(pp->defines);
  1062. da_free(pp->sys_include_dirs);
  1063. da_free(pp->dependencies);
  1064. da_free(pp->tokens);
  1065. pp->lex = NULL;
  1066. pp->ed = NULL;
  1067. pp->ignore_state = false;
  1068. }
  1069. bool cf_preprocess(struct cf_preprocessor *pp, struct cf_lexer *lex,
  1070. struct error_data *ed)
  1071. {
  1072. struct cf_token *token = cf_lexer_get_tokens(lex);
  1073. if (!token)
  1074. return false;
  1075. pp->ed = ed;
  1076. pp->lex = lex;
  1077. cf_preprocess_tokens(pp, false, &token);
  1078. da_push_back(pp->tokens, token);
  1079. return !lex->unexpected_eof;
  1080. }
  1081. void cf_preprocessor_add_def(struct cf_preprocessor *pp, struct cf_def *def)
  1082. {
  1083. struct cf_def *existing = cf_preprocess_get_def(pp, &def->name.str);
  1084. if (existing) {
  1085. struct dstr name;
  1086. dstr_init_copy_strref(&name, &def->name.str);
  1087. cf_addwarning(pp, &def->name, "Token $1 already defined",
  1088. name.array, NULL, NULL);
  1089. cf_addwarning(pp, &existing->name,
  1090. "Previous definition of $1 is here", name.array,
  1091. NULL, NULL);
  1092. cf_def_free(existing);
  1093. memcpy(existing, def, sizeof(struct cf_def));
  1094. } else {
  1095. da_push_back(pp->defines, def);
  1096. }
  1097. }
  1098. void cf_preprocessor_remove_def(struct cf_preprocessor *pp,
  1099. const char *def_name)
  1100. {
  1101. struct strref ref;
  1102. ref.array = def_name;
  1103. ref.len = strlen(def_name);
  1104. cf_preprocess_remove_def_strref(pp, &ref);
  1105. }