cf-lexer.c 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357
  1. /*
  2. * Copyright (c) 2013 Hugh Bailey <[email protected]>
  3. *
  4. * Permission to use, copy, modify, and distribute this software for any
  5. * purpose with or without fee is hereby granted, provided that the above
  6. * copyright notice and this permission notice appear in all copies.
  7. *
  8. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  10. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  11. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  12. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  13. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  14. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  15. */
  16. #include <ctype.h>
  17. #include <stdio.h>
  18. #include "platform.h"
  19. #include "cf-lexer.h"
  /*
   * Decodes one escape sequence.
   *
   * On entry *p_src points at the character that follows the backslash and
   * *p_dst at the next free output byte.  On return both cursors have been
   * advanced past what was consumed/produced.
   *
   * Supported: the simple escapes (\' \" \? \\ \a \b \f \n \r \t \v), hex
   * escapes (\x or \X followed by one or two hex digits) and octal escapes
   * (one to three octal digits, which also covers \0).  Unknown escapes,
   * including \u and \U, consume the escape character and emit nothing.  A
   * hex escape with no digits emits nothing.  If the backslash is the last
   * character of the string nothing is consumed, so the caller still sees
   * the terminating NUL.
   *
   * At most one byte is written to *p_dst.
   */
  static inline void cf_convert_from_escape_literal(char **p_dst,
                                                    const char **p_src)
  {
      char *dst = *p_dst;
      const char *src = *p_src;
      const char ch = *src;

      /* never step over the terminator */
      if (ch == '\0')
          return;
      src++;

      switch (ch) {
      case '\'':
          *(dst++) = '\'';
          break;
      case '\"':
          *(dst++) = '\"';
          break;
      case '\?':
          *(dst++) = '\?';
          break;
      case '\\':
          *(dst++) = '\\';
          break;
      case 'a':
          *(dst++) = '\a';
          break;
      case 'b':
          *(dst++) = '\b';
          break;
      case 'f':
          *(dst++) = '\f';
          break;
      case 'n':
          *(dst++) = '\n';
          break;
      case 'r':
          *(dst++) = '\r';
          break;
      case 't':
          *(dst++) = '\t';
          break;
      case 'v':
          *(dst++) = '\v';
          break;

      /* hex: consume only the digits that are really there (max two) */
      case 'X':
      case 'x': {
          unsigned int value = 0;
          int digits = 0;

          while (digits < 2 && isxdigit((unsigned char)*src)) {
              const int c = tolower((unsigned char)*src++);

              value = value * 16 +
                      (unsigned int)(isdigit(c) ? c - '0' : c - 'a' + 10);
              digits++;
          }
          if (digits)
              *(dst++) = (char)(unsigned char)value;
          break;
      }

      /* oct: the digit we just consumed is the first of up to three */
      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7': {
          unsigned int value = (unsigned int)(ch - '0');
          int digits = 1;

          while (digits < 3 && *src >= '0' && *src <= '7') {
              value = value * 8 + (unsigned int)(*src++ - '0');
              digits++;
          }
          *(dst++) = (char)(unsigned char)value;
          break;
      }

      default:
          /* unsupported escape (e.g. \u, \U): dropped */
          break;
      }

      *p_dst = dst;
      *p_src = src;
  }
  /*
   * Converts a quoted string/character literal into a newly allocated,
   * NUL-terminated string with the surrounding quotes removed and escape
   * sequences decoded.
   *
   * literal: text that starts and ends with the same quote character
   *          (either " or ').
   * count:   length of the literal including both quotes; 0 means "use
   *          strlen(literal)".
   *
   * Returns NULL when literal is NULL, shorter than two characters, or not
   * wrapped in matching quotes.  Otherwise returns a buffer obtained from
   * bzalloc(); the caller owns it.
   */
  char *cf_literal_to_str(const char *literal, size_t count)
  {
      const char *src;
      const char *closing_quote;
      char *str;
      char *dst;

      if (!literal)
          return NULL;

      if (!count)
          count = strlen(literal);

      if (count < 2)
          return NULL;
      if (literal[0] != literal[count - 1])
          return NULL;
      if (literal[0] != '\"' && literal[0] != '\'')
          return NULL;

      /* at most count - 2 payload bytes plus the terminator */
      str = bzalloc(count - 1);
      src = literal + 1;
      closing_quote = literal + count - 1;
      dst = str;

      /*
       * Bound the copy by the position of the closing quote rather than by
       * an iteration count: an escape consumes several source characters per
       * iteration, so counting iterations would run on into the closing
       * quote (and beyond).  Every iteration consumes at least one source
       * character and writes at most one byte, so dst stays in bounds.
       */
      while (src < closing_quote && *src) {
          if (*src == '\\') {
              src++;
              cf_convert_from_escape_literal(&dst, &src);
          } else {
              *(dst++) = *(src++);
          }
      }

      *dst = 0;
      return str;
  }
  107. static bool cf_is_token_break(struct base_token *start_token,
  108. const struct base_token *token)
  109. {
  110. switch (start_token->type) {
  111. case BASETOKEN_ALPHA:
  112. if (token->type == BASETOKEN_OTHER ||
  113. token->type == BASETOKEN_WHITESPACE)
  114. return true;
  115. break;
  116. case BASETOKEN_DIGIT:
  117. if (token->type == BASETOKEN_WHITESPACE ||
  118. (token->type == BASETOKEN_OTHER &&
  119. *token->text.array != '.'))
  120. return true;
  121. break;
  122. case BASETOKEN_WHITESPACE:
  123. /* lump all non-newline whitespace together when possible */
  124. if (is_space_or_tab(*start_token->text.array) &&
  125. is_space_or_tab(*token->text.array))
  126. break;
  127. return true;
  128. case BASETOKEN_OTHER:
  129. if (*start_token->text.array == '.' &&
  130. token->type == BASETOKEN_DIGIT) {
  131. start_token->type = BASETOKEN_DIGIT;
  132. break;
  133. }
  134. /* Falls through. */
  135. case BASETOKEN_NONE:
  136. return true;
  137. }
  138. return false;
  139. }
  /* True when `array` starts with a backslash directly followed by a newline. */
  static inline bool cf_is_splice(const char *array)
  {
      if (array[0] != '\\')
          return false;
      return is_newline(array[1]);
  }
  /* Advances *parray over every consecutive line splice it currently points at. */
  static inline void cf_pass_any_splices(const char **parray)
  {
      const char *cursor = *parray;

      while (cf_is_splice(cursor))
          cursor += 1 + newline_size(cursor + 1);

      *parray = cursor;
  }
  /*
   * True when `array` begins a line or block comment.  Line splices between
   * the '/' and the following '/' or '*' are ignored.
   */
  static inline bool cf_is_comment(const char *array)
  {
      const char *next;

      if (array[0] != '/')
          return false;

      next = array + 1;
      cf_pass_any_splices(&next);
      return *next == '*' || *next == '/';
  }
  158. static bool cf_lexer_process_comment(struct cf_lexer *lex,
  159. struct cf_token *out_token)
  160. {
  161. const char *offset;
  162. if (!cf_is_comment(out_token->unmerged_str.array))
  163. return false;
  164. offset = lex->base_lexer.offset;
  165. cf_pass_any_splices(&offset);
  166. strcpy(lex->write_offset++, " ");
  167. out_token->str.len = 1;
  168. if (*offset == '/') {
  169. while (*++offset && !is_newline(*offset))
  170. cf_pass_any_splices(&offset);
  171. } else if (*offset == '*') {
  172. bool was_star = false;
  173. lex->unexpected_eof = true;
  174. while (*++offset) {
  175. cf_pass_any_splices(&offset);
  176. if (was_star && *offset == '/') {
  177. offset++;
  178. lex->unexpected_eof = false;
  179. break;
  180. } else {
  181. was_star = (*offset == '*');
  182. }
  183. }
  184. }
  185. out_token->unmerged_str.len +=
  186. (size_t)(offset - out_token->unmerged_str.array);
  187. out_token->type = CFTOKEN_SPACETAB;
  188. lex->base_lexer.offset = offset;
  189. return true;
  190. }
  191. static inline void cf_lexer_write_strref(struct cf_lexer *lex,
  192. const struct strref *ref)
  193. {
  194. strncpy(lex->write_offset, ref->array, ref->len);
  195. lex->write_offset[ref->len] = 0;
  196. lex->write_offset += ref->len;
  197. }
  198. static bool cf_lexer_is_include(struct cf_lexer *lex)
  199. {
  200. bool found_include_import = false;
  201. bool found_preprocessor = false;
  202. size_t i;
  203. for (i = lex->tokens.num; i > 0; i--) {
  204. struct cf_token *token = lex->tokens.array + (i - 1);
  205. if (is_space_or_tab(*token->str.array))
  206. continue;
  207. if (!found_include_import) {
  208. if (strref_cmp(&token->str, "include") != 0 &&
  209. strref_cmp(&token->str, "import") != 0)
  210. break;
  211. found_include_import = true;
  212. } else if (!found_preprocessor) {
  213. if (*token->str.array != '#')
  214. break;
  215. found_preprocessor = true;
  216. } else {
  217. return is_newline(*token->str.array);
  218. }
  219. }
  220. /* if starting line */
  221. return found_preprocessor && found_include_import;
  222. }
  223. static void cf_lexer_getstrtoken(struct cf_lexer *lex,
  224. struct cf_token *out_token, char delimiter,
  225. bool allow_escaped_delimiters)
  226. {
  227. const char *offset = lex->base_lexer.offset;
  228. bool escaped = false;
  229. out_token->unmerged_str.len++;
  230. out_token->str.len++;
  231. cf_lexer_write_strref(lex, &out_token->unmerged_str);
  232. while (*offset) {
  233. cf_pass_any_splices(&offset);
  234. if (*offset == delimiter) {
  235. if (!escaped) {
  236. *lex->write_offset++ = *offset;
  237. out_token->str.len++;
  238. offset++;
  239. break;
  240. }
  241. } else if (is_newline(*offset)) {
  242. break;
  243. }
  244. *lex->write_offset++ = *offset;
  245. out_token->str.len++;
  246. escaped = (allow_escaped_delimiters && *offset == '\\');
  247. offset++;
  248. }
  249. *lex->write_offset = 0;
  250. out_token->unmerged_str.len +=
  251. (size_t)(offset - out_token->unmerged_str.array);
  252. out_token->type = CFTOKEN_STRING;
  253. lex->base_lexer.offset = offset;
  254. }
  255. static bool cf_lexer_process_string(struct cf_lexer *lex,
  256. struct cf_token *out_token)
  257. {
  258. char ch = *out_token->unmerged_str.array;
  259. if (ch == '<' && cf_lexer_is_include(lex)) {
  260. cf_lexer_getstrtoken(lex, out_token, '>', false);
  261. return true;
  262. } else if (ch == '"' || ch == '\'') {
  263. cf_lexer_getstrtoken(lex, out_token, ch,
  264. !cf_lexer_is_include(lex));
  265. return true;
  266. }
  267. return false;
  268. }
  269. static inline enum cf_token_type
  270. cf_get_token_type(const struct cf_token *token,
  271. const struct base_token *start_token)
  272. {
  273. switch (start_token->type) {
  274. case BASETOKEN_ALPHA:
  275. return CFTOKEN_NAME;
  276. case BASETOKEN_DIGIT:
  277. return CFTOKEN_NUM;
  278. case BASETOKEN_WHITESPACE:
  279. if (is_newline(*token->str.array))
  280. return CFTOKEN_NEWLINE;
  281. else
  282. return CFTOKEN_SPACETAB;
  283. case BASETOKEN_NONE:
  284. case BASETOKEN_OTHER:
  285. break;
  286. }
  287. return CFTOKEN_OTHER;
  288. }
  289. static bool cf_lexer_nexttoken(struct cf_lexer *lex, struct cf_token *out_token)
  290. {
  291. struct base_token token, start_token;
  292. bool wrote_data = false;
  293. base_token_clear(&token);
  294. base_token_clear(&start_token);
  295. cf_token_clear(out_token);
  296. while (lexer_getbasetoken(&lex->base_lexer, &token, PARSE_WHITESPACE)) {
  297. /* reclassify underscore as alpha for alnum tokens */
  298. if (*token.text.array == '_')
  299. token.type = BASETOKEN_ALPHA;
  300. /* ignore escaped newlines to merge spliced lines */
  301. if (cf_is_splice(token.text.array)) {
  302. lex->base_lexer.offset +=
  303. newline_size(token.text.array + 1);
  304. continue;
  305. }
  306. if (!wrote_data) {
  307. out_token->unmerged_str.array = token.text.array;
  308. out_token->str.array = lex->write_offset;
  309. /* if comment then output a space */
  310. if (cf_lexer_process_comment(lex, out_token))
  311. return true;
  312. /* process string tokens if any */
  313. if (cf_lexer_process_string(lex, out_token))
  314. return true;
  315. base_token_copy(&start_token, &token);
  316. wrote_data = true;
  317. } else if (cf_is_token_break(&start_token, &token)) {
  318. lex->base_lexer.offset -= token.text.len;
  319. break;
  320. }
  321. /* write token to CF lexer to account for splicing/comments */
  322. cf_lexer_write_strref(lex, &token.text);
  323. out_token->str.len += token.text.len;
  324. }
  325. if (wrote_data) {
  326. out_token->unmerged_str.len =
  327. (size_t)(lex->base_lexer.offset -
  328. out_token->unmerged_str.array);
  329. out_token->type = cf_get_token_type(out_token, &start_token);
  330. }
  331. return wrote_data;
  332. }
  333. void cf_lexer_init(struct cf_lexer *lex)
  334. {
  335. lexer_init(&lex->base_lexer);
  336. da_init(lex->tokens);
  337. lex->file = NULL;
  338. lex->reformatted = NULL;
  339. lex->write_offset = NULL;
  340. lex->unexpected_eof = false;
  341. }
  342. void cf_lexer_free(struct cf_lexer *lex)
  343. {
  344. bfree(lex->file);
  345. bfree(lex->reformatted);
  346. lexer_free(&lex->base_lexer);
  347. da_free(lex->tokens);
  348. lex->file = NULL;
  349. lex->reformatted = NULL;
  350. lex->write_offset = NULL;
  351. lex->unexpected_eof = false;
  352. }
  353. bool cf_lexer_lex(struct cf_lexer *lex, const char *str, const char *file)
  354. {
  355. struct cf_token token;
  356. struct cf_token *last_token = NULL;
  357. cf_lexer_free(lex);
  358. if (!str || !*str)
  359. return false;
  360. if (file)
  361. lex->file = bstrdup(file);
  362. lexer_start(&lex->base_lexer, str);
  363. cf_token_clear(&token);
  364. lex->reformatted = bmalloc(strlen(str) + 1);
  365. lex->reformatted[0] = 0;
  366. lex->write_offset = lex->reformatted;
  367. while (cf_lexer_nexttoken(lex, &token)) {
  368. if (last_token && is_space_or_tab(*last_token->str.array) &&
  369. is_space_or_tab(*token.str.array)) {
  370. cf_token_add(last_token, &token);
  371. continue;
  372. }
  373. token.lex = lex;
  374. last_token = da_push_back_new(lex->tokens);
  375. memcpy(last_token, &token, sizeof(struct cf_token));
  376. }
  377. cf_token_clear(&token);
  378. token.str.array = lex->write_offset;
  379. token.unmerged_str.array = lex->base_lexer.offset;
  380. token.lex = lex;
  381. da_push_back(lex->tokens, &token);
  382. return !lex->unexpected_eof;
  383. }
  384. /* ------------------------------------------------------------------------- */
  385. struct macro_param {
  386. struct cf_token name;
  387. DARRAY(struct cf_token) tokens;
  388. };
  389. static inline void macro_param_init(struct macro_param *param)
  390. {
  391. cf_token_clear(&param->name);
  392. da_init(param->tokens);
  393. }
  394. static inline void macro_param_free(struct macro_param *param)
  395. {
  396. cf_token_clear(&param->name);
  397. da_free(param->tokens);
  398. }
  399. /* ------------------------------------------------------------------------- */
  400. struct macro_params {
  401. DARRAY(struct macro_param) params;
  402. };
  403. static inline void macro_params_init(struct macro_params *params)
  404. {
  405. da_init(params->params);
  406. }
  407. static inline void macro_params_free(struct macro_params *params)
  408. {
  409. size_t i;
  410. for (i = 0; i < params->params.num; i++)
  411. macro_param_free(params->params.array + i);
  412. da_free(params->params);
  413. }
  414. static inline struct macro_param *
  415. get_macro_param(const struct macro_params *params, const struct strref *name)
  416. {
  417. size_t i;
  418. if (!params)
  419. return NULL;
  420. for (i = 0; i < params->params.num; i++) {
  421. struct macro_param *param = params->params.array + i;
  422. if (strref_cmp_strref(&param->name.str, name) == 0)
  423. return param;
  424. }
  425. return NULL;
  426. }
  427. /* ------------------------------------------------------------------------- */
  /* Forward declarations: the directive handlers below call these. */
  static bool cf_preprocessor(struct cf_preprocessor *pp, bool if_block,
                              struct cf_token **p_cur_token);

  static void cf_preprocess_tokens(struct cf_preprocessor *pp, bool if_block,
                                   struct cf_token **p_cur_token);
  432. static inline bool go_to_newline(struct cf_token **p_cur_token)
  433. {
  434. struct cf_token *cur_token = *p_cur_token;
  435. while (cur_token->type != CFTOKEN_NEWLINE &&
  436. cur_token->type != CFTOKEN_NONE)
  437. cur_token++;
  438. *p_cur_token = cur_token;
  439. return cur_token->type != CFTOKEN_NONE;
  440. }
  441. static inline bool next_token(struct cf_token **p_cur_token, bool preprocessor)
  442. {
  443. struct cf_token *cur_token = *p_cur_token;
  444. if (cur_token->type != CFTOKEN_NONE)
  445. cur_token++;
  446. /* if preprocessor, stop at newline */
  447. while (cur_token->type == CFTOKEN_SPACETAB &&
  448. (preprocessor || cur_token->type == CFTOKEN_NEWLINE))
  449. cur_token++;
  450. *p_cur_token = cur_token;
  451. return cur_token->type != CFTOKEN_NONE;
  452. }
  453. static inline void cf_gettokenoffset(struct cf_preprocessor *pp,
  454. const struct cf_token *token,
  455. uint32_t *row, uint32_t *col)
  456. {
  457. lexer_getstroffset(&pp->lex->base_lexer, token->unmerged_str.array, row,
  458. col);
  459. }
  460. static void cf_addew(struct cf_preprocessor *pp, const struct cf_token *token,
  461. const char *message, int error_level, const char *val1,
  462. const char *val2, const char *val3)
  463. {
  464. uint32_t row, col;
  465. cf_gettokenoffset(pp, token, &row, &col);
  466. if (!val1 && !val2 && !val3) {
  467. error_data_add(pp->ed, token->lex->file, row, col, message,
  468. error_level);
  469. } else {
  470. struct dstr formatted;
  471. dstr_init(&formatted);
  472. dstr_safe_printf(&formatted, message, val1, val2, val3, NULL);
  473. error_data_add(pp->ed, token->lex->file, row, col,
  474. formatted.array, error_level);
  475. dstr_free(&formatted);
  476. }
  477. }
  478. static inline void cf_adderror(struct cf_preprocessor *pp,
  479. const struct cf_token *token, const char *error,
  480. const char *val1, const char *val2,
  481. const char *val3)
  482. {
  483. cf_addew(pp, token, error, LEX_ERROR, val1, val2, val3);
  484. }
  485. static inline void cf_addwarning(struct cf_preprocessor *pp,
  486. const struct cf_token *token,
  487. const char *warning, const char *val1,
  488. const char *val2, const char *val3)
  489. {
  490. cf_addew(pp, token, warning, LEX_WARNING, val1, val2, val3);
  491. }
  /* Reports "Expected <expecting>" at `token`. */
  static inline void cf_adderror_expecting(struct cf_preprocessor *pp,
                                           const struct cf_token *token,
                                           const char *expecting)
  {
      static const char fmt[] = "Expected $1";

      cf_adderror(pp, token, fmt, expecting, NULL, NULL);
  }
  /* Reports trailing junk after a preprocessor directive at `token`. */
  static inline void cf_adderror_expected_newline(struct cf_preprocessor *pp,
                                                  const struct cf_token *token)
  {
      static const char msg[] =
          "Unexpected token after preprocessor, expected newline";

      cf_adderror(pp, token, msg, NULL, NULL, NULL);
  }
  /* Reports that the input ended inside a conditional block. */
  static inline void
  cf_adderror_unexpected_endif_eof(struct cf_preprocessor *pp,
                                   const struct cf_token *token)
  {
      static const char msg[] = "Unexpected end of file before #endif";

      cf_adderror(pp, token, msg, NULL, NULL, NULL);
  }
  /* Reports that the input ended unexpectedly at `token`. */
  static inline void cf_adderror_unexpected_eof(struct cf_preprocessor *pp,
                                                const struct cf_token *token)
  {
      static const char msg[] = "Unexpected end of file";

      cf_adderror(pp, token, msg, NULL, NULL, NULL);
  }
  518. static inline void insert_path(struct cf_preprocessor *pp,
  519. struct dstr *str_file)
  520. {
  521. const char *file;
  522. const char *slash;
  523. if (pp && pp->lex && pp->lex->file) {
  524. file = pp->lex->file;
  525. slash = strrchr(file, '/');
  526. if (slash) {
  527. struct dstr path = {0};
  528. dstr_ncopy(&path, file, slash - file + 1);
  529. dstr_insert_dstr(str_file, 0, &path);
  530. dstr_free(&path);
  531. }
  532. }
  533. }
  534. static void cf_include_file(struct cf_preprocessor *pp,
  535. const struct cf_token *file_token)
  536. {
  537. struct cf_lexer new_lex;
  538. struct dstr str_file;
  539. FILE *file;
  540. char *file_data;
  541. struct cf_token *tokens;
  542. size_t i;
  543. dstr_init(&str_file);
  544. dstr_copy_strref(&str_file, &file_token->str);
  545. dstr_mid(&str_file, &str_file, 1, str_file.len - 2);
  546. insert_path(pp, &str_file);
  547. /* if dependency already exists, run preprocessor on it */
  548. for (i = 0; i < pp->dependencies.num; i++) {
  549. struct cf_lexer *dep = pp->dependencies.array + i;
  550. if (strcmp(dep->file, str_file.array) == 0) {
  551. tokens = cf_lexer_get_tokens(dep);
  552. cf_preprocess_tokens(pp, false, &tokens);
  553. goto exit;
  554. }
  555. }
  556. file = os_fopen(str_file.array, "rb");
  557. if (!file) {
  558. cf_adderror(pp, file_token, "Could not open file '$1'",
  559. file_token->str.array, NULL, NULL);
  560. goto exit;
  561. }
  562. os_fread_utf8(file, &file_data);
  563. fclose(file);
  564. cf_lexer_init(&new_lex);
  565. cf_lexer_lex(&new_lex, file_data, str_file.array);
  566. tokens = cf_lexer_get_tokens(&new_lex);
  567. cf_preprocess_tokens(pp, false, &tokens);
  568. bfree(file_data);
  569. da_push_back(pp->dependencies, &new_lex);
  570. exit:
  571. dstr_free(&str_file);
  572. }
  573. static inline bool is_sys_include(struct strref *ref)
  574. {
  575. return ref->len >= 2 && ref->array[0] == '<' &&
  576. ref->array[ref->len - 1] == '>';
  577. }
  578. static inline bool is_loc_include(struct strref *ref)
  579. {
  580. return ref->len >= 2 && ref->array[0] == '"' &&
  581. ref->array[ref->len - 1] == '"';
  582. }
  583. static void cf_preprocess_include(struct cf_preprocessor *pp,
  584. struct cf_token **p_cur_token)
  585. {
  586. struct cf_token *cur_token = *p_cur_token;
  587. if (pp->ignore_state) {
  588. go_to_newline(p_cur_token);
  589. return;
  590. }
  591. next_token(&cur_token, true);
  592. if (cur_token->type != CFTOKEN_STRING) {
  593. cf_adderror_expecting(pp, cur_token, "string");
  594. go_to_newline(&cur_token);
  595. goto exit;
  596. }
  597. if (is_sys_include(&cur_token->str)) {
  598. /* TODO */
  599. } else if (is_loc_include(&cur_token->str)) {
  600. if (!pp->ignore_state)
  601. cf_include_file(pp, cur_token);
  602. } else {
  603. cf_adderror(pp, cur_token, "Invalid or incomplete string", NULL,
  604. NULL, NULL);
  605. go_to_newline(&cur_token);
  606. goto exit;
  607. }
  608. cur_token++;
  609. exit:
  610. *p_cur_token = cur_token;
  611. }
  612. static bool cf_preprocess_macro_params(struct cf_preprocessor *pp,
  613. struct cf_def *def,
  614. struct cf_token **p_cur_token)
  615. {
  616. struct cf_token *cur_token = *p_cur_token;
  617. bool success = false;
  618. def->macro = true;
  619. do {
  620. next_token(&cur_token, true);
  621. if (cur_token->type != CFTOKEN_NAME) {
  622. cf_adderror_expecting(pp, cur_token, "identifier");
  623. go_to_newline(&cur_token);
  624. goto exit;
  625. }
  626. cf_def_addparam(def, cur_token);
  627. next_token(&cur_token, true);
  628. if (cur_token->type != CFTOKEN_OTHER ||
  629. (*cur_token->str.array != ',' &&
  630. *cur_token->str.array != ')')) {
  631. cf_adderror_expecting(pp, cur_token, "',' or ')'");
  632. go_to_newline(&cur_token);
  633. goto exit;
  634. }
  635. } while (*cur_token->str.array != ')');
  636. /* ended properly, now go to first define token (or newline) */
  637. next_token(&cur_token, true);
  638. success = true;
  639. exit:
  640. *p_cur_token = cur_token;
  641. return success;
  642. }
  643. #define INVALID_INDEX ((size_t)-1)
  644. static inline size_t cf_preprocess_get_def_idx(struct cf_preprocessor *pp,
  645. const struct strref *def_name)
  646. {
  647. struct cf_def *array = pp->defines.array;
  648. size_t i;
  649. for (i = 0; i < pp->defines.num; i++) {
  650. struct cf_def *cur_def = array + i;
  651. if (strref_cmp_strref(&cur_def->name.str, def_name) == 0)
  652. return i;
  653. }
  654. return INVALID_INDEX;
  655. }
  656. static inline struct cf_def *
  657. cf_preprocess_get_def(struct cf_preprocessor *pp, const struct strref *def_name)
  658. {
  659. size_t idx = cf_preprocess_get_def_idx(pp, def_name);
  660. if (idx == INVALID_INDEX)
  661. return NULL;
  662. return pp->defines.array + idx;
  663. }
  664. static char space_filler[2] = " ";
  665. static inline void append_space(struct cf_preprocessor *pp,
  666. struct darray *tokens,
  667. const struct cf_token *base)
  668. {
  669. struct cf_token token;
  670. strref_set(&token.str, space_filler, 1);
  671. token.type = CFTOKEN_SPACETAB;
  672. if (base) {
  673. token.lex = base->lex;
  674. strref_copy(&token.unmerged_str, &base->unmerged_str);
  675. } else {
  676. token.lex = pp->lex;
  677. strref_copy(&token.unmerged_str, &token.str);
  678. }
  679. darray_push_back(sizeof(struct cf_token), tokens, &token);
  680. }
  681. static inline void append_end_token(struct darray *tokens)
  682. {
  683. struct cf_token end;
  684. cf_token_clear(&end);
  685. darray_push_back(sizeof(struct cf_token), tokens, &end);
  686. }
  687. static void cf_preprocess_define(struct cf_preprocessor *pp,
  688. struct cf_token **p_cur_token)
  689. {
  690. struct cf_token *cur_token = *p_cur_token;
  691. struct cf_def def;
  692. if (pp->ignore_state) {
  693. go_to_newline(p_cur_token);
  694. return;
  695. }
  696. cf_def_init(&def);
  697. next_token(&cur_token, true);
  698. if (cur_token->type != CFTOKEN_NAME) {
  699. cf_adderror_expecting(pp, cur_token, "identifier");
  700. go_to_newline(&cur_token);
  701. goto exit;
  702. }
  703. append_space(pp, &def.tokens.da, NULL);
  704. cf_token_copy(&def.name, cur_token);
  705. if (!next_token(&cur_token, true))
  706. goto complete;
  707. /* process macro */
  708. if (*cur_token->str.array == '(') {
  709. if (!cf_preprocess_macro_params(pp, &def, &cur_token))
  710. goto error;
  711. }
  712. while (cur_token->type != CFTOKEN_NEWLINE &&
  713. cur_token->type != CFTOKEN_NONE)
  714. cf_def_addtoken(&def, cur_token++);
  715. complete:
  716. append_end_token(&def.tokens.da);
  717. append_space(pp, &def.tokens.da, NULL);
  718. da_push_back(pp->defines, &def);
  719. goto exit;
  720. error:
  721. cf_def_free(&def);
  722. exit:
  723. *p_cur_token = cur_token;
  724. }
  725. static inline void cf_preprocess_remove_def_strref(struct cf_preprocessor *pp,
  726. const struct strref *ref)
  727. {
  728. size_t def_idx = cf_preprocess_get_def_idx(pp, ref);
  729. if (def_idx != INVALID_INDEX) {
  730. struct cf_def *array = pp->defines.array;
  731. cf_def_free(array + def_idx);
  732. da_erase(pp->defines, def_idx);
  733. }
  734. }
  735. static void cf_preprocess_undef(struct cf_preprocessor *pp,
  736. struct cf_token **p_cur_token)
  737. {
  738. struct cf_token *cur_token = *p_cur_token;
  739. if (pp->ignore_state) {
  740. go_to_newline(p_cur_token);
  741. return;
  742. }
  743. next_token(&cur_token, true);
  744. if (cur_token->type != CFTOKEN_NAME) {
  745. cf_adderror_expecting(pp, cur_token, "identifier");
  746. go_to_newline(&cur_token);
  747. goto exit;
  748. }
  749. cf_preprocess_remove_def_strref(pp, &cur_token->str);
  750. cur_token++;
  751. exit:
  752. *p_cur_token = cur_token;
  753. }
  754. /* Processes an #ifdef/#ifndef/#if/#else/#elif sub block recursively */
  755. static inline bool cf_preprocess_subblock(struct cf_preprocessor *pp,
  756. bool ignore,
  757. struct cf_token **p_cur_token)
  758. {
  759. bool eof;
  760. if (!next_token(p_cur_token, true))
  761. return false;
  762. if (!pp->ignore_state) {
  763. pp->ignore_state = ignore;
  764. cf_preprocess_tokens(pp, true, p_cur_token);
  765. pp->ignore_state = false;
  766. } else {
  767. cf_preprocess_tokens(pp, true, p_cur_token);
  768. }
  769. eof = ((*p_cur_token)->type == CFTOKEN_NONE);
  770. if (eof)
  771. cf_adderror_unexpected_endif_eof(pp, *p_cur_token);
  772. return !eof;
  773. }
  774. static void cf_preprocess_ifdef(struct cf_preprocessor *pp, bool ifnot,
  775. struct cf_token **p_cur_token)
  776. {
  777. struct cf_token *cur_token = *p_cur_token;
  778. struct cf_def *def;
  779. bool is_true;
  780. next_token(&cur_token, true);
  781. if (cur_token->type != CFTOKEN_NAME) {
  782. cf_adderror_expecting(pp, cur_token, "identifier");
  783. go_to_newline(&cur_token);
  784. goto exit;
  785. }
  786. def = cf_preprocess_get_def(pp, &cur_token->str);
  787. is_true = (def == NULL) == ifnot;
  788. if (!cf_preprocess_subblock(pp, !is_true, &cur_token))
  789. goto exit;
  790. if (strref_cmp(&cur_token->str, "else") == 0) {
  791. if (!cf_preprocess_subblock(pp, is_true, &cur_token))
  792. goto exit;
  793. /*} else if (strref_cmp(&cur_token->str, "elif") == 0) {*/
  794. }
  795. cur_token++;
  796. exit:
  797. *p_cur_token = cur_token;
  798. }
  799. static bool cf_preprocessor(struct cf_preprocessor *pp, bool if_block,
  800. struct cf_token **p_cur_token)
  801. {
  802. struct cf_token *cur_token = *p_cur_token;
  803. if (strref_cmp(&cur_token->str, "include") == 0) {
  804. cf_preprocess_include(pp, p_cur_token);
  805. } else if (strref_cmp(&cur_token->str, "define") == 0) {
  806. cf_preprocess_define(pp, p_cur_token);
  807. } else if (strref_cmp(&cur_token->str, "undef") == 0) {
  808. cf_preprocess_undef(pp, p_cur_token);
  809. } else if (strref_cmp(&cur_token->str, "ifdef") == 0) {
  810. cf_preprocess_ifdef(pp, false, p_cur_token);
  811. } else if (strref_cmp(&cur_token->str, "ifndef") == 0) {
  812. cf_preprocess_ifdef(pp, true, p_cur_token);
  813. /*} else if (strref_cmp(&cur_token->str, "if") == 0) {
  814. TODO;*/
  815. } else if (strref_cmp(&cur_token->str, "else") == 0 ||
  816. /*strref_cmp(&cur_token->str, "elif") == 0 ||*/
  817. strref_cmp(&cur_token->str, "endif") == 0) {
  818. if (!if_block) {
  819. struct dstr name;
  820. dstr_init_copy_strref(&name, &cur_token->str);
  821. cf_adderror(pp, cur_token,
  822. "#$1 outside of "
  823. "#if/#ifdef/#ifndef block",
  824. name.array, NULL, NULL);
  825. dstr_free(&name);
  826. (*p_cur_token)++;
  827. return true;
  828. }
  829. return false;
  830. } else if (cur_token->type != CFTOKEN_NEWLINE &&
  831. cur_token->type != CFTOKEN_NONE) {
  832. /*
  833. * TODO: language-specific preprocessor stuff should be sent to
  834. * handler of some sort
  835. */
  836. (*p_cur_token)++;
  837. }
  838. return true;
  839. }
  840. static void cf_preprocess_addtoken(struct cf_preprocessor *pp,
  841. struct darray *dst, /* struct cf_token */
  842. struct cf_token **p_cur_token,
  843. const struct cf_token *base,
  844. const struct macro_params *params);
  845. /*
  846. * collects tokens for a macro parameter
  847. *
  848. * note that it is important to make sure that any usage of function calls
  849. * within a macro parameter is preserved, example MACRO(func(1, 2), 3), do not
  850. * let it stop on the comma at "1,"
  851. */
  852. static void cf_preprocess_save_macro_param(
  853. struct cf_preprocessor *pp, struct cf_token **p_cur_token,
  854. struct macro_param *param, const struct cf_token *base,
  855. const struct macro_params *cur_params)
  856. {
  857. struct cf_token *cur_token = *p_cur_token;
  858. int brace_count = 0;
  859. append_space(pp, &param->tokens.da, base);
  860. while (cur_token->type != CFTOKEN_NONE) {
  861. if (*cur_token->str.array == '(') {
  862. brace_count++;
  863. } else if (*cur_token->str.array == ')') {
  864. if (brace_count)
  865. brace_count--;
  866. else
  867. break;
  868. } else if (*cur_token->str.array == ',') {
  869. if (!brace_count)
  870. break;
  871. }
  872. cf_preprocess_addtoken(pp, &param->tokens.da, &cur_token, base,
  873. cur_params);
  874. }
  875. if (cur_token->type == CFTOKEN_NONE)
  876. cf_adderror_unexpected_eof(pp, cur_token);
  877. append_space(pp, &param->tokens.da, base);
  878. append_end_token(&param->tokens.da);
  879. *p_cur_token = cur_token;
  880. }
  881. static inline bool param_is_whitespace(const struct macro_param *param)
  882. {
  883. struct cf_token *array = param->tokens.array;
  884. size_t i;
  885. for (i = 0; i < param->tokens.num; i++)
  886. if (array[i].type != CFTOKEN_NONE &&
  887. array[i].type != CFTOKEN_SPACETAB &&
  888. array[i].type != CFTOKEN_NEWLINE)
  889. return false;
  890. return true;
  891. }
  892. /* collects parameter tokens of a used macro and stores them for the unwrap */
  893. static void cf_preprocess_save_macro_params(
  894. struct cf_preprocessor *pp, struct cf_token **p_cur_token,
  895. const struct cf_def *def, const struct cf_token *base,
  896. const struct macro_params *cur_params, struct macro_params *dst)
  897. {
  898. struct cf_token *cur_token = *p_cur_token;
  899. size_t count = 0;
  900. next_token(&cur_token, false);
  901. if (cur_token->type != CFTOKEN_OTHER || *cur_token->str.array != '(') {
  902. cf_adderror_expecting(pp, cur_token, "'('");
  903. goto exit;
  904. }
  905. do {
  906. struct macro_param param;
  907. macro_param_init(&param);
  908. cur_token++;
  909. count++;
  910. cf_preprocess_save_macro_param(pp, &cur_token, &param, base,
  911. cur_params);
  912. if (cur_token->type != CFTOKEN_OTHER ||
  913. (*cur_token->str.array != ',' &&
  914. *cur_token->str.array != ')')) {
  915. macro_param_free(&param);
  916. cf_adderror_expecting(pp, cur_token, "',' or ')'");
  917. goto exit;
  918. }
  919. if (param_is_whitespace(&param)) {
  920. /* if 0-param macro, ignore first entry */
  921. if (count == 1 && !def->params.num &&
  922. *cur_token->str.array == ')') {
  923. macro_param_free(&param);
  924. break;
  925. }
  926. }
  927. if (count <= def->params.num) {
  928. cf_token_copy(&param.name,
  929. cf_def_getparam(def, count - 1));
  930. da_push_back(dst->params, &param);
  931. } else {
  932. macro_param_free(&param);
  933. }
  934. } while (*cur_token->str.array != ')');
  935. if (count != def->params.num)
  936. cf_adderror(pp, cur_token,
  937. "Mismatching number of macro parameters", NULL,
  938. NULL, NULL);
  939. exit:
  940. *p_cur_token = cur_token;
  941. }
  942. static inline void cf_preprocess_unwrap_param(
  943. struct cf_preprocessor *pp, struct darray *dst, /* struct cf_token */
  944. struct cf_token **p_cur_token, const struct cf_token *base,
  945. const struct macro_param *param)
  946. {
  947. struct cf_token *cur_token = *p_cur_token;
  948. struct cf_token *cur_param_token = param->tokens.array;
  949. while (cur_param_token->type != CFTOKEN_NONE)
  950. cf_preprocess_addtoken(pp, dst, &cur_param_token, base, NULL);
  951. cur_token++;
  952. *p_cur_token = cur_token;
  953. }
  954. static inline void cf_preprocess_unwrap_define(
  955. struct cf_preprocessor *pp, struct darray *dst, /* struct cf_token */
  956. struct cf_token **p_cur_token, const struct cf_token *base,
  957. const struct cf_def *def, const struct macro_params *cur_params)
  958. {
  959. struct cf_token *cur_token = *p_cur_token;
  960. struct macro_params new_params;
  961. struct cf_token *cur_def_token = def->tokens.array;
  962. macro_params_init(&new_params);
  963. if (def->macro)
  964. cf_preprocess_save_macro_params(pp, &cur_token, def, base,
  965. cur_params, &new_params);
  966. while (cur_def_token->type != CFTOKEN_NONE)
  967. cf_preprocess_addtoken(pp, dst, &cur_def_token, base,
  968. &new_params);
  969. macro_params_free(&new_params);
  970. cur_token++;
  971. *p_cur_token = cur_token;
  972. }
  973. static void cf_preprocess_addtoken(struct cf_preprocessor *pp,
  974. struct darray *dst, /* struct cf_token */
  975. struct cf_token **p_cur_token,
  976. const struct cf_token *base,
  977. const struct macro_params *params)
  978. {
  979. struct cf_token *cur_token = *p_cur_token;
  980. if (pp->ignore_state)
  981. goto ignore;
  982. if (!base)
  983. base = cur_token;
  984. if (cur_token->type == CFTOKEN_NAME) {
  985. struct cf_def *def;
  986. struct macro_param *param;
  987. param = get_macro_param(params, &cur_token->str);
  988. if (param) {
  989. cf_preprocess_unwrap_param(pp, dst, &cur_token, base,
  990. param);
  991. goto exit;
  992. }
  993. def = cf_preprocess_get_def(pp, &cur_token->str);
  994. if (def) {
  995. cf_preprocess_unwrap_define(pp, dst, &cur_token, base,
  996. def, params);
  997. goto exit;
  998. }
  999. }
  1000. darray_push_back(sizeof(struct cf_token), dst, cur_token);
  1001. ignore:
  1002. cur_token++;
  1003. exit:
  1004. *p_cur_token = cur_token;
  1005. }
  1006. static void cf_preprocess_tokens(struct cf_preprocessor *pp, bool if_block,
  1007. struct cf_token **p_cur_token)
  1008. {
  1009. bool newline = true;
  1010. bool preprocessor_line = if_block;
  1011. struct cf_token *cur_token = *p_cur_token;
  1012. while (cur_token->type != CFTOKEN_NONE) {
  1013. if (cur_token->type != CFTOKEN_SPACETAB &&
  1014. cur_token->type != CFTOKEN_NEWLINE) {
  1015. if (preprocessor_line) {
  1016. cf_adderror_expected_newline(pp, cur_token);
  1017. if (!go_to_newline(&cur_token))
  1018. break;
  1019. }
  1020. if (newline && *cur_token->str.array == '#') {
  1021. next_token(&cur_token, true);
  1022. preprocessor_line = true;
  1023. if (!cf_preprocessor(pp, if_block, &cur_token))
  1024. break;
  1025. continue;
  1026. }
  1027. newline = false;
  1028. }
  1029. if (cur_token->type == CFTOKEN_NEWLINE) {
  1030. newline = true;
  1031. preprocessor_line = false;
  1032. } else if (cur_token->type == CFTOKEN_NONE) {
  1033. break;
  1034. }
  1035. cf_preprocess_addtoken(pp, &pp->tokens.da, &cur_token, NULL,
  1036. NULL);
  1037. }
  1038. *p_cur_token = cur_token;
  1039. }
  1040. void cf_preprocessor_init(struct cf_preprocessor *pp)
  1041. {
  1042. da_init(pp->defines);
  1043. da_init(pp->sys_include_dirs);
  1044. da_init(pp->dependencies);
  1045. da_init(pp->tokens);
  1046. pp->lex = NULL;
  1047. pp->ed = NULL;
  1048. pp->ignore_state = false;
  1049. }
  1050. void cf_preprocessor_free(struct cf_preprocessor *pp)
  1051. {
  1052. struct cf_lexer *dependencies = pp->dependencies.array;
  1053. char **sys_include_dirs = pp->sys_include_dirs.array;
  1054. struct cf_def *defs = pp->defines.array;
  1055. size_t i;
  1056. for (i = 0; i < pp->defines.num; i++)
  1057. cf_def_free(defs + i);
  1058. for (i = 0; i < pp->sys_include_dirs.num; i++)
  1059. bfree(sys_include_dirs[i]);
  1060. for (i = 0; i < pp->dependencies.num; i++)
  1061. cf_lexer_free(dependencies + i);
  1062. da_free(pp->defines);
  1063. da_free(pp->sys_include_dirs);
  1064. da_free(pp->dependencies);
  1065. da_free(pp->tokens);
  1066. pp->lex = NULL;
  1067. pp->ed = NULL;
  1068. pp->ignore_state = false;
  1069. }
  1070. bool cf_preprocess(struct cf_preprocessor *pp, struct cf_lexer *lex,
  1071. struct error_data *ed)
  1072. {
  1073. struct cf_token *token = cf_lexer_get_tokens(lex);
  1074. if (!token)
  1075. return false;
  1076. pp->ed = ed;
  1077. pp->lex = lex;
  1078. cf_preprocess_tokens(pp, false, &token);
  1079. da_push_back(pp->tokens, token);
  1080. return !lex->unexpected_eof;
  1081. }
  1082. void cf_preprocessor_add_def(struct cf_preprocessor *pp, struct cf_def *def)
  1083. {
  1084. struct cf_def *existing = cf_preprocess_get_def(pp, &def->name.str);
  1085. if (existing) {
  1086. struct dstr name;
  1087. dstr_init_copy_strref(&name, &def->name.str);
  1088. cf_addwarning(pp, &def->name, "Token $1 already defined",
  1089. name.array, NULL, NULL);
  1090. cf_addwarning(pp, &existing->name,
  1091. "Previous definition of $1 is here", name.array,
  1092. NULL, NULL);
  1093. cf_def_free(existing);
  1094. memcpy(existing, def, sizeof(struct cf_def));
  1095. } else {
  1096. da_push_back(pp->defines, def);
  1097. }
  1098. }
  1099. void cf_preprocessor_remove_def(struct cf_preprocessor *pp,
  1100. const char *def_name)
  1101. {
  1102. struct strref ref;
  1103. ref.array = def_name;
  1104. ref.len = strlen(def_name);
  1105. cf_preprocess_remove_def_strref(pp, &ref);
  1106. }