cf-lexer.c 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330
  1. /*
  2. * Copyright (c) 2013 Hugh Bailey <[email protected]>
  3. *
  4. * Permission to use, copy, modify, and distribute this software for any
  5. * purpose with or without fee is hereby granted, provided that the above
  6. * copyright notice and this permission notice appear in all copies.
  7. *
  8. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  10. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  11. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  12. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  13. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  14. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  15. */
  16. #include <ctype.h>
  17. #include <stdio.h>
  18. #include "platform.h"
  19. #include "cf-lexer.h"
  /*
   * Decodes a single escape sequence.
   *
   * On entry *p_src points at the character that follows the backslash.  On
   * return *p_src points just past the escape sequence and *p_dst just past the
   * decoded byte, if one was produced.
   *
   * Hex escapes consume at most two hex digits and octal escapes at most three
   * octal digits.  Only digits that are actually present are consumed, so the
   * source pointer never steps over a closing quote or a terminator.  A hex
   * escape without digits yields a zero byte.  Unrecognized escape characters
   * are consumed and produce no output; \u and \U are not supported.
   */
  static inline void cf_convert_from_escape_literal(char **p_dst,
                                                    const char **p_src)
  {
      char *dst = *p_dst;
      const char *src = *p_src;
      unsigned int value = 0;
      int digits;
      char ch = *(src++);

      switch (ch) {
      case '\0':
          /* nothing follows the backslash: stay on the terminator */
          src--;
          break;

      case '\'': *(dst++) = '\''; break;
      case '\"': *(dst++) = '\"'; break;
      case '\?': *(dst++) = '\?'; break;
      case '\\': *(dst++) = '\\'; break;
      case 'a':  *(dst++) = '\a'; break;
      case 'b':  *(dst++) = '\b'; break;
      case 'f':  *(dst++) = '\f'; break;
      case 'n':  *(dst++) = '\n'; break;
      case 'r':  *(dst++) = '\r'; break;
      case 't':  *(dst++) = '\t'; break;
      case 'v':  *(dst++) = '\v'; break;

      /* hex */
      case 'X':
      case 'x':
          for (digits = 0; digits < 2; digits++, src++) {
              unsigned char c = (unsigned char)*src;

              if (!isxdigit(c))
                  break;
              if (c >= '0' && c <= '9')
                  value = value * 16 + (unsigned int)(c - '0');
              else
                  value = value * 16 +
                          (unsigned int)(tolower(c) - 'a') + 10;
          }
          *(dst++) = (char)value;
          break;

      /* oct (the first digit was already consumed by the switch) */
      case '0': case '1': case '2': case '3':
      case '4': case '5': case '6': case '7':
          value = (unsigned int)(ch - '0');
          for (digits = 1; digits < 3; digits++, src++) {
              if (*src < '0' || *src > '7')
                  break;
              value = value * 8 + (unsigned int)(*src - '0');
          }
          *(dst++) = (char)value;
          break;

      default:
          /* unknown escape (including 'u' and 'U'): dropped */
          break;
      }

      *p_dst = dst;
      *p_src = src;
  }
  /*
   * Converts a quoted string/character literal into a newly allocated string
   * with the surrounding quotes removed and escape sequences decoded.
   *
   * literal: text that starts and ends with the same quote character (either
   *          a double or a single quote)
   * count:   number of characters in the literal including both quotes, or 0
   *          to use strlen(literal)
   *
   * Returns a buffer obtained from bzalloc(), or NULL if the input is not a
   * properly quoted literal.
   */
  char *cf_literal_to_str(const char *literal, size_t count)
  {
      const char *src;
      const char *end;
      char *str;
      char *dst;

      if (!literal)
          return NULL;
      if (!count)
          count = strlen(literal);
      if (count < 2)
          return NULL;
      if (literal[0] != literal[count - 1])
          return NULL;
      if (literal[0] != '\"' && literal[0] != '\'')
          return NULL;

      /*
       * The interior holds count-2 characters and every decoded byte
       * consumes at least one of them, so count-1 bytes always leave room
       * for the terminator.
       */
      str = bzalloc(count - 1);
      src = literal + 1;
      end = literal + count - 1; /* the closing quote */
      dst = str;

      /*
       * Bound the scan by the position of the closing quote rather than by
       * an iteration count: an escape sequence consumes several source
       * characters in one iteration, which previously let the closing quote
       * leak into the result.
       */
      while (src < end && *src) {
          if (*src == '\\') {
              src++;
              if (src >= end)
                  break; /* lone backslash before the closing quote */
              cf_convert_from_escape_literal(&dst, &src);
          } else {
              *(dst++) = *(src++);
          }
      }

      *dst = 0;
      return str;
  }
  83. static bool cf_is_token_break(struct base_token *start_token,
  84. const struct base_token *token)
  85. {
  86. switch (start_token->type) {
  87. case BASETOKEN_ALPHA:
  88. if (token->type == BASETOKEN_OTHER ||
  89. token->type == BASETOKEN_WHITESPACE)
  90. return true;
  91. break;
  92. case BASETOKEN_DIGIT:
  93. if (token->type == BASETOKEN_WHITESPACE
  94. || (token->type == BASETOKEN_OTHER
  95. && *token->text.array != '.'))
  96. return true;
  97. break;
  98. case BASETOKEN_WHITESPACE:
  99. /* lump all non-newline whitespace together when possible */
  100. if (is_space_or_tab(*start_token->text.array) &&
  101. is_space_or_tab(*token->text.array))
  102. break;
  103. return true;
  104. case BASETOKEN_OTHER:
  105. if (*start_token->text.array == '.' &&
  106. token->type == BASETOKEN_DIGIT) {
  107. start_token->type = BASETOKEN_DIGIT;
  108. break;
  109. }
  110. /* Falls through. */
  111. case BASETOKEN_NONE:
  112. return true;
  113. }
  114. return false;
  115. }
  /* Returns true if `array` starts with a backslash followed by a newline. */
  static inline bool cf_is_splice(const char *array)
  {
      if (array[0] != '\\')
          return false;

      return is_newline(array[1]);
  }
  /* Advances *parray past any consecutive line splices it points at. */
  static inline void cf_pass_any_splices(const char **parray)
  {
      const char *cursor = *parray;

      while (cf_is_splice(cursor))
          cursor += 1 + newline_size(cursor + 1);

      *parray = cursor;
  }
  /*
   * Returns true if `array` begins a comment: a '/' followed (after any line
   * splices) by '*' or '/'.
   */
  static inline bool cf_is_comment(const char *array)
  {
      const char *next;

      if (*array != '/')
          return false;

      next = array + 1;
      cf_pass_any_splices(&next);
      return *next == '*' || *next == '/';
  }
  134. static bool cf_lexer_process_comment(struct cf_lexer *lex,
  135. struct cf_token *out_token)
  136. {
  137. const char *offset;
  138. if (!cf_is_comment(out_token->unmerged_str.array))
  139. return false;
  140. offset = lex->base_lexer.offset;
  141. cf_pass_any_splices(&offset);
  142. strcpy(lex->write_offset++, " ");
  143. out_token->str.len = 1;
  144. if (*offset == '/') {
  145. while (*++offset && !is_newline(*offset))
  146. cf_pass_any_splices(&offset);
  147. } else if (*offset == '*') {
  148. bool was_star = false;
  149. lex->unexpected_eof = true;
  150. while (*++offset) {
  151. cf_pass_any_splices(&offset);
  152. if (was_star && *offset == '/') {
  153. offset++;
  154. lex->unexpected_eof = false;
  155. break;
  156. } else {
  157. was_star = (*offset == '*');
  158. }
  159. }
  160. }
  161. out_token->unmerged_str.len +=
  162. (size_t)(offset - out_token->unmerged_str.array);
  163. out_token->type = CFTOKEN_SPACETAB;
  164. lex->base_lexer.offset = offset;
  165. return true;
  166. }
  167. static inline void cf_lexer_write_strref(struct cf_lexer *lex,
  168. const struct strref *ref)
  169. {
  170. strncpy(lex->write_offset, ref->array, ref->len);
  171. lex->write_offset[ref->len] = 0;
  172. lex->write_offset += ref->len;
  173. }
  174. static bool cf_lexer_is_include(struct cf_lexer *lex)
  175. {
  176. bool found_include_import = false;
  177. bool found_preprocessor = false;
  178. size_t i;
  179. for (i = lex->tokens.num; i > 0; i--) {
  180. struct cf_token *token = lex->tokens.array+(i-1);
  181. if (is_space_or_tab(*token->str.array))
  182. continue;
  183. if (!found_include_import) {
  184. if (strref_cmp(&token->str, "include") != 0 &&
  185. strref_cmp(&token->str, "import") != 0)
  186. break;
  187. found_include_import = true;
  188. } else if (!found_preprocessor) {
  189. if (*token->str.array != '#')
  190. break;
  191. found_preprocessor = true;
  192. } else {
  193. return is_newline(*token->str.array);
  194. }
  195. }
  196. /* if starting line */
  197. return found_preprocessor && found_include_import;
  198. }
  199. static void cf_lexer_getstrtoken(struct cf_lexer *lex,
  200. struct cf_token *out_token, char delimiter,
  201. bool allow_escaped_delimiters)
  202. {
  203. const char *offset = lex->base_lexer.offset;
  204. bool escaped = false;
  205. out_token->unmerged_str.len++;
  206. out_token->str.len++;
  207. cf_lexer_write_strref(lex, &out_token->unmerged_str);
  208. while (*offset) {
  209. cf_pass_any_splices(&offset);
  210. if (*offset == delimiter) {
  211. if (!escaped) {
  212. *lex->write_offset++ = *offset;
  213. out_token->str.len++;
  214. offset++;
  215. break;
  216. }
  217. } else if (is_newline(*offset)) {
  218. break;
  219. }
  220. *lex->write_offset++ = *offset;
  221. out_token->str.len++;
  222. escaped = (allow_escaped_delimiters && *offset == '\\');
  223. offset++;
  224. }
  225. *lex->write_offset = 0;
  226. out_token->unmerged_str.len +=
  227. (size_t)(offset - out_token->unmerged_str.array);
  228. out_token->type = CFTOKEN_STRING;
  229. lex->base_lexer.offset = offset;
  230. }
  231. static bool cf_lexer_process_string(struct cf_lexer *lex,
  232. struct cf_token *out_token)
  233. {
  234. char ch = *out_token->unmerged_str.array;
  235. if (ch == '<' && cf_lexer_is_include(lex)) {
  236. cf_lexer_getstrtoken(lex, out_token, '>', false);
  237. return true;
  238. } else if (ch == '"' || ch == '\'') {
  239. cf_lexer_getstrtoken(lex, out_token, ch,
  240. !cf_lexer_is_include(lex));
  241. return true;
  242. }
  243. return false;
  244. }
  245. static inline enum cf_token_type cf_get_token_type(const struct cf_token *token,
  246. const struct base_token *start_token)
  247. {
  248. switch (start_token->type) {
  249. case BASETOKEN_ALPHA:
  250. return CFTOKEN_NAME;
  251. case BASETOKEN_DIGIT:
  252. return CFTOKEN_NUM;
  253. case BASETOKEN_WHITESPACE:
  254. if (is_newline(*token->str.array))
  255. return CFTOKEN_NEWLINE;
  256. else
  257. return CFTOKEN_SPACETAB;
  258. case BASETOKEN_NONE:
  259. case BASETOKEN_OTHER:
  260. break;
  261. }
  262. return CFTOKEN_OTHER;
  263. }
  264. static bool cf_lexer_nexttoken(struct cf_lexer *lex, struct cf_token *out_token)
  265. {
  266. struct base_token token, start_token;
  267. bool wrote_data = false;
  268. base_token_clear(&token);
  269. base_token_clear(&start_token);
  270. cf_token_clear(out_token);
  271. while (lexer_getbasetoken(&lex->base_lexer, &token, PARSE_WHITESPACE)) {
  272. /* reclassify underscore as alpha for alnum tokens */
  273. if (*token.text.array == '_')
  274. token.type = BASETOKEN_ALPHA;
  275. /* ignore escaped newlines to merge spliced lines */
  276. if (cf_is_splice(token.text.array)) {
  277. lex->base_lexer.offset +=
  278. newline_size(token.text.array+1);
  279. continue;
  280. }
  281. if (!wrote_data) {
  282. out_token->unmerged_str.array = token.text.array;
  283. out_token->str.array = lex->write_offset;
  284. /* if comment then output a space */
  285. if (cf_lexer_process_comment(lex, out_token))
  286. return true;
  287. /* process string tokens if any */
  288. if (cf_lexer_process_string(lex, out_token))
  289. return true;
  290. base_token_copy(&start_token, &token);
  291. wrote_data = true;
  292. } else if (cf_is_token_break(&start_token, &token)) {
  293. lex->base_lexer.offset -= token.text.len;
  294. break;
  295. }
  296. /* write token to CF lexer to account for splicing/comments */
  297. cf_lexer_write_strref(lex, &token.text);
  298. out_token->str.len += token.text.len;
  299. }
  300. if (wrote_data) {
  301. out_token->unmerged_str.len = (size_t)(lex->base_lexer.offset -
  302. out_token->unmerged_str.array);
  303. out_token->type = cf_get_token_type(out_token, &start_token);
  304. }
  305. return wrote_data;
  306. }
  307. void cf_lexer_init(struct cf_lexer *lex)
  308. {
  309. lexer_init(&lex->base_lexer);
  310. da_init(lex->tokens);
  311. lex->file = NULL;
  312. lex->reformatted = NULL;
  313. lex->write_offset = NULL;
  314. lex->unexpected_eof = false;
  315. }
  316. void cf_lexer_free(struct cf_lexer *lex)
  317. {
  318. bfree(lex->file);
  319. bfree(lex->reformatted);
  320. lexer_free(&lex->base_lexer);
  321. da_free(lex->tokens);
  322. lex->file = NULL;
  323. lex->reformatted = NULL;
  324. lex->write_offset = NULL;
  325. lex->unexpected_eof = false;
  326. }
  327. bool cf_lexer_lex(struct cf_lexer *lex, const char *str, const char *file)
  328. {
  329. struct cf_token token;
  330. struct cf_token *last_token = NULL;
  331. cf_lexer_free(lex);
  332. if (!str || !*str)
  333. return false;
  334. if (file)
  335. lex->file = bstrdup(file);
  336. lexer_start(&lex->base_lexer, str);
  337. cf_token_clear(&token);
  338. lex->reformatted = bmalloc(strlen(str) + 1);
  339. lex->reformatted[0] = 0;
  340. lex->write_offset = lex->reformatted;
  341. while (cf_lexer_nexttoken(lex, &token)) {
  342. if (last_token &&
  343. is_space_or_tab(*last_token->str.array) &&
  344. is_space_or_tab(*token.str.array)) {
  345. cf_token_add(last_token, &token);
  346. continue;
  347. }
  348. token.lex = lex;
  349. last_token = da_push_back_new(lex->tokens);
  350. memcpy(last_token, &token, sizeof(struct cf_token));
  351. }
  352. cf_token_clear(&token);
  353. token.str.array = lex->write_offset;
  354. token.unmerged_str.array = lex->base_lexer.offset;
  355. token.lex = lex;
  356. da_push_back(lex->tokens, &token);
  357. return !lex->unexpected_eof;
  358. }
  359. /* ------------------------------------------------------------------------- */
  360. struct macro_param {
  361. struct cf_token name;
  362. DARRAY(struct cf_token) tokens;
  363. };
  364. static inline void macro_param_init(struct macro_param *param)
  365. {
  366. cf_token_clear(&param->name);
  367. da_init(param->tokens);
  368. }
  369. static inline void macro_param_free(struct macro_param *param)
  370. {
  371. cf_token_clear(&param->name);
  372. da_free(param->tokens);
  373. }
  374. /* ------------------------------------------------------------------------- */
  375. struct macro_params {
  376. DARRAY(struct macro_param) params;
  377. };
  378. static inline void macro_params_init(struct macro_params *params)
  379. {
  380. da_init(params->params);
  381. }
  382. static inline void macro_params_free(struct macro_params *params)
  383. {
  384. size_t i;
  385. for (i = 0; i < params->params.num; i++)
  386. macro_param_free(params->params.array+i);
  387. da_free(params->params);
  388. }
  389. static inline struct macro_param *get_macro_param(
  390. const struct macro_params *params,
  391. const struct strref *name)
  392. {
  393. size_t i;
  394. if (!params)
  395. return NULL;
  396. for (i = 0; i < params->params.num; i++) {
  397. struct macro_param *param = params->params.array+i;
  398. if (strref_cmp_strref(&param->name.str, name) == 0)
  399. return param;
  400. }
  401. return NULL;
  402. }
  403. /* ------------------------------------------------------------------------- */
  /* forward declarations: directive handling and token processing recurse */
  static bool cf_preprocessor(struct cf_preprocessor *pp, bool if_block,
                              struct cf_token **p_cur_token);

  static void cf_preprocess_tokens(struct cf_preprocessor *pp, bool if_block,
                                   struct cf_token **p_cur_token);
  408. static inline bool go_to_newline(struct cf_token **p_cur_token)
  409. {
  410. struct cf_token *cur_token = *p_cur_token;
  411. while (cur_token->type != CFTOKEN_NEWLINE &&
  412. cur_token->type != CFTOKEN_NONE)
  413. cur_token++;
  414. *p_cur_token = cur_token;
  415. return cur_token->type != CFTOKEN_NONE;
  416. }
  417. static inline bool next_token(struct cf_token **p_cur_token, bool preprocessor)
  418. {
  419. struct cf_token *cur_token = *p_cur_token;
  420. if (cur_token->type != CFTOKEN_NONE)
  421. cur_token++;
  422. /* if preprocessor, stop at newline */
  423. while (cur_token->type == CFTOKEN_SPACETAB &&
  424. (preprocessor || cur_token->type == CFTOKEN_NEWLINE))
  425. cur_token++;
  426. *p_cur_token = cur_token;
  427. return cur_token->type != CFTOKEN_NONE;
  428. }
  429. static inline void cf_gettokenoffset(struct cf_preprocessor *pp,
  430. const struct cf_token *token, uint32_t *row, uint32_t *col)
  431. {
  432. lexer_getstroffset(&pp->lex->base_lexer,
  433. token->unmerged_str.array, row, col);
  434. }
  435. static void cf_addew(struct cf_preprocessor *pp, const struct cf_token *token,
  436. const char *message, int error_level,
  437. const char *val1, const char *val2, const char *val3)
  438. {
  439. uint32_t row, col;
  440. cf_gettokenoffset(pp, token, &row, &col);
  441. if (!val1 && !val2 && !val3) {
  442. error_data_add(pp->ed, token->lex->file, row, col,
  443. message, error_level);
  444. } else {
  445. struct dstr formatted;
  446. dstr_init(&formatted);
  447. dstr_safe_printf(&formatted, message, val1, val2, val3, NULL);
  448. error_data_add(pp->ed, token->lex->file, row, col,
  449. formatted.array, error_level);
  450. dstr_free(&formatted);
  451. }
  452. }
  453. static inline void cf_adderror(struct cf_preprocessor *pp,
  454. const struct cf_token *token, const char *error,
  455. const char *val1, const char *val2, const char *val3)
  456. {
  457. cf_addew(pp, token, error, LEX_ERROR, val1, val2, val3);
  458. }
  459. static inline void cf_addwarning(struct cf_preprocessor *pp,
  460. const struct cf_token *token, const char *warning,
  461. const char *val1, const char *val2, const char *val3)
  462. {
  463. cf_addew(pp, token, warning, LEX_WARNING, val1, val2, val3);
  464. }
  /* Records an "Expected $1" error with `expecting` as its only value. */
  static inline void cf_adderror_expecting(struct cf_preprocessor *pp,
                                           const struct cf_token *token,
                                           const char *expecting)
  {
      static const char *const format = "Expected $1";

      cf_adderror(pp, token, format, expecting, NULL, NULL);
  }
  /* Records the error for trailing tokens after a preprocessor directive. */
  static inline void cf_adderror_expected_newline(struct cf_preprocessor *pp,
                                                  const struct cf_token *token)
  {
      static const char *const text =
          "Unexpected token after preprocessor, expected newline";

      cf_adderror(pp, token, text, NULL, NULL, NULL);
  }
  /* Records the error for a conditional block that is never closed. */
  static inline void cf_adderror_unexpected_endif_eof(struct cf_preprocessor *pp,
                                                      const struct cf_token *token)
  {
      static const char *const text = "Unexpected end of file before #endif";

      cf_adderror(pp, token, text, NULL, NULL, NULL);
  }
  /* Records a plain "Unexpected end of file" error at `token`. */
  static inline void cf_adderror_unexpected_eof(struct cf_preprocessor *pp,
                                                const struct cf_token *token)
  {
      static const char *const text = "Unexpected end of file";

      cf_adderror(pp, token, text, NULL, NULL, NULL);
  }
  491. static inline void insert_path(struct cf_preprocessor *pp,
  492. struct dstr *str_file)
  493. {
  494. const char *file;
  495. const char *slash;
  496. if (pp && pp->lex && pp->lex->file) {
  497. file = pp->lex->file;
  498. slash = strrchr(file, '/');
  499. if (slash) {
  500. struct dstr path = {0};
  501. dstr_ncopy(&path, file, slash - file + 1);
  502. dstr_insert_dstr(str_file, 0, &path);
  503. dstr_free(&path);
  504. }
  505. }
  506. }
  507. static void cf_include_file(struct cf_preprocessor *pp,
  508. const struct cf_token *file_token)
  509. {
  510. struct cf_lexer new_lex;
  511. struct dstr str_file;
  512. FILE *file;
  513. char *file_data;
  514. struct cf_token *tokens;
  515. size_t i;
  516. dstr_init(&str_file);
  517. dstr_copy_strref(&str_file, &file_token->str);
  518. dstr_mid(&str_file, &str_file, 1, str_file.len-2);
  519. insert_path(pp, &str_file);
  520. /* if dependency already exists, run preprocessor on it */
  521. for (i = 0; i < pp->dependencies.num; i++) {
  522. struct cf_lexer *dep = pp->dependencies.array+i;
  523. if (strcmp(dep->file, str_file.array) == 0) {
  524. tokens = cf_lexer_get_tokens(dep);
  525. cf_preprocess_tokens(pp, false, &tokens);
  526. goto exit;
  527. }
  528. }
  529. file = os_fopen(str_file.array, "rb");
  530. if (!file) {
  531. cf_adderror(pp, file_token, "Could not open file '$1'",
  532. file_token->str.array, NULL, NULL);
  533. goto exit;
  534. }
  535. os_fread_utf8(file, &file_data);
  536. fclose(file);
  537. cf_lexer_init(&new_lex);
  538. cf_lexer_lex(&new_lex, file_data, str_file.array);
  539. tokens = cf_lexer_get_tokens(&new_lex);
  540. cf_preprocess_tokens(pp, false, &tokens);
  541. bfree(file_data);
  542. da_push_back(pp->dependencies, &new_lex);
  543. exit:
  544. dstr_free(&str_file);
  545. }
  546. static inline bool is_sys_include(struct strref *ref)
  547. {
  548. return ref->len >= 2 &&
  549. ref->array[0] == '<' && ref->array[ref->len-1] == '>';
  550. }
  551. static inline bool is_loc_include(struct strref *ref)
  552. {
  553. return ref->len >= 2 &&
  554. ref->array[0] == '"' && ref->array[ref->len-1] == '"';
  555. }
  556. static void cf_preprocess_include(struct cf_preprocessor *pp,
  557. struct cf_token **p_cur_token)
  558. {
  559. struct cf_token *cur_token = *p_cur_token;
  560. if (pp->ignore_state) {
  561. go_to_newline(p_cur_token);
  562. return;
  563. }
  564. next_token(&cur_token, true);
  565. if (cur_token->type != CFTOKEN_STRING) {
  566. cf_adderror_expecting(pp, cur_token, "string");
  567. go_to_newline(&cur_token);
  568. goto exit;
  569. }
  570. if (is_sys_include(&cur_token->str)) {
  571. /* TODO */
  572. } else if (is_loc_include(&cur_token->str)) {
  573. if (!pp->ignore_state)
  574. cf_include_file(pp, cur_token);
  575. } else {
  576. cf_adderror(pp, cur_token, "Invalid or incomplete string",
  577. NULL, NULL, NULL);
  578. go_to_newline(&cur_token);
  579. goto exit;
  580. }
  581. cur_token++;
  582. exit:
  583. *p_cur_token = cur_token;
  584. }
  585. static bool cf_preprocess_macro_params(struct cf_preprocessor *pp,
  586. struct cf_def *def, struct cf_token **p_cur_token)
  587. {
  588. struct cf_token *cur_token = *p_cur_token;
  589. bool success = false;
  590. def->macro = true;
  591. do {
  592. next_token(&cur_token, true);
  593. if (cur_token->type != CFTOKEN_NAME) {
  594. cf_adderror_expecting(pp, cur_token, "identifier");
  595. go_to_newline(&cur_token);
  596. goto exit;
  597. }
  598. cf_def_addparam(def, cur_token);
  599. next_token(&cur_token, true);
  600. if (cur_token->type != CFTOKEN_OTHER
  601. || (*cur_token->str.array != ','
  602. && *cur_token->str.array != ')')) {
  603. cf_adderror_expecting(pp, cur_token, "',' or ')'");
  604. go_to_newline(&cur_token);
  605. goto exit;
  606. }
  607. } while (*cur_token->str.array != ')');
  608. /* ended properly, now go to first define token (or newline) */
  609. next_token(&cur_token, true);
  610. success = true;
  611. exit:
  612. *p_cur_token = cur_token;
  613. return success;
  614. }
  615. #define INVALID_INDEX ((size_t)-1)
  616. static inline size_t cf_preprocess_get_def_idx(struct cf_preprocessor *pp,
  617. const struct strref *def_name)
  618. {
  619. struct cf_def *array = pp->defines.array;
  620. size_t i;
  621. for (i = 0; i < pp->defines.num; i++) {
  622. struct cf_def *cur_def = array+i;
  623. if (strref_cmp_strref(&cur_def->name.str, def_name) == 0)
  624. return i;
  625. }
  626. return INVALID_INDEX;
  627. }
  628. static inline struct cf_def *cf_preprocess_get_def(struct cf_preprocessor *pp,
  629. const struct strref *def_name)
  630. {
  631. size_t idx = cf_preprocess_get_def_idx(pp, def_name);
  632. if (idx == INVALID_INDEX)
  633. return NULL;
  634. return pp->defines.array+idx;
  635. }
  636. static char space_filler[2] = " ";
  637. static inline void append_space(struct cf_preprocessor *pp,
  638. struct darray *tokens, const struct cf_token *base)
  639. {
  640. struct cf_token token;
  641. strref_set(&token.str, space_filler, 1);
  642. token.type = CFTOKEN_SPACETAB;
  643. if (base) {
  644. token.lex = base->lex;
  645. strref_copy(&token.unmerged_str, &base->unmerged_str);
  646. } else {
  647. token.lex = pp->lex;
  648. strref_copy(&token.unmerged_str, &token.str);
  649. }
  650. darray_push_back(sizeof(struct cf_token), tokens, &token);
  651. }
  652. static inline void append_end_token(struct darray *tokens)
  653. {
  654. struct cf_token end;
  655. cf_token_clear(&end);
  656. darray_push_back(sizeof(struct cf_token), tokens, &end);
  657. }
  658. static void cf_preprocess_define(struct cf_preprocessor *pp,
  659. struct cf_token **p_cur_token)
  660. {
  661. struct cf_token *cur_token = *p_cur_token;
  662. struct cf_def def;
  663. if (pp->ignore_state) {
  664. go_to_newline(p_cur_token);
  665. return;
  666. }
  667. cf_def_init(&def);
  668. next_token(&cur_token, true);
  669. if (cur_token->type != CFTOKEN_NAME) {
  670. cf_adderror_expecting(pp, cur_token, "identifier");
  671. go_to_newline(&cur_token);
  672. goto exit;
  673. }
  674. append_space(pp, &def.tokens.da, NULL);
  675. cf_token_copy(&def.name, cur_token);
  676. if (!next_token(&cur_token, true))
  677. goto complete;
  678. /* process macro */
  679. if (*cur_token->str.array == '(') {
  680. if (!cf_preprocess_macro_params(pp, &def, &cur_token))
  681. goto error;
  682. }
  683. while (cur_token->type != CFTOKEN_NEWLINE &&
  684. cur_token->type != CFTOKEN_NONE)
  685. cf_def_addtoken(&def, cur_token++);
  686. complete:
  687. append_end_token(&def.tokens.da);
  688. append_space(pp, &def.tokens.da, NULL);
  689. da_push_back(pp->defines, &def);
  690. goto exit;
  691. error:
  692. cf_def_free(&def);
  693. exit:
  694. *p_cur_token = cur_token;
  695. }
  696. static inline void cf_preprocess_remove_def_strref(struct cf_preprocessor *pp,
  697. const struct strref *ref)
  698. {
  699. size_t def_idx = cf_preprocess_get_def_idx(pp, ref);
  700. if (def_idx != INVALID_INDEX) {
  701. struct cf_def *array = pp->defines.array;
  702. cf_def_free(array+def_idx);
  703. da_erase(pp->defines, def_idx);
  704. }
  705. }
  706. static void cf_preprocess_undef(struct cf_preprocessor *pp,
  707. struct cf_token **p_cur_token)
  708. {
  709. struct cf_token *cur_token = *p_cur_token;
  710. if (pp->ignore_state) {
  711. go_to_newline(p_cur_token);
  712. return;
  713. }
  714. next_token(&cur_token, true);
  715. if (cur_token->type != CFTOKEN_NAME) {
  716. cf_adderror_expecting(pp, cur_token, "identifier");
  717. go_to_newline(&cur_token);
  718. goto exit;
  719. }
  720. cf_preprocess_remove_def_strref(pp, &cur_token->str);
  721. cur_token++;
  722. exit:
  723. *p_cur_token = cur_token;
  724. }
  725. /* Processes an #ifdef/#ifndef/#if/#else/#elif sub block recursively */
  726. static inline bool cf_preprocess_subblock(struct cf_preprocessor *pp,
  727. bool ignore, struct cf_token **p_cur_token)
  728. {
  729. bool eof;
  730. if (!next_token(p_cur_token, true))
  731. return false;
  732. if (!pp->ignore_state) {
  733. pp->ignore_state = ignore;
  734. cf_preprocess_tokens(pp, true, p_cur_token);
  735. pp->ignore_state = false;
  736. } else {
  737. cf_preprocess_tokens(pp, true, p_cur_token);
  738. }
  739. eof = ((*p_cur_token)->type == CFTOKEN_NONE);
  740. if (eof)
  741. cf_adderror_unexpected_endif_eof(pp, *p_cur_token);
  742. return !eof;
  743. }
  744. static void cf_preprocess_ifdef(struct cf_preprocessor *pp,
  745. bool ifnot, struct cf_token **p_cur_token)
  746. {
  747. struct cf_token *cur_token = *p_cur_token;
  748. struct cf_def *def;
  749. bool is_true;
  750. next_token(&cur_token, true);
  751. if (cur_token->type != CFTOKEN_NAME) {
  752. cf_adderror_expecting(pp, cur_token, "identifier");
  753. go_to_newline(&cur_token);
  754. goto exit;
  755. }
  756. def = cf_preprocess_get_def(pp, &cur_token->str);
  757. is_true = (def == NULL) == ifnot;
  758. if (!cf_preprocess_subblock(pp, !is_true, &cur_token))
  759. goto exit;
  760. if (strref_cmp(&cur_token->str, "else") == 0) {
  761. if (!cf_preprocess_subblock(pp, is_true, &cur_token))
  762. goto exit;
  763. /*} else if (strref_cmp(&cur_token->str, "elif") == 0) {*/
  764. }
  765. cur_token++;
  766. exit:
  767. *p_cur_token = cur_token;
  768. }
  769. static bool cf_preprocessor(struct cf_preprocessor *pp,
  770. bool if_block, struct cf_token **p_cur_token)
  771. {
  772. struct cf_token *cur_token = *p_cur_token;
  773. if (strref_cmp(&cur_token->str, "include") == 0) {
  774. cf_preprocess_include(pp, p_cur_token);
  775. } else if (strref_cmp(&cur_token->str, "define") == 0) {
  776. cf_preprocess_define(pp, p_cur_token);
  777. } else if (strref_cmp(&cur_token->str, "undef") == 0) {
  778. cf_preprocess_undef(pp, p_cur_token);
  779. } else if (strref_cmp(&cur_token->str, "ifdef") == 0) {
  780. cf_preprocess_ifdef(pp, false, p_cur_token);
  781. } else if (strref_cmp(&cur_token->str, "ifndef") == 0) {
  782. cf_preprocess_ifdef(pp, true, p_cur_token);
  783. /*} else if (strref_cmp(&cur_token->str, "if") == 0) {
  784. TODO;*/
  785. } else if (strref_cmp(&cur_token->str, "else") == 0 ||
  786. /*strref_cmp(&cur_token->str, "elif") == 0 ||*/
  787. strref_cmp(&cur_token->str, "endif") == 0) {
  788. if (!if_block) {
  789. struct dstr name;
  790. dstr_init_copy_strref(&name, &cur_token->str);
  791. cf_adderror(pp, cur_token,"#$1 outside of "
  792. "#if/#ifdef/#ifndef block",
  793. name.array, NULL, NULL);
  794. dstr_free(&name);
  795. (*p_cur_token)++;
  796. return true;
  797. }
  798. return false;
  799. } else if (cur_token->type != CFTOKEN_NEWLINE &&
  800. cur_token->type != CFTOKEN_NONE) {
  801. /*
  802. * TODO: language-specific preprocessor stuff should be sent to
  803. * handler of some sort
  804. */
  805. (*p_cur_token)++;
  806. }
  807. return true;
  808. }
  /*
   * Forward declaration: the macro helpers defined below call
   * cf_preprocess_addtoken, and cf_preprocess_addtoken calls them in turn.
   */
  static void cf_preprocess_addtoken(
      struct cf_preprocessor *pp,
      struct darray *dst, /* struct cf_token */
      struct cf_token **p_cur_token,
      const struct cf_token *base,
      const struct macro_params *params);
  814. /*
  815. * collects tokens for a macro parameter
  816. *
  817. * note that it is important to make sure that any usage of function calls
  818. * within a macro parameter is preserved, example MACRO(func(1, 2), 3), do not
  819. * let it stop on the comma at "1,"
  820. */
  821. static void cf_preprocess_save_macro_param(struct cf_preprocessor *pp,
  822. struct cf_token **p_cur_token, struct macro_param *param,
  823. const struct cf_token *base,
  824. const struct macro_params *cur_params)
  825. {
  826. struct cf_token *cur_token = *p_cur_token;
  827. int brace_count = 0;
  828. append_space(pp, &param->tokens.da, base);
  829. while (cur_token->type != CFTOKEN_NONE) {
  830. if (*cur_token->str.array == '(') {
  831. brace_count++;
  832. } else if (*cur_token->str.array == ')') {
  833. if (brace_count)
  834. brace_count--;
  835. else
  836. break;
  837. } else if (*cur_token->str.array == ',') {
  838. if (!brace_count)
  839. break;
  840. }
  841. cf_preprocess_addtoken(pp, &param->tokens.da, &cur_token, base,
  842. cur_params);
  843. }
  844. if (cur_token->type == CFTOKEN_NONE)
  845. cf_adderror_unexpected_eof(pp, cur_token);
  846. append_space(pp, &param->tokens.da, base);
  847. append_end_token(&param->tokens.da);
  848. *p_cur_token = cur_token;
  849. }
  850. static inline bool param_is_whitespace(const struct macro_param *param)
  851. {
  852. struct cf_token *array = param->tokens.array;
  853. size_t i;
  854. for (i = 0; i < param->tokens.num; i++)
  855. if (array[i].type != CFTOKEN_NONE &&
  856. array[i].type != CFTOKEN_SPACETAB &&
  857. array[i].type != CFTOKEN_NEWLINE)
  858. return false;
  859. return true;
  860. }
  861. /* collects parameter tokens of a used macro and stores them for the unwrap */
  862. static void cf_preprocess_save_macro_params(struct cf_preprocessor *pp,
  863. struct cf_token **p_cur_token, const struct cf_def *def,
  864. const struct cf_token *base,
  865. const struct macro_params *cur_params,
  866. struct macro_params *dst)
  867. {
  868. struct cf_token *cur_token = *p_cur_token;
  869. size_t count = 0;
  870. next_token(&cur_token, false);
  871. if (cur_token->type != CFTOKEN_OTHER || *cur_token->str.array != '(') {
  872. cf_adderror_expecting(pp, cur_token, "'('");
  873. goto exit;
  874. }
  875. do {
  876. struct macro_param param;
  877. macro_param_init(&param);
  878. cur_token++;
  879. count++;
  880. cf_preprocess_save_macro_param(pp, &cur_token, &param, base,
  881. cur_params);
  882. if (cur_token->type != CFTOKEN_OTHER
  883. || (*cur_token->str.array != ','
  884. && *cur_token->str.array != ')')) {
  885. macro_param_free(&param);
  886. cf_adderror_expecting(pp, cur_token, "',' or ')'");
  887. goto exit;
  888. }
  889. if (param_is_whitespace(&param)) {
  890. /* if 0-param macro, ignore first entry */
  891. if (count == 1 && !def->params.num &&
  892. *cur_token->str.array == ')') {
  893. macro_param_free(&param);
  894. break;
  895. }
  896. }
  897. if (count <= def->params.num) {
  898. cf_token_copy(&param.name,
  899. cf_def_getparam(def, count-1));
  900. da_push_back(dst->params, &param);
  901. } else {
  902. macro_param_free(&param);
  903. }
  904. } while (*cur_token->str.array != ')');
  905. if (count != def->params.num)
  906. cf_adderror(pp, cur_token,
  907. "Mismatching number of macro parameters",
  908. NULL, NULL, NULL);
  909. exit:
  910. *p_cur_token = cur_token;
  911. }
  912. static inline void cf_preprocess_unwrap_param(struct cf_preprocessor *pp,
  913. struct darray *dst, /* struct cf_token */
  914. struct cf_token **p_cur_token,
  915. const struct cf_token *base,
  916. const struct macro_param *param)
  917. {
  918. struct cf_token *cur_token = *p_cur_token;
  919. struct cf_token *cur_param_token = param->tokens.array;
  920. while (cur_param_token->type != CFTOKEN_NONE)
  921. cf_preprocess_addtoken(pp, dst, &cur_param_token, base, NULL);
  922. cur_token++;
  923. *p_cur_token = cur_token;
  924. }
  925. static inline void cf_preprocess_unwrap_define(struct cf_preprocessor *pp,
  926. struct darray *dst, /* struct cf_token */
  927. struct cf_token **p_cur_token,
  928. const struct cf_token *base,
  929. const struct cf_def *def,
  930. const struct macro_params *cur_params)
  931. {
  932. struct cf_token *cur_token = *p_cur_token;
  933. struct macro_params new_params;
  934. struct cf_token *cur_def_token = def->tokens.array;
  935. macro_params_init(&new_params);
  936. if (def->macro)
  937. cf_preprocess_save_macro_params(pp, &cur_token, def, base,
  938. cur_params, &new_params);
  939. while (cur_def_token->type != CFTOKEN_NONE)
  940. cf_preprocess_addtoken(pp, dst, &cur_def_token, base,
  941. &new_params);
  942. macro_params_free(&new_params);
  943. cur_token++;
  944. *p_cur_token = cur_token;
  945. }
  946. static void cf_preprocess_addtoken(struct cf_preprocessor *pp,
  947. struct darray *dst, /* struct cf_token */
  948. struct cf_token **p_cur_token,
  949. const struct cf_token *base,
  950. const struct macro_params *params)
  951. {
  952. struct cf_token *cur_token = *p_cur_token;
  953. if (pp->ignore_state)
  954. goto ignore;
  955. if (!base)
  956. base = cur_token;
  957. if (cur_token->type == CFTOKEN_NAME) {
  958. struct cf_def *def;
  959. struct macro_param *param;
  960. param = get_macro_param(params, &cur_token->str);
  961. if (param) {
  962. cf_preprocess_unwrap_param(pp, dst, &cur_token, base,
  963. param);
  964. goto exit;
  965. }
  966. def = cf_preprocess_get_def(pp, &cur_token->str);
  967. if (def) {
  968. cf_preprocess_unwrap_define(pp, dst, &cur_token, base,
  969. def, params);
  970. goto exit;
  971. }
  972. }
  973. darray_push_back(sizeof(struct cf_token), dst, cur_token);
  974. ignore:
  975. cur_token++;
  976. exit:
  977. *p_cur_token = cur_token;
  978. }
  979. static void cf_preprocess_tokens(struct cf_preprocessor *pp,
  980. bool if_block, struct cf_token **p_cur_token)
  981. {
  982. bool newline = true;
  983. bool preprocessor_line = if_block;
  984. struct cf_token *cur_token = *p_cur_token;
  985. while (cur_token->type != CFTOKEN_NONE) {
  986. if(cur_token->type != CFTOKEN_SPACETAB &&
  987. cur_token->type != CFTOKEN_NEWLINE) {
  988. if (preprocessor_line) {
  989. cf_adderror_expected_newline(pp, cur_token);
  990. if (!go_to_newline(&cur_token))
  991. break;
  992. }
  993. if (newline && *cur_token->str.array == '#') {
  994. next_token(&cur_token, true);
  995. preprocessor_line = true;
  996. if (!cf_preprocessor(pp, if_block, &cur_token))
  997. break;
  998. continue;
  999. }
  1000. newline = false;
  1001. }
  1002. if (cur_token->type == CFTOKEN_NEWLINE) {
  1003. newline = true;
  1004. preprocessor_line = false;
  1005. } else if (cur_token->type == CFTOKEN_NONE) {
  1006. break;
  1007. }
  1008. cf_preprocess_addtoken(pp, &pp->tokens.da, &cur_token, NULL,
  1009. NULL);
  1010. }
  1011. *p_cur_token = cur_token;
  1012. }
  1013. void cf_preprocessor_init(struct cf_preprocessor *pp)
  1014. {
  1015. da_init(pp->defines);
  1016. da_init(pp->sys_include_dirs);
  1017. da_init(pp->dependencies);
  1018. da_init(pp->tokens);
  1019. pp->lex = NULL;
  1020. pp->ed = NULL;
  1021. pp->ignore_state = false;
  1022. }
  1023. void cf_preprocessor_free(struct cf_preprocessor *pp)
  1024. {
  1025. struct cf_lexer *dependencies = pp->dependencies.array;
  1026. char **sys_include_dirs = pp->sys_include_dirs.array;
  1027. struct cf_def *defs = pp->defines.array;
  1028. size_t i;
  1029. for (i = 0; i <pp->defines.num; i++)
  1030. cf_def_free(defs+i);
  1031. for (i = 0; i < pp->sys_include_dirs.num; i++)
  1032. bfree(sys_include_dirs[i]);
  1033. for (i = 0; i < pp->dependencies.num; i++)
  1034. cf_lexer_free(dependencies+i);
  1035. da_free(pp->defines);
  1036. da_free(pp->sys_include_dirs);
  1037. da_free(pp->dependencies);
  1038. da_free(pp->tokens);
  1039. pp->lex = NULL;
  1040. pp->ed = NULL;
  1041. pp->ignore_state = false;
  1042. }
  1043. bool cf_preprocess(struct cf_preprocessor *pp, struct cf_lexer *lex,
  1044. struct error_data *ed)
  1045. {
  1046. struct cf_token *token = cf_lexer_get_tokens(lex);
  1047. if (!token)
  1048. return false;
  1049. pp->ed = ed;
  1050. pp->lex = lex;
  1051. cf_preprocess_tokens(pp, false, &token);
  1052. da_push_back(pp->tokens, token);
  1053. return !lex->unexpected_eof;
  1054. }
  1055. void cf_preprocessor_add_def(struct cf_preprocessor *pp, struct cf_def *def)
  1056. {
  1057. struct cf_def *existing = cf_preprocess_get_def(pp, &def->name.str);
  1058. if (existing) {
  1059. struct dstr name;
  1060. dstr_init_copy_strref(&name, &def->name.str);
  1061. cf_addwarning(pp, &def->name, "Token $1 already defined",
  1062. name.array, NULL, NULL);
  1063. cf_addwarning(pp, &existing->name,
  1064. "Previous definition of $1 is here",
  1065. name.array, NULL, NULL);
  1066. cf_def_free(existing);
  1067. memcpy(existing, def, sizeof(struct cf_def));
  1068. } else {
  1069. da_push_back(pp->defines, def);
  1070. }
  1071. }
  1072. void cf_preprocessor_remove_def(struct cf_preprocessor *pp,
  1073. const char *def_name)
  1074. {
  1075. struct strref ref;
  1076. ref.array = def_name;
  1077. ref.len = strlen(def_name);
  1078. cf_preprocess_remove_def_strref(pp, &ref);
  1079. }