cf-lexer.c 31 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309
  1. /*
  2. * Copyright (c) 2013 Hugh Bailey <[email protected]>
  3. *
  4. * Permission to use, copy, modify, and distribute this software for any
  5. * purpose with or without fee is hereby granted, provided that the above
  6. * copyright notice and this permission notice appear in all copies.
  7. *
  8. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  10. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  11. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  12. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  13. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  14. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  15. */
  16. #include <ctype.h>
  17. #include <stdio.h>
  18. #include "platform.h"
  19. #include "cf-lexer.h"
  20. static inline void cf_convert_from_escape_literal(char **p_dst,
  21. const char **p_src)
  22. {
  23. char *dst = *p_dst;
  24. const char *src = *p_src;
  25. switch (*(src++)) {
  26. case '\'': *(dst++) = '\''; break;
  27. case '\"': *(dst++) = '\"'; break;
  28. case '\?': *(dst++) = '\?'; break;
  29. case '\\': *(dst++) = '\\'; break;
  30. case '0': *(dst++) = '\0'; break;
  31. case 'a': *(dst++) = '\a'; break;
  32. case 'b': *(dst++) = '\b'; break;
  33. case 'f': *(dst++) = '\f'; break;
  34. case 'n': *(dst++) = '\n'; break;
  35. case 'r': *(dst++) = '\r'; break;
  36. case 't': *(dst++) = '\t'; break;
  37. case 'v': *(dst++) = '\v'; break;
  38. /* hex */
  39. case 'X':
  40. case 'x':
  41. *(dst++) = (char)strtoul(src, NULL, 16);
  42. src += 2;
  43. break;
  44. /* oct */
  45. default:
  46. if (isdigit(*src)) {
  47. *(dst++) = (char)strtoul(src, NULL, 8);
  48. src += 3;
  49. }
  50. /* case 'u':
  51. case 'U': */
  52. }
  53. *p_dst = dst;
  54. *p_src = src;
  55. }
  56. char *cf_literal_to_str(const char *literal, size_t count)
  57. {
  58. const char *temp_src;
  59. char *str, *temp_dst;
  60. if (!count)
  61. count = strlen(literal);
  62. if (count < 2)
  63. return NULL;
  64. if (literal[0] != literal[count-1])
  65. return NULL;
  66. if (literal[0] != '\"' && literal[0] != '\'')
  67. return NULL;
  68. str = bmalloc(count - 1);
  69. temp_src = literal;
  70. temp_dst = str;
  71. while (*temp_src) {
  72. if (*temp_src == '\\') {
  73. temp_src++;
  74. cf_convert_from_escape_literal(&temp_dst, &temp_src);
  75. } else {
  76. *(temp_dst++) = *(temp_src++);
  77. }
  78. }
  79. *temp_dst = 0;
  80. return str;
  81. }
  82. static bool cf_is_token_break(struct base_token *start_token,
  83. const struct base_token *token)
  84. {
  85. switch (start_token->type) {
  86. case BASETOKEN_ALPHA:
  87. if (token->type == BASETOKEN_OTHER ||
  88. token->type == BASETOKEN_WHITESPACE)
  89. return true;
  90. break;
  91. case BASETOKEN_DIGIT:
  92. if (token->type == BASETOKEN_WHITESPACE
  93. || (token->type == BASETOKEN_OTHER
  94. && *token->text.array != '.'))
  95. return true;
  96. break;
  97. case BASETOKEN_WHITESPACE:
  98. /* lump all non-newline whitespace together when possible */
  99. if (is_space_or_tab(*start_token->text.array) &&
  100. is_space_or_tab(*token->text.array))
  101. break;
  102. return true;
  103. case BASETOKEN_OTHER:
  104. if (*start_token->text.array == '.' &&
  105. token->type == BASETOKEN_DIGIT) {
  106. start_token->type = BASETOKEN_DIGIT;
  107. break;
  108. }
  109. case BASETOKEN_NONE:
  110. return true;
  111. }
  112. return false;
  113. }
  114. static inline bool cf_is_splice(const char *array)
  115. {
  116. return (*array == '\\' && is_newline(array[1]));
  117. }
  118. static inline void cf_pass_any_splices(const char **parray)
  119. {
  120. while (cf_is_splice(*parray))
  121. *parray += 1 + newline_size((*parray)+1);
  122. }
  123. static inline bool cf_is_comment(const char *array)
  124. {
  125. const char *offset = array;
  126. if (*offset++ == '/') {
  127. cf_pass_any_splices(&offset);
  128. return (*offset == '*' || *offset == '/');
  129. }
  130. return false;
  131. }
  132. static bool cf_lexer_process_comment(struct cf_lexer *lex,
  133. struct cf_token *out_token)
  134. {
  135. const char *offset;
  136. if (!cf_is_comment(out_token->unmerged_str.array))
  137. return false;
  138. offset = lex->base_lexer.offset;
  139. cf_pass_any_splices(&offset);
  140. strcpy(lex->write_offset++, " ");
  141. out_token->str.len = 1;
  142. if (*offset == '/') {
  143. while (*++offset && !is_newline(*offset))
  144. cf_pass_any_splices(&offset);
  145. } else if (*offset == '*') {
  146. bool was_star = false;
  147. lex->unexpected_eof = true;
  148. while (*++offset) {
  149. cf_pass_any_splices(&offset);
  150. if (was_star && *offset == '/') {
  151. offset++;
  152. lex->unexpected_eof = false;
  153. break;
  154. } else {
  155. was_star = (*offset == '*');
  156. }
  157. }
  158. }
  159. out_token->unmerged_str.len +=
  160. (size_t)(offset - out_token->unmerged_str.array);
  161. out_token->type = CFTOKEN_SPACETAB;
  162. lex->base_lexer.offset = offset;
  163. return true;
  164. }
  165. static inline void cf_lexer_write_strref(struct cf_lexer *lex,
  166. const struct strref *ref)
  167. {
  168. strncpy(lex->write_offset, ref->array, ref->len);
  169. lex->write_offset[ref->len] = 0;
  170. lex->write_offset += ref->len;
  171. }
  172. static bool cf_lexer_is_include(struct cf_lexer *lex)
  173. {
  174. bool found_include_import = false;
  175. bool found_preprocessor = false;
  176. size_t i;
  177. for (i = lex->tokens.num; i > 0; i--) {
  178. struct cf_token *token = lex->tokens.array+(i-1);
  179. if (is_space_or_tab(*token->str.array))
  180. continue;
  181. if (!found_include_import) {
  182. if (strref_cmp(&token->str, "include") != 0 &&
  183. strref_cmp(&token->str, "import") != 0)
  184. break;
  185. found_include_import = true;
  186. } else if (!found_preprocessor) {
  187. if (*token->str.array != '#')
  188. break;
  189. found_preprocessor = true;
  190. } else {
  191. return is_newline(*token->str.array);
  192. }
  193. }
  194. /* if starting line */
  195. return found_preprocessor && found_include_import;
  196. }
  197. static void cf_lexer_getstrtoken(struct cf_lexer *lex,
  198. struct cf_token *out_token, char delimiter,
  199. bool allow_escaped_delimiters)
  200. {
  201. const char *offset = lex->base_lexer.offset;
  202. bool escaped = false;
  203. out_token->unmerged_str.len++;
  204. out_token->str.len++;
  205. cf_lexer_write_strref(lex, &out_token->unmerged_str);
  206. while (*offset) {
  207. cf_pass_any_splices(&offset);
  208. if (*offset == delimiter) {
  209. if (!escaped) {
  210. *lex->write_offset++ = *offset;
  211. out_token->str.len++;
  212. offset++;
  213. break;
  214. }
  215. } else if (is_newline(*offset)) {
  216. break;
  217. }
  218. *lex->write_offset++ = *offset;
  219. out_token->str.len++;
  220. escaped = (allow_escaped_delimiters && *offset == '\\');
  221. offset++;
  222. }
  223. *lex->write_offset = 0;
  224. out_token->unmerged_str.len +=
  225. (size_t)(offset - out_token->unmerged_str.array);
  226. out_token->type = CFTOKEN_STRING;
  227. lex->base_lexer.offset = offset;
  228. }
  229. static bool cf_lexer_process_string(struct cf_lexer *lex,
  230. struct cf_token *out_token)
  231. {
  232. char ch = *out_token->unmerged_str.array;
  233. if (ch == '<' && cf_lexer_is_include(lex)) {
  234. cf_lexer_getstrtoken(lex, out_token, '>', false);
  235. return true;
  236. } else if (ch == '"' || ch == '\'') {
  237. cf_lexer_getstrtoken(lex, out_token, ch,
  238. !cf_lexer_is_include(lex));
  239. return true;
  240. }
  241. return false;
  242. }
  243. static inline enum cf_token_type cf_get_token_type(const struct cf_token *token,
  244. const struct base_token *start_token)
  245. {
  246. switch (start_token->type) {
  247. case BASETOKEN_ALPHA:
  248. return CFTOKEN_NAME;
  249. case BASETOKEN_DIGIT:
  250. return CFTOKEN_NUM;
  251. case BASETOKEN_WHITESPACE:
  252. if (is_newline(*token->str.array))
  253. return CFTOKEN_NEWLINE;
  254. else
  255. return CFTOKEN_SPACETAB;
  256. case BASETOKEN_NONE:
  257. case BASETOKEN_OTHER:
  258. break;
  259. }
  260. return CFTOKEN_OTHER;
  261. }
  262. static bool cf_lexer_nexttoken(struct cf_lexer *lex, struct cf_token *out_token)
  263. {
  264. struct base_token token, start_token;
  265. bool wrote_data = false;
  266. base_token_clear(&token);
  267. base_token_clear(&start_token);
  268. cf_token_clear(out_token);
  269. while (lexer_getbasetoken(&lex->base_lexer, &token, PARSE_WHITESPACE)) {
  270. /* reclassify underscore as alpha for alnum tokens */
  271. if (*token.text.array == '_')
  272. token.type = BASETOKEN_ALPHA;
  273. /* ignore escaped newlines to merge spliced lines */
  274. if (cf_is_splice(token.text.array)) {
  275. lex->base_lexer.offset +=
  276. newline_size(token.text.array+1);
  277. continue;
  278. }
  279. if (!wrote_data) {
  280. out_token->unmerged_str.array = token.text.array;
  281. out_token->str.array = lex->write_offset;
  282. /* if comment then output a space */
  283. if (cf_lexer_process_comment(lex, out_token))
  284. return true;
  285. /* process string tokens if any */
  286. if (cf_lexer_process_string(lex, out_token))
  287. return true;
  288. base_token_copy(&start_token, &token);
  289. wrote_data = true;
  290. } else if (cf_is_token_break(&start_token, &token)) {
  291. lex->base_lexer.offset -= token.text.len;
  292. break;
  293. }
  294. /* write token to CF lexer to account for splicing/comments */
  295. cf_lexer_write_strref(lex, &token.text);
  296. out_token->str.len += token.text.len;
  297. }
  298. if (wrote_data) {
  299. out_token->unmerged_str.len = (size_t)(lex->base_lexer.offset -
  300. out_token->unmerged_str.array);
  301. out_token->type = cf_get_token_type(out_token, &start_token);
  302. }
  303. return wrote_data;
  304. }
  305. void cf_lexer_init(struct cf_lexer *lex)
  306. {
  307. lexer_init(&lex->base_lexer);
  308. da_init(lex->tokens);
  309. lex->file = NULL;
  310. lex->reformatted = NULL;
  311. lex->write_offset = NULL;
  312. lex->unexpected_eof = false;
  313. }
  314. void cf_lexer_free(struct cf_lexer *lex)
  315. {
  316. bfree(lex->file);
  317. bfree(lex->reformatted);
  318. lexer_free(&lex->base_lexer);
  319. da_free(lex->tokens);
  320. lex->file = NULL;
  321. lex->reformatted = NULL;
  322. lex->write_offset = NULL;
  323. lex->unexpected_eof = false;
  324. }
  325. bool cf_lexer_lex(struct cf_lexer *lex, const char *str, const char *file)
  326. {
  327. struct cf_token token;
  328. struct cf_token *last_token = NULL;
  329. cf_lexer_free(lex);
  330. if (!str || !*str)
  331. return false;
  332. if (file)
  333. lex->file = bstrdup(file);
  334. lexer_start(&lex->base_lexer, str);
  335. cf_token_clear(&token);
  336. lex->reformatted = bmalloc(strlen(str) + 1);
  337. lex->reformatted[0] = 0;
  338. lex->write_offset = lex->reformatted;
  339. while (cf_lexer_nexttoken(lex, &token)) {
  340. if (last_token &&
  341. is_space_or_tab(*last_token->str.array) &&
  342. is_space_or_tab(*token.str.array)) {
  343. cf_token_add(last_token, &token);
  344. continue;
  345. }
  346. token.lex = lex;
  347. last_token = da_push_back_new(lex->tokens);
  348. memcpy(last_token, &token, sizeof(struct cf_token));
  349. }
  350. cf_token_clear(&token);
  351. token.str.array = lex->write_offset;
  352. token.unmerged_str.array = lex->base_lexer.offset;
  353. token.lex = lex;
  354. da_push_back(lex->tokens, &token);
  355. return !lex->unexpected_eof;
  356. }
  357. /* ------------------------------------------------------------------------- */
  358. struct macro_param {
  359. struct cf_token name;
  360. DARRAY(struct cf_token) tokens;
  361. };
  362. static inline void macro_param_init(struct macro_param *param)
  363. {
  364. cf_token_clear(&param->name);
  365. da_init(param->tokens);
  366. }
  367. static inline void macro_param_free(struct macro_param *param)
  368. {
  369. cf_token_clear(&param->name);
  370. da_free(param->tokens);
  371. }
  372. /* ------------------------------------------------------------------------- */
  373. struct macro_params {
  374. DARRAY(struct macro_param) params;
  375. };
  376. static inline void macro_params_init(struct macro_params *params)
  377. {
  378. da_init(params->params);
  379. }
  380. static inline void macro_params_free(struct macro_params *params)
  381. {
  382. size_t i;
  383. for (i = 0; i < params->params.num; i++)
  384. macro_param_free(params->params.array+i);
  385. da_free(params->params);
  386. }
  387. static inline struct macro_param *get_macro_param(
  388. const struct macro_params *params,
  389. const struct strref *name)
  390. {
  391. size_t i;
  392. if (!params)
  393. return NULL;
  394. for (i = 0; i < params->params.num; i++) {
  395. struct macro_param *param = params->params.array+i;
  396. if (strref_cmp_strref(&param->name.str, name) == 0)
  397. return param;
  398. }
  399. return NULL;
  400. }
  401. /* ------------------------------------------------------------------------- */
  402. static bool cf_preprocessor(struct cf_preprocessor *pp,
  403. bool if_block, struct cf_token **p_cur_token);
  404. static void cf_preprocess_tokens(struct cf_preprocessor *pp,
  405. bool if_block, struct cf_token **p_cur_token);
  406. static inline bool go_to_newline(struct cf_token **p_cur_token)
  407. {
  408. struct cf_token *cur_token = *p_cur_token;
  409. while (cur_token->type != CFTOKEN_NEWLINE &&
  410. cur_token->type != CFTOKEN_NONE)
  411. cur_token++;
  412. *p_cur_token = cur_token;
  413. return cur_token->type != CFTOKEN_NONE;
  414. }
  415. static inline bool next_token(struct cf_token **p_cur_token, bool preprocessor)
  416. {
  417. struct cf_token *cur_token = *p_cur_token;
  418. if (cur_token->type != CFTOKEN_NONE)
  419. cur_token++;
  420. /* if preprocessor, stop at newline */
  421. while (cur_token->type == CFTOKEN_SPACETAB &&
  422. (preprocessor || cur_token->type == CFTOKEN_NEWLINE))
  423. cur_token++;
  424. *p_cur_token = cur_token;
  425. return cur_token->type != CFTOKEN_NONE;
  426. }
  427. static inline void cf_gettokenoffset(struct cf_preprocessor *pp,
  428. const struct cf_token *token, uint32_t *row, uint32_t *col)
  429. {
  430. lexer_getstroffset(&pp->lex->base_lexer,
  431. token->unmerged_str.array, row, col);
  432. }
  433. static void cf_addew(struct cf_preprocessor *pp, const struct cf_token *token,
  434. const char *message, int error_level,
  435. const char *val1, const char *val2, const char *val3)
  436. {
  437. uint32_t row, col;
  438. cf_gettokenoffset(pp, token, &row, &col);
  439. if (!val1 && !val2 && !val3) {
  440. error_data_add(pp->ed, token->lex->file, row, col,
  441. message, error_level);
  442. } else {
  443. struct dstr formatted;
  444. dstr_init(&formatted);
  445. dstr_safe_printf(&formatted, message, val1, val2, val3, NULL);
  446. error_data_add(pp->ed, token->lex->file, row, col,
  447. formatted.array, error_level);
  448. dstr_free(&formatted);
  449. }
  450. }
  451. static inline void cf_adderror(struct cf_preprocessor *pp,
  452. const struct cf_token *token, const char *error,
  453. const char *val1, const char *val2, const char *val3)
  454. {
  455. cf_addew(pp, token, error, LEX_ERROR, val1, val2, val3);
  456. }
  457. static inline void cf_addwarning(struct cf_preprocessor *pp,
  458. const struct cf_token *token, const char *warning,
  459. const char *val1, const char *val2, const char *val3)
  460. {
  461. cf_addew(pp, token, warning, LEX_WARNING, val1, val2, val3);
  462. }
  463. static inline void cf_adderror_expecting(struct cf_preprocessor *pp,
  464. const struct cf_token *token, const char *expecting)
  465. {
  466. cf_adderror(pp, token, "Expected $1", expecting,
  467. NULL, NULL);
  468. }
  469. static inline void cf_adderror_expected_newline(struct cf_preprocessor *pp,
  470. const struct cf_token *token)
  471. {
  472. cf_adderror(pp, token,
  473. "Unexpected token after preprocessor, expected "
  474. "newline",
  475. NULL, NULL, NULL);
  476. }
  477. static inline void cf_adderror_unexpected_endif_eof(struct cf_preprocessor *pp,
  478. const struct cf_token *token)
  479. {
  480. cf_adderror(pp, token, "Unexpected end of file before #endif",
  481. NULL, NULL, NULL);
  482. }
  483. static inline void cf_adderror_unexpected_eof(struct cf_preprocessor *pp,
  484. const struct cf_token *token)
  485. {
  486. cf_adderror(pp, token, "Unexpected end of file",
  487. NULL, NULL, NULL);
  488. }
  489. static void cf_include_file(struct cf_preprocessor *pp,
  490. const struct cf_token *file_token)
  491. {
  492. struct cf_lexer new_lex;
  493. struct dstr str_file;
  494. FILE *file;
  495. char *file_data;
  496. struct cf_token *tokens;
  497. size_t i;
  498. dstr_init(&str_file);
  499. dstr_copy_strref(&str_file, &file_token->str);
  500. dstr_mid(&str_file, &str_file, 1, str_file.len-2);
  501. /* if dependency already exists, run preprocessor on it */
  502. for (i = 0; i < pp->dependencies.num; i++) {
  503. struct cf_lexer *dep = pp->dependencies.array+i;
  504. if (strcmp(dep->file, str_file.array) == 0) {
  505. tokens = cf_lexer_get_tokens(dep);
  506. cf_preprocess_tokens(pp, false, &tokens);
  507. goto exit;
  508. }
  509. }
  510. file = os_fopen(str_file.array, "rb");
  511. if (!file) {
  512. cf_adderror(pp, file_token, "Could not open file '$1'",
  513. file_token->str.array, NULL, NULL);
  514. goto exit;
  515. }
  516. os_fread_utf8(file, &file_data);
  517. fclose(file);
  518. cf_lexer_init(&new_lex);
  519. cf_lexer_lex(&new_lex, file_data, str_file.array);
  520. tokens = cf_lexer_get_tokens(&new_lex);
  521. cf_preprocess_tokens(pp, false, &tokens);
  522. bfree(file_data);
  523. da_push_back(pp->dependencies, &new_lex);
  524. exit:
  525. dstr_free(&str_file);
  526. }
  527. static inline bool is_sys_include(struct strref *ref)
  528. {
  529. return ref->len >= 2 &&
  530. ref->array[0] == '<' && ref->array[ref->len-1] == '>';
  531. }
  532. static inline bool is_loc_include(struct strref *ref)
  533. {
  534. return ref->len >= 2 &&
  535. ref->array[0] == '"' && ref->array[ref->len-1] == '"';
  536. }
  537. static void cf_preprocess_include(struct cf_preprocessor *pp,
  538. struct cf_token **p_cur_token)
  539. {
  540. struct cf_token *cur_token = *p_cur_token;
  541. if (pp->ignore_state) {
  542. go_to_newline(p_cur_token);
  543. return;
  544. }
  545. next_token(&cur_token, true);
  546. if (cur_token->type != CFTOKEN_STRING) {
  547. cf_adderror_expecting(pp, cur_token, "string");
  548. go_to_newline(&cur_token);
  549. goto exit;
  550. }
  551. if (is_sys_include(&cur_token->str)) {
  552. /* TODO */
  553. } else if (is_loc_include(&cur_token->str)) {
  554. if (!pp->ignore_state)
  555. cf_include_file(pp, cur_token);
  556. } else {
  557. cf_adderror(pp, cur_token, "Invalid or incomplete string",
  558. NULL, NULL, NULL);
  559. go_to_newline(&cur_token);
  560. goto exit;
  561. }
  562. cur_token++;
  563. exit:
  564. *p_cur_token = cur_token;
  565. }
  566. static bool cf_preprocess_macro_params(struct cf_preprocessor *pp,
  567. struct cf_def *def, struct cf_token **p_cur_token)
  568. {
  569. struct cf_token *cur_token = *p_cur_token;
  570. bool success = false;
  571. def->macro = true;
  572. do {
  573. next_token(&cur_token, true);
  574. if (cur_token->type != CFTOKEN_NAME) {
  575. cf_adderror_expecting(pp, cur_token, "identifier");
  576. go_to_newline(&cur_token);
  577. goto exit;
  578. }
  579. cf_def_addparam(def, cur_token);
  580. next_token(&cur_token, true);
  581. if (cur_token->type != CFTOKEN_OTHER
  582. || (*cur_token->str.array != ','
  583. && *cur_token->str.array != ')')) {
  584. cf_adderror_expecting(pp, cur_token, "',' or ')'");
  585. go_to_newline(&cur_token);
  586. goto exit;
  587. }
  588. } while (*cur_token->str.array != ')');
  589. /* ended properly, now go to first define token (or newline) */
  590. next_token(&cur_token, true);
  591. success = true;
  592. exit:
  593. *p_cur_token = cur_token;
  594. return success;
  595. }
  596. #define INVALID_INDEX ((size_t)-1)
  597. static inline size_t cf_preprocess_get_def_idx(struct cf_preprocessor *pp,
  598. const struct strref *def_name)
  599. {
  600. struct cf_def *array = pp->defines.array;
  601. size_t i;
  602. for (i = 0; i < pp->defines.num; i++) {
  603. struct cf_def *cur_def = array+i;
  604. if (strref_cmp_strref(&cur_def->name.str, def_name) == 0)
  605. return i;
  606. }
  607. return INVALID_INDEX;
  608. }
  609. static inline struct cf_def *cf_preprocess_get_def(struct cf_preprocessor *pp,
  610. const struct strref *def_name)
  611. {
  612. size_t idx = cf_preprocess_get_def_idx(pp, def_name);
  613. if (idx == INVALID_INDEX)
  614. return NULL;
  615. return pp->defines.array+idx;
  616. }
  617. static char space_filler[2] = " ";
  618. static inline void append_space(struct cf_preprocessor *pp,
  619. struct darray *tokens, const struct cf_token *base)
  620. {
  621. struct cf_token token;
  622. strref_set(&token.str, space_filler, 1);
  623. token.type = CFTOKEN_SPACETAB;
  624. if (base) {
  625. token.lex = base->lex;
  626. strref_copy(&token.unmerged_str, &base->unmerged_str);
  627. } else {
  628. token.lex = pp->lex;
  629. strref_copy(&token.unmerged_str, &token.str);
  630. }
  631. darray_push_back(sizeof(struct cf_token), tokens, &token);
  632. }
  633. static inline void append_end_token(struct darray *tokens)
  634. {
  635. struct cf_token end;
  636. cf_token_clear(&end);
  637. darray_push_back(sizeof(struct cf_token), tokens, &end);
  638. }
  639. static void cf_preprocess_define(struct cf_preprocessor *pp,
  640. struct cf_token **p_cur_token)
  641. {
  642. struct cf_token *cur_token = *p_cur_token;
  643. struct cf_def def;
  644. if (pp->ignore_state) {
  645. go_to_newline(p_cur_token);
  646. return;
  647. }
  648. cf_def_init(&def);
  649. next_token(&cur_token, true);
  650. if (cur_token->type != CFTOKEN_NAME) {
  651. cf_adderror_expecting(pp, cur_token, "identifier");
  652. go_to_newline(&cur_token);
  653. goto exit;
  654. }
  655. append_space(pp, &def.tokens.da, NULL);
  656. cf_token_copy(&def.name, cur_token);
  657. if (!next_token(&cur_token, true))
  658. goto complete;
  659. /* process macro */
  660. if (*cur_token->str.array == '(') {
  661. if (!cf_preprocess_macro_params(pp, &def, &cur_token))
  662. goto error;
  663. }
  664. while (cur_token->type != CFTOKEN_NEWLINE &&
  665. cur_token->type != CFTOKEN_NONE)
  666. cf_def_addtoken(&def, cur_token++);
  667. complete:
  668. append_end_token(&def.tokens.da);
  669. append_space(pp, &def.tokens.da, NULL);
  670. da_push_back(pp->defines, &def);
  671. goto exit;
  672. error:
  673. cf_def_free(&def);
  674. exit:
  675. *p_cur_token = cur_token;
  676. }
  677. static inline void cf_preprocess_remove_def_strref(struct cf_preprocessor *pp,
  678. const struct strref *ref)
  679. {
  680. size_t def_idx = cf_preprocess_get_def_idx(pp, ref);
  681. if (def_idx != INVALID_INDEX) {
  682. struct cf_def *array = pp->defines.array;
  683. cf_def_free(array+def_idx);
  684. da_erase(pp->defines, def_idx);
  685. }
  686. }
  687. static void cf_preprocess_undef(struct cf_preprocessor *pp,
  688. struct cf_token **p_cur_token)
  689. {
  690. struct cf_token *cur_token = *p_cur_token;
  691. if (pp->ignore_state) {
  692. go_to_newline(p_cur_token);
  693. return;
  694. }
  695. next_token(&cur_token, true);
  696. if (cur_token->type != CFTOKEN_NAME) {
  697. cf_adderror_expecting(pp, cur_token, "identifier");
  698. go_to_newline(&cur_token);
  699. goto exit;
  700. }
  701. cf_preprocess_remove_def_strref(pp, &cur_token->str);
  702. cur_token++;
  703. exit:
  704. *p_cur_token = cur_token;
  705. }
  706. /* Processes an #ifdef/#ifndef/#if/#else/#elif sub block recursively */
  707. static inline bool cf_preprocess_subblock(struct cf_preprocessor *pp,
  708. bool ignore, struct cf_token **p_cur_token)
  709. {
  710. bool eof;
  711. if (!next_token(p_cur_token, true))
  712. return false;
  713. if (!pp->ignore_state) {
  714. pp->ignore_state = ignore;
  715. cf_preprocess_tokens(pp, true, p_cur_token);
  716. pp->ignore_state = false;
  717. } else {
  718. cf_preprocess_tokens(pp, true, p_cur_token);
  719. }
  720. eof = ((*p_cur_token)->type == CFTOKEN_NONE);
  721. if (eof)
  722. cf_adderror_unexpected_endif_eof(pp, *p_cur_token);
  723. return !eof;
  724. }
  725. static void cf_preprocess_ifdef(struct cf_preprocessor *pp,
  726. bool ifnot, struct cf_token **p_cur_token)
  727. {
  728. struct cf_token *cur_token = *p_cur_token;
  729. struct cf_def *def;
  730. bool is_true;
  731. next_token(&cur_token, true);
  732. if (cur_token->type != CFTOKEN_NAME) {
  733. cf_adderror_expecting(pp, cur_token, "identifier");
  734. go_to_newline(&cur_token);
  735. goto exit;
  736. }
  737. def = cf_preprocess_get_def(pp, &cur_token->str);
  738. is_true = (def == NULL) == ifnot;
  739. if (!cf_preprocess_subblock(pp, !is_true, &cur_token))
  740. goto exit;
  741. if (strref_cmp(&cur_token->str, "else") == 0) {
  742. if (!cf_preprocess_subblock(pp, is_true, &cur_token))
  743. goto exit;
  744. /*} else if (strref_cmp(&cur_token->str, "elif") == 0) {*/
  745. }
  746. cur_token++;
  747. exit:
  748. *p_cur_token = cur_token;
  749. }
  750. static bool cf_preprocessor(struct cf_preprocessor *pp,
  751. bool if_block, struct cf_token **p_cur_token)
  752. {
  753. struct cf_token *cur_token = *p_cur_token;
  754. if (strref_cmp(&cur_token->str, "include") == 0) {
  755. cf_preprocess_include(pp, p_cur_token);
  756. } else if (strref_cmp(&cur_token->str, "define") == 0) {
  757. cf_preprocess_define(pp, p_cur_token);
  758. } else if (strref_cmp(&cur_token->str, "undef") == 0) {
  759. cf_preprocess_undef(pp, p_cur_token);
  760. } else if (strref_cmp(&cur_token->str, "ifdef") == 0) {
  761. cf_preprocess_ifdef(pp, false, p_cur_token);
  762. } else if (strref_cmp(&cur_token->str, "ifndef") == 0) {
  763. cf_preprocess_ifdef(pp, true, p_cur_token);
  764. /*} else if (strref_cmp(&cur_token->str, "if") == 0) {
  765. TODO;*/
  766. } else if (strref_cmp(&cur_token->str, "else") == 0 ||
  767. /*strref_cmp(&cur_token->str, "elif") == 0 ||*/
  768. strref_cmp(&cur_token->str, "endif") == 0) {
  769. if (!if_block) {
  770. struct dstr name;
  771. dstr_init_copy_strref(&name, &cur_token->str);
  772. cf_adderror(pp, cur_token,"#$1 outside of "
  773. "#if/#ifdef/#ifndef block",
  774. name.array, NULL, NULL);
  775. dstr_free(&name);
  776. (*p_cur_token)++;
  777. return true;
  778. }
  779. return false;
  780. } else if (cur_token->type != CFTOKEN_NEWLINE &&
  781. cur_token->type != CFTOKEN_NONE) {
  782. /*
  783. * TODO: language-specific preprocessor stuff should be sent to
  784. * handler of some sort
  785. */
  786. (*p_cur_token)++;
  787. }
  788. return true;
  789. }
  790. static void cf_preprocess_addtoken(struct cf_preprocessor *pp,
  791. struct darray *dst, /* struct cf_token */
  792. struct cf_token **p_cur_token,
  793. const struct cf_token *base,
  794. const struct macro_params *params);
  795. /*
  796. * collects tokens for a macro parameter
  797. *
  798. * note that it is important to make sure that any usage of function calls
  799. * within a macro parameter is preserved, example MACRO(func(1, 2), 3), do not
  800. * let it stop on the comma at "1,"
  801. */
  802. static void cf_preprocess_save_macro_param(struct cf_preprocessor *pp,
  803. struct cf_token **p_cur_token, struct macro_param *param,
  804. const struct cf_token *base,
  805. const struct macro_params *cur_params)
  806. {
  807. struct cf_token *cur_token = *p_cur_token;
  808. int brace_count = 0;
  809. append_space(pp, &param->tokens.da, base);
  810. while (cur_token->type != CFTOKEN_NONE) {
  811. if (*cur_token->str.array == '(') {
  812. brace_count++;
  813. } else if (*cur_token->str.array == ')') {
  814. if (brace_count)
  815. brace_count--;
  816. else
  817. break;
  818. } else if (*cur_token->str.array == ',') {
  819. if (!brace_count)
  820. break;
  821. }
  822. cf_preprocess_addtoken(pp, &param->tokens.da, &cur_token, base,
  823. cur_params);
  824. }
  825. if (cur_token->type == CFTOKEN_NONE)
  826. cf_adderror_unexpected_eof(pp, cur_token);
  827. append_space(pp, &param->tokens.da, base);
  828. append_end_token(&param->tokens.da);
  829. *p_cur_token = cur_token;
  830. }
  831. static inline bool param_is_whitespace(const struct macro_param *param)
  832. {
  833. struct cf_token *array = param->tokens.array;
  834. size_t i;
  835. for (i = 0; i < param->tokens.num; i++)
  836. if (array[i].type != CFTOKEN_NONE &&
  837. array[i].type != CFTOKEN_SPACETAB &&
  838. array[i].type != CFTOKEN_NEWLINE)
  839. return false;
  840. return true;
  841. }
  842. /* collects parameter tokens of a used macro and stores them for the unwrap */
  843. static void cf_preprocess_save_macro_params(struct cf_preprocessor *pp,
  844. struct cf_token **p_cur_token, const struct cf_def *def,
  845. const struct cf_token *base,
  846. const struct macro_params *cur_params,
  847. struct macro_params *dst)
  848. {
  849. struct cf_token *cur_token = *p_cur_token;
  850. size_t count = 0;
  851. next_token(&cur_token, false);
  852. if (cur_token->type != CFTOKEN_OTHER || *cur_token->str.array != '(') {
  853. cf_adderror_expecting(pp, cur_token, "'('");
  854. goto exit;
  855. }
  856. do {
  857. struct macro_param param;
  858. macro_param_init(&param);
  859. cur_token++;
  860. count++;
  861. cf_preprocess_save_macro_param(pp, &cur_token, &param, base,
  862. cur_params);
  863. if (cur_token->type != CFTOKEN_OTHER
  864. || (*cur_token->str.array != ','
  865. && *cur_token->str.array != ')')) {
  866. macro_param_free(&param);
  867. cf_adderror_expecting(pp, cur_token, "',' or ')'");
  868. goto exit;
  869. }
  870. if (param_is_whitespace(&param)) {
  871. /* if 0-param macro, ignore first entry */
  872. if (count == 1 && !def->params.num &&
  873. *cur_token->str.array == ')') {
  874. macro_param_free(&param);
  875. break;
  876. }
  877. }
  878. if (count <= def->params.num) {
  879. cf_token_copy(&param.name,
  880. cf_def_getparam(def, count-1));
  881. da_push_back(dst->params, &param);
  882. } else {
  883. macro_param_free(&param);
  884. }
  885. } while (*cur_token->str.array != ')');
  886. if (count != def->params.num)
  887. cf_adderror(pp, cur_token,
  888. "Mismatching number of macro parameters",
  889. NULL, NULL, NULL);
  890. exit:
  891. *p_cur_token = cur_token;
  892. }
  893. static inline void cf_preprocess_unwrap_param(struct cf_preprocessor *pp,
  894. struct darray *dst, /* struct cf_token */
  895. struct cf_token **p_cur_token,
  896. const struct cf_token *base,
  897. const struct macro_param *param)
  898. {
  899. struct cf_token *cur_token = *p_cur_token;
  900. struct cf_token *cur_param_token = param->tokens.array;
  901. while (cur_param_token->type != CFTOKEN_NONE)
  902. cf_preprocess_addtoken(pp, dst, &cur_param_token, base, NULL);
  903. cur_token++;
  904. *p_cur_token = cur_token;
  905. }
  906. static inline void cf_preprocess_unwrap_define(struct cf_preprocessor *pp,
  907. struct darray *dst, /* struct cf_token */
  908. struct cf_token **p_cur_token,
  909. const struct cf_token *base,
  910. const struct cf_def *def,
  911. const struct macro_params *cur_params)
  912. {
  913. struct cf_token *cur_token = *p_cur_token;
  914. struct macro_params new_params;
  915. struct cf_token *cur_def_token = def->tokens.array;
  916. macro_params_init(&new_params);
  917. if (def->macro)
  918. cf_preprocess_save_macro_params(pp, &cur_token, def, base,
  919. cur_params, &new_params);
  920. while (cur_def_token->type != CFTOKEN_NONE)
  921. cf_preprocess_addtoken(pp, dst, &cur_def_token, base,
  922. &new_params);
  923. macro_params_free(&new_params);
  924. cur_token++;
  925. *p_cur_token = cur_token;
  926. }
  927. static void cf_preprocess_addtoken(struct cf_preprocessor *pp,
  928. struct darray *dst, /* struct cf_token */
  929. struct cf_token **p_cur_token,
  930. const struct cf_token *base,
  931. const struct macro_params *params)
  932. {
  933. struct cf_token *cur_token = *p_cur_token;
  934. if (pp->ignore_state)
  935. goto ignore;
  936. if (!base)
  937. base = cur_token;
  938. if (cur_token->type == CFTOKEN_NAME) {
  939. struct cf_def *def;
  940. struct macro_param *param;
  941. param = get_macro_param(params, &cur_token->str);
  942. if (param) {
  943. cf_preprocess_unwrap_param(pp, dst, &cur_token, base,
  944. param);
  945. goto exit;
  946. }
  947. def = cf_preprocess_get_def(pp, &cur_token->str);
  948. if (def) {
  949. cf_preprocess_unwrap_define(pp, dst, &cur_token, base,
  950. def, params);
  951. goto exit;
  952. }
  953. }
  954. darray_push_back(sizeof(struct cf_token), dst, cur_token);
  955. ignore:
  956. cur_token++;
  957. exit:
  958. *p_cur_token = cur_token;
  959. }
  960. static void cf_preprocess_tokens(struct cf_preprocessor *pp,
  961. bool if_block, struct cf_token **p_cur_token)
  962. {
  963. bool newline = true;
  964. bool preprocessor_line = if_block;
  965. struct cf_token *cur_token = *p_cur_token;
  966. while (cur_token->type != CFTOKEN_NONE) {
  967. if(cur_token->type != CFTOKEN_SPACETAB &&
  968. cur_token->type != CFTOKEN_NEWLINE) {
  969. if (preprocessor_line) {
  970. cf_adderror_expected_newline(pp, cur_token);
  971. if (!go_to_newline(&cur_token))
  972. break;
  973. }
  974. if (newline && *cur_token->str.array == '#') {
  975. next_token(&cur_token, true);
  976. preprocessor_line = true;
  977. if (!cf_preprocessor(pp, if_block, &cur_token))
  978. break;
  979. continue;
  980. }
  981. newline = false;
  982. }
  983. if (cur_token->type == CFTOKEN_NEWLINE) {
  984. newline = true;
  985. preprocessor_line = false;
  986. } else if (cur_token->type == CFTOKEN_NONE) {
  987. break;
  988. }
  989. cf_preprocess_addtoken(pp, &pp->tokens.da, &cur_token, NULL,
  990. NULL);
  991. }
  992. *p_cur_token = cur_token;
  993. }
  994. void cf_preprocessor_init(struct cf_preprocessor *pp)
  995. {
  996. da_init(pp->defines);
  997. da_init(pp->sys_include_dirs);
  998. da_init(pp->dependencies);
  999. da_init(pp->tokens);
  1000. pp->lex = NULL;
  1001. pp->ed = NULL;
  1002. pp->ignore_state = false;
  1003. }
  1004. void cf_preprocessor_free(struct cf_preprocessor *pp)
  1005. {
  1006. struct cf_lexer *dependencies = pp->dependencies.array;
  1007. char **sys_include_dirs = pp->sys_include_dirs.array;
  1008. struct cf_def *defs = pp->defines.array;
  1009. size_t i;
  1010. for (i = 0; i <pp->defines.num; i++)
  1011. cf_def_free(defs+i);
  1012. for (i = 0; i < pp->sys_include_dirs.num; i++)
  1013. bfree(sys_include_dirs[i]);
  1014. for (i = 0; i < pp->dependencies.num; i++)
  1015. cf_lexer_free(dependencies+i);
  1016. da_free(pp->defines);
  1017. da_free(pp->sys_include_dirs);
  1018. da_free(pp->dependencies);
  1019. da_free(pp->tokens);
  1020. pp->lex = NULL;
  1021. pp->ed = NULL;
  1022. pp->ignore_state = false;
  1023. }
  1024. bool cf_preprocess(struct cf_preprocessor *pp, struct cf_lexer *lex,
  1025. struct error_data *ed)
  1026. {
  1027. struct cf_token *token = cf_lexer_get_tokens(lex);
  1028. if (!token)
  1029. return false;
  1030. pp->ed = ed;
  1031. pp->lex = lex;
  1032. cf_preprocess_tokens(pp, false, &token);
  1033. da_push_back(pp->tokens, token);
  1034. return !lex->unexpected_eof;
  1035. }
  1036. void cf_preprocessor_add_def(struct cf_preprocessor *pp, struct cf_def *def)
  1037. {
  1038. struct cf_def *existing = cf_preprocess_get_def(pp, &def->name.str);
  1039. if (existing) {
  1040. struct dstr name;
  1041. dstr_init_copy_strref(&name, &def->name.str);
  1042. cf_addwarning(pp, &def->name, "Token $1 already defined",
  1043. name.array, NULL, NULL);
  1044. cf_addwarning(pp, &existing->name,
  1045. "Previous definition of $1 is here",
  1046. name.array, NULL, NULL);
  1047. cf_def_free(existing);
  1048. memcpy(existing, def, sizeof(struct cf_def));
  1049. } else {
  1050. da_push_back(pp->defines, def);
  1051. }
  1052. }
  1053. void cf_preprocessor_remove_def(struct cf_preprocessor *pp,
  1054. const char *def_name)
  1055. {
  1056. struct strref ref;
  1057. ref.array = def_name;
  1058. ref.len = strlen(def_name);
  1059. cf_preprocess_remove_def_strref(pp, &ref);
  1060. }