/* cf-lexer.c */
  1. /*
  2. * Copyright (c) 2013 Hugh Bailey <[email protected]>
  3. *
  4. * Permission to use, copy, modify, and distribute this software for any
  5. * purpose with or without fee is hereby granted, provided that the above
  6. * copyright notice and this permission notice appear in all copies.
  7. *
  8. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  10. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  11. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  12. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  13. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  14. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  15. */
  16. #include <ctype.h>
  17. #include <stdio.h>
  18. #include "platform.h"
  19. #include "cf-lexer.h"
  /* Returns the numeric value of a hexadecimal digit, or -1 for any other char. */
  static inline int cf_hex_digit_value(char ch)
  {
      if (ch >= '0' && ch <= '9')
          return ch - '0';
      if (ch >= 'a' && ch <= 'f')
          return ch - 'a' + 10;
      if (ch >= 'A' && ch <= 'F')
          return ch - 'A' + 10;
      return -1;
  }

  /*
   * Decodes one escape sequence.
   *
   * On entry *p_src points at the character that follows the backslash and
   * *p_dst at the next free output byte.  On return both pointers have been
   * advanced past what was consumed / produced.
   *
   * Supported: the simple escapes (\' \" \? \\ \a \b \f \n \r \t \v),
   * \x or \X followed by one or two hex digits, and one to three octal
   * digits (so \0 still yields a NUL byte).  Unknown escapes, including
   * \u and \U, consume the escape character and emit nothing.  At most one
   * byte is written, and the source pointer never moves past a NUL.
   */
  static inline void cf_convert_from_escape_literal(char **p_dst,
          const char **p_src)
  {
      char *dst = *p_dst;
      const char *src = *p_src;
      const char ch = *src;
      unsigned int value = 0;
      int digits = 0;

      /* dangling backslash at the very end of the input: consume nothing */
      if (ch == '\0')
          return;
      src++;

      switch (ch) {
      case '\'': *(dst++) = '\''; break;
      case '\"': *(dst++) = '\"'; break;
      case '\?': *(dst++) = '\?'; break;
      case '\\': *(dst++) = '\\'; break;
      case 'a':  *(dst++) = '\a'; break;
      case 'b':  *(dst++) = '\b'; break;
      case 'f':  *(dst++) = '\f'; break;
      case 'n':  *(dst++) = '\n'; break;
      case 'r':  *(dst++) = '\r'; break;
      case 't':  *(dst++) = '\t'; break;
      case 'v':  *(dst++) = '\v'; break;

      /* hex: only digits that are really present are consumed */
      case 'X':
      case 'x':
          while (digits < 2 && cf_hex_digit_value(*src) >= 0) {
              value = value * 16 +
                  (unsigned int)cf_hex_digit_value(*src);
              src++;
              digits++;
          }
          if (digits > 0)
              *(dst++) = (char)(unsigned char)value;
          break;

      /* oct: the digit already consumed by the switch is the first one */
      case '0': case '1': case '2': case '3':
      case '4': case '5': case '6': case '7':
          value = (unsigned int)(ch - '0');
          while (digits < 2 && *src >= '0' && *src <= '7') {
              value = value * 8 + (unsigned int)(*src - '0');
              src++;
              digits++;
          }
          *(dst++) = (char)(unsigned char)value;
          break;

      default:
          /* unsupported escape ('u', 'U', ...): dropped */
          break;
      }

      *p_dst = dst;
      *p_src = src;
  }
  /*
   * Converts a quoted string/character literal into a newly allocated,
   * NUL-terminated string with the surrounding quotes removed and escape
   * sequences decoded.
   *
   * literal: text starting with ' or " and ending with the same character.
   * count:   length of the literal including both quotes; 0 means "use
   *          strlen(literal)".
   *
   * Returns NULL when the text is shorter than two characters or is not
   * enclosed in matching quotes.  Otherwise returns a buffer obtained from
   * bmalloc(); releasing it is the caller's job.
   */
  char *cf_literal_to_str(const char *literal, size_t count)
  {
      const char *src;
      const char *end;
      char *str;
      char *dst;

      if (!count)
          count = strlen(literal);
      if (count < 2)
          return NULL;
      if (literal[0] != literal[count - 1])
          return NULL;
      if (literal[0] != '\"' && literal[0] != '\'')
          return NULL;

      /*
       * The content is at most count - 2 characters (every escape shrinks
       * it), so count - 1 bytes hold it plus the terminator.
       */
      str = bmalloc(count - 1);
      src = literal + 1;         /* skip the opening quote */
      end = literal + count - 1; /* the closing quote */
      dst = str;

      while (src < end && *src) {
          if (*src != '\\') {
              *(dst++) = *(src++);
              continue;
          }

          src++;
          /* a lone backslash right before the closing quote is dropped */
          if (src >= end)
              break;
          cf_convert_from_escape_literal(&dst, &src);
      }

      *dst = 0;
      return str;
  }
  82. static bool cf_is_token_break(struct base_token *start_token,
  83. const struct base_token *token)
  84. {
  85. switch (start_token->type) {
  86. case BASETOKEN_ALPHA:
  87. if (token->type == BASETOKEN_OTHER ||
  88. token->type == BASETOKEN_WHITESPACE)
  89. return true;
  90. break;
  91. case BASETOKEN_DIGIT:
  92. if (token->type == BASETOKEN_WHITESPACE
  93. || (token->type == BASETOKEN_OTHER
  94. && *token->text.array != '.'))
  95. return true;
  96. break;
  97. case BASETOKEN_WHITESPACE:
  98. /* lump all non-newline whitespace together when possible */
  99. if (is_space_or_tab(*start_token->text.array) &&
  100. is_space_or_tab(*token->text.array))
  101. break;
  102. return true;
  103. case BASETOKEN_OTHER:
  104. if (*start_token->text.array == '.' &&
  105. token->type == BASETOKEN_DIGIT) {
  106. start_token->type = BASETOKEN_DIGIT;
  107. break;
  108. }
  109. /* Falls through. */
  110. case BASETOKEN_NONE:
  111. return true;
  112. }
  113. return false;
  114. }
  /* True when `array` starts with a backslash immediately followed by a newline. */
  static inline bool cf_is_splice(const char *array)
  {
      if (array[0] != '\\')
          return false;
      return is_newline(array[1]);
  }
  /* Advances *parray past every consecutive backslash-newline pair. */
  static inline void cf_pass_any_splices(const char **parray)
  {
      const char *cursor = *parray;

      while (cf_is_splice(cursor)) {
          /* one byte for the backslash plus the size of the newline */
          cursor += 1 + newline_size(cursor + 1);
      }

      *parray = cursor;
  }
  /*
   * True when `array` begins a line or block comment.  Splices between the
   * leading '/' and the following '/' or '*' are ignored.
   */
  static inline bool cf_is_comment(const char *array)
  {
      const char *second;

      if (*array != '/')
          return false;

      second = array + 1;
      cf_pass_any_splices(&second);
      return *second == '/' || *second == '*';
  }
  133. static bool cf_lexer_process_comment(struct cf_lexer *lex,
  134. struct cf_token *out_token)
  135. {
  136. const char *offset;
  137. if (!cf_is_comment(out_token->unmerged_str.array))
  138. return false;
  139. offset = lex->base_lexer.offset;
  140. cf_pass_any_splices(&offset);
  141. strcpy(lex->write_offset++, " ");
  142. out_token->str.len = 1;
  143. if (*offset == '/') {
  144. while (*++offset && !is_newline(*offset))
  145. cf_pass_any_splices(&offset);
  146. } else if (*offset == '*') {
  147. bool was_star = false;
  148. lex->unexpected_eof = true;
  149. while (*++offset) {
  150. cf_pass_any_splices(&offset);
  151. if (was_star && *offset == '/') {
  152. offset++;
  153. lex->unexpected_eof = false;
  154. break;
  155. } else {
  156. was_star = (*offset == '*');
  157. }
  158. }
  159. }
  160. out_token->unmerged_str.len +=
  161. (size_t)(offset - out_token->unmerged_str.array);
  162. out_token->type = CFTOKEN_SPACETAB;
  163. lex->base_lexer.offset = offset;
  164. return true;
  165. }
  166. static inline void cf_lexer_write_strref(struct cf_lexer *lex,
  167. const struct strref *ref)
  168. {
  169. strncpy(lex->write_offset, ref->array, ref->len);
  170. lex->write_offset[ref->len] = 0;
  171. lex->write_offset += ref->len;
  172. }
  173. static bool cf_lexer_is_include(struct cf_lexer *lex)
  174. {
  175. bool found_include_import = false;
  176. bool found_preprocessor = false;
  177. size_t i;
  178. for (i = lex->tokens.num; i > 0; i--) {
  179. struct cf_token *token = lex->tokens.array+(i-1);
  180. if (is_space_or_tab(*token->str.array))
  181. continue;
  182. if (!found_include_import) {
  183. if (strref_cmp(&token->str, "include") != 0 &&
  184. strref_cmp(&token->str, "import") != 0)
  185. break;
  186. found_include_import = true;
  187. } else if (!found_preprocessor) {
  188. if (*token->str.array != '#')
  189. break;
  190. found_preprocessor = true;
  191. } else {
  192. return is_newline(*token->str.array);
  193. }
  194. }
  195. /* if starting line */
  196. return found_preprocessor && found_include_import;
  197. }
  198. static void cf_lexer_getstrtoken(struct cf_lexer *lex,
  199. struct cf_token *out_token, char delimiter,
  200. bool allow_escaped_delimiters)
  201. {
  202. const char *offset = lex->base_lexer.offset;
  203. bool escaped = false;
  204. out_token->unmerged_str.len++;
  205. out_token->str.len++;
  206. cf_lexer_write_strref(lex, &out_token->unmerged_str);
  207. while (*offset) {
  208. cf_pass_any_splices(&offset);
  209. if (*offset == delimiter) {
  210. if (!escaped) {
  211. *lex->write_offset++ = *offset;
  212. out_token->str.len++;
  213. offset++;
  214. break;
  215. }
  216. } else if (is_newline(*offset)) {
  217. break;
  218. }
  219. *lex->write_offset++ = *offset;
  220. out_token->str.len++;
  221. escaped = (allow_escaped_delimiters && *offset == '\\');
  222. offset++;
  223. }
  224. *lex->write_offset = 0;
  225. out_token->unmerged_str.len +=
  226. (size_t)(offset - out_token->unmerged_str.array);
  227. out_token->type = CFTOKEN_STRING;
  228. lex->base_lexer.offset = offset;
  229. }
  230. static bool cf_lexer_process_string(struct cf_lexer *lex,
  231. struct cf_token *out_token)
  232. {
  233. char ch = *out_token->unmerged_str.array;
  234. if (ch == '<' && cf_lexer_is_include(lex)) {
  235. cf_lexer_getstrtoken(lex, out_token, '>', false);
  236. return true;
  237. } else if (ch == '"' || ch == '\'') {
  238. cf_lexer_getstrtoken(lex, out_token, ch,
  239. !cf_lexer_is_include(lex));
  240. return true;
  241. }
  242. return false;
  243. }
  244. static inline enum cf_token_type cf_get_token_type(const struct cf_token *token,
  245. const struct base_token *start_token)
  246. {
  247. switch (start_token->type) {
  248. case BASETOKEN_ALPHA:
  249. return CFTOKEN_NAME;
  250. case BASETOKEN_DIGIT:
  251. return CFTOKEN_NUM;
  252. case BASETOKEN_WHITESPACE:
  253. if (is_newline(*token->str.array))
  254. return CFTOKEN_NEWLINE;
  255. else
  256. return CFTOKEN_SPACETAB;
  257. case BASETOKEN_NONE:
  258. case BASETOKEN_OTHER:
  259. break;
  260. }
  261. return CFTOKEN_OTHER;
  262. }
  263. static bool cf_lexer_nexttoken(struct cf_lexer *lex, struct cf_token *out_token)
  264. {
  265. struct base_token token, start_token;
  266. bool wrote_data = false;
  267. base_token_clear(&token);
  268. base_token_clear(&start_token);
  269. cf_token_clear(out_token);
  270. while (lexer_getbasetoken(&lex->base_lexer, &token, PARSE_WHITESPACE)) {
  271. /* reclassify underscore as alpha for alnum tokens */
  272. if (*token.text.array == '_')
  273. token.type = BASETOKEN_ALPHA;
  274. /* ignore escaped newlines to merge spliced lines */
  275. if (cf_is_splice(token.text.array)) {
  276. lex->base_lexer.offset +=
  277. newline_size(token.text.array+1);
  278. continue;
  279. }
  280. if (!wrote_data) {
  281. out_token->unmerged_str.array = token.text.array;
  282. out_token->str.array = lex->write_offset;
  283. /* if comment then output a space */
  284. if (cf_lexer_process_comment(lex, out_token))
  285. return true;
  286. /* process string tokens if any */
  287. if (cf_lexer_process_string(lex, out_token))
  288. return true;
  289. base_token_copy(&start_token, &token);
  290. wrote_data = true;
  291. } else if (cf_is_token_break(&start_token, &token)) {
  292. lex->base_lexer.offset -= token.text.len;
  293. break;
  294. }
  295. /* write token to CF lexer to account for splicing/comments */
  296. cf_lexer_write_strref(lex, &token.text);
  297. out_token->str.len += token.text.len;
  298. }
  299. if (wrote_data) {
  300. out_token->unmerged_str.len = (size_t)(lex->base_lexer.offset -
  301. out_token->unmerged_str.array);
  302. out_token->type = cf_get_token_type(out_token, &start_token);
  303. }
  304. return wrote_data;
  305. }
  306. void cf_lexer_init(struct cf_lexer *lex)
  307. {
  308. lexer_init(&lex->base_lexer);
  309. da_init(lex->tokens);
  310. lex->file = NULL;
  311. lex->reformatted = NULL;
  312. lex->write_offset = NULL;
  313. lex->unexpected_eof = false;
  314. }
  315. void cf_lexer_free(struct cf_lexer *lex)
  316. {
  317. bfree(lex->file);
  318. bfree(lex->reformatted);
  319. lexer_free(&lex->base_lexer);
  320. da_free(lex->tokens);
  321. lex->file = NULL;
  322. lex->reformatted = NULL;
  323. lex->write_offset = NULL;
  324. lex->unexpected_eof = false;
  325. }
  326. bool cf_lexer_lex(struct cf_lexer *lex, const char *str, const char *file)
  327. {
  328. struct cf_token token;
  329. struct cf_token *last_token = NULL;
  330. cf_lexer_free(lex);
  331. if (!str || !*str)
  332. return false;
  333. if (file)
  334. lex->file = bstrdup(file);
  335. lexer_start(&lex->base_lexer, str);
  336. cf_token_clear(&token);
  337. lex->reformatted = bmalloc(strlen(str) + 1);
  338. lex->reformatted[0] = 0;
  339. lex->write_offset = lex->reformatted;
  340. while (cf_lexer_nexttoken(lex, &token)) {
  341. if (last_token &&
  342. is_space_or_tab(*last_token->str.array) &&
  343. is_space_or_tab(*token.str.array)) {
  344. cf_token_add(last_token, &token);
  345. continue;
  346. }
  347. token.lex = lex;
  348. last_token = da_push_back_new(lex->tokens);
  349. memcpy(last_token, &token, sizeof(struct cf_token));
  350. }
  351. cf_token_clear(&token);
  352. token.str.array = lex->write_offset;
  353. token.unmerged_str.array = lex->base_lexer.offset;
  354. token.lex = lex;
  355. da_push_back(lex->tokens, &token);
  356. return !lex->unexpected_eof;
  357. }
  358. /* ------------------------------------------------------------------------- */
  359. struct macro_param {
  360. struct cf_token name;
  361. DARRAY(struct cf_token) tokens;
  362. };
  363. static inline void macro_param_init(struct macro_param *param)
  364. {
  365. cf_token_clear(&param->name);
  366. da_init(param->tokens);
  367. }
  368. static inline void macro_param_free(struct macro_param *param)
  369. {
  370. cf_token_clear(&param->name);
  371. da_free(param->tokens);
  372. }
  373. /* ------------------------------------------------------------------------- */
  374. struct macro_params {
  375. DARRAY(struct macro_param) params;
  376. };
  377. static inline void macro_params_init(struct macro_params *params)
  378. {
  379. da_init(params->params);
  380. }
  381. static inline void macro_params_free(struct macro_params *params)
  382. {
  383. size_t i;
  384. for (i = 0; i < params->params.num; i++)
  385. macro_param_free(params->params.array+i);
  386. da_free(params->params);
  387. }
  388. static inline struct macro_param *get_macro_param(
  389. const struct macro_params *params,
  390. const struct strref *name)
  391. {
  392. size_t i;
  393. if (!params)
  394. return NULL;
  395. for (i = 0; i < params->params.num; i++) {
  396. struct macro_param *param = params->params.array+i;
  397. if (strref_cmp_strref(&param->name.str, name) == 0)
  398. return param;
  399. }
  400. return NULL;
  401. }
  402. /* ------------------------------------------------------------------------- */
  /* Forward declarations: these two are used before they are defined. */
  static bool cf_preprocessor(struct cf_preprocessor *pp, bool if_block,
          struct cf_token **p_cur_token);
  static void cf_preprocess_tokens(struct cf_preprocessor *pp, bool if_block,
          struct cf_token **p_cur_token);
  407. static inline bool go_to_newline(struct cf_token **p_cur_token)
  408. {
  409. struct cf_token *cur_token = *p_cur_token;
  410. while (cur_token->type != CFTOKEN_NEWLINE &&
  411. cur_token->type != CFTOKEN_NONE)
  412. cur_token++;
  413. *p_cur_token = cur_token;
  414. return cur_token->type != CFTOKEN_NONE;
  415. }
  416. static inline bool next_token(struct cf_token **p_cur_token, bool preprocessor)
  417. {
  418. struct cf_token *cur_token = *p_cur_token;
  419. if (cur_token->type != CFTOKEN_NONE)
  420. cur_token++;
  421. /* if preprocessor, stop at newline */
  422. while (cur_token->type == CFTOKEN_SPACETAB &&
  423. (preprocessor || cur_token->type == CFTOKEN_NEWLINE))
  424. cur_token++;
  425. *p_cur_token = cur_token;
  426. return cur_token->type != CFTOKEN_NONE;
  427. }
  428. static inline void cf_gettokenoffset(struct cf_preprocessor *pp,
  429. const struct cf_token *token, uint32_t *row, uint32_t *col)
  430. {
  431. lexer_getstroffset(&pp->lex->base_lexer,
  432. token->unmerged_str.array, row, col);
  433. }
  434. static void cf_addew(struct cf_preprocessor *pp, const struct cf_token *token,
  435. const char *message, int error_level,
  436. const char *val1, const char *val2, const char *val3)
  437. {
  438. uint32_t row, col;
  439. cf_gettokenoffset(pp, token, &row, &col);
  440. if (!val1 && !val2 && !val3) {
  441. error_data_add(pp->ed, token->lex->file, row, col,
  442. message, error_level);
  443. } else {
  444. struct dstr formatted;
  445. dstr_init(&formatted);
  446. dstr_safe_printf(&formatted, message, val1, val2, val3, NULL);
  447. error_data_add(pp->ed, token->lex->file, row, col,
  448. formatted.array, error_level);
  449. dstr_free(&formatted);
  450. }
  451. }
  452. static inline void cf_adderror(struct cf_preprocessor *pp,
  453. const struct cf_token *token, const char *error,
  454. const char *val1, const char *val2, const char *val3)
  455. {
  456. cf_addew(pp, token, error, LEX_ERROR, val1, val2, val3);
  457. }
  458. static inline void cf_addwarning(struct cf_preprocessor *pp,
  459. const struct cf_token *token, const char *warning,
  460. const char *val1, const char *val2, const char *val3)
  461. {
  462. cf_addew(pp, token, warning, LEX_WARNING, val1, val2, val3);
  463. }
  /* Reports "Expected <expecting>" at `token`. */
  static inline void cf_adderror_expecting(struct cf_preprocessor *pp,
          const struct cf_token *token, const char *expecting)
  {
      static const char *const format = "Expected $1";

      cf_adderror(pp, token, format, expecting, NULL, NULL);
  }
  /* Reports trailing tokens after a preprocessor directive. */
  static inline void cf_adderror_expected_newline(struct cf_preprocessor *pp,
          const struct cf_token *token)
  {
      static const char *const text =
          "Unexpected token after preprocessor, expected newline";

      cf_adderror(pp, token, text, NULL, NULL, NULL);
  }
  /* Reports that the input ended inside a conditional block. */
  static inline void cf_adderror_unexpected_endif_eof(struct cf_preprocessor *pp,
          const struct cf_token *token)
  {
      static const char *const text =
          "Unexpected end of file before #endif";

      cf_adderror(pp, token, text, NULL, NULL, NULL);
  }
  /* Reports that the input ended unexpectedly. */
  static inline void cf_adderror_unexpected_eof(struct cf_preprocessor *pp,
          const struct cf_token *token)
  {
      static const char *const text = "Unexpected end of file";

      cf_adderror(pp, token, text, NULL, NULL, NULL);
  }
  490. static inline void insert_path(struct cf_preprocessor *pp,
  491. struct dstr *str_file)
  492. {
  493. const char *file;
  494. const char *slash;
  495. if (pp && pp->lex && pp->lex->file) {
  496. file = pp->lex->file;
  497. slash = strrchr(file, '/');
  498. if (slash) {
  499. struct dstr path = {0};
  500. dstr_ncopy(&path, file, slash - file + 1);
  501. dstr_insert_dstr(str_file, 0, &path);
  502. dstr_free(&path);
  503. }
  504. }
  505. }
  506. static void cf_include_file(struct cf_preprocessor *pp,
  507. const struct cf_token *file_token)
  508. {
  509. struct cf_lexer new_lex;
  510. struct dstr str_file;
  511. FILE *file;
  512. char *file_data;
  513. struct cf_token *tokens;
  514. size_t i;
  515. dstr_init(&str_file);
  516. dstr_copy_strref(&str_file, &file_token->str);
  517. dstr_mid(&str_file, &str_file, 1, str_file.len-2);
  518. insert_path(pp, &str_file);
  519. /* if dependency already exists, run preprocessor on it */
  520. for (i = 0; i < pp->dependencies.num; i++) {
  521. struct cf_lexer *dep = pp->dependencies.array+i;
  522. if (strcmp(dep->file, str_file.array) == 0) {
  523. tokens = cf_lexer_get_tokens(dep);
  524. cf_preprocess_tokens(pp, false, &tokens);
  525. goto exit;
  526. }
  527. }
  528. file = os_fopen(str_file.array, "rb");
  529. if (!file) {
  530. cf_adderror(pp, file_token, "Could not open file '$1'",
  531. file_token->str.array, NULL, NULL);
  532. goto exit;
  533. }
  534. os_fread_utf8(file, &file_data);
  535. fclose(file);
  536. cf_lexer_init(&new_lex);
  537. cf_lexer_lex(&new_lex, file_data, str_file.array);
  538. tokens = cf_lexer_get_tokens(&new_lex);
  539. cf_preprocess_tokens(pp, false, &tokens);
  540. bfree(file_data);
  541. da_push_back(pp->dependencies, &new_lex);
  542. exit:
  543. dstr_free(&str_file);
  544. }
  545. static inline bool is_sys_include(struct strref *ref)
  546. {
  547. return ref->len >= 2 &&
  548. ref->array[0] == '<' && ref->array[ref->len-1] == '>';
  549. }
  550. static inline bool is_loc_include(struct strref *ref)
  551. {
  552. return ref->len >= 2 &&
  553. ref->array[0] == '"' && ref->array[ref->len-1] == '"';
  554. }
  555. static void cf_preprocess_include(struct cf_preprocessor *pp,
  556. struct cf_token **p_cur_token)
  557. {
  558. struct cf_token *cur_token = *p_cur_token;
  559. if (pp->ignore_state) {
  560. go_to_newline(p_cur_token);
  561. return;
  562. }
  563. next_token(&cur_token, true);
  564. if (cur_token->type != CFTOKEN_STRING) {
  565. cf_adderror_expecting(pp, cur_token, "string");
  566. go_to_newline(&cur_token);
  567. goto exit;
  568. }
  569. if (is_sys_include(&cur_token->str)) {
  570. /* TODO */
  571. } else if (is_loc_include(&cur_token->str)) {
  572. if (!pp->ignore_state)
  573. cf_include_file(pp, cur_token);
  574. } else {
  575. cf_adderror(pp, cur_token, "Invalid or incomplete string",
  576. NULL, NULL, NULL);
  577. go_to_newline(&cur_token);
  578. goto exit;
  579. }
  580. cur_token++;
  581. exit:
  582. *p_cur_token = cur_token;
  583. }
  584. static bool cf_preprocess_macro_params(struct cf_preprocessor *pp,
  585. struct cf_def *def, struct cf_token **p_cur_token)
  586. {
  587. struct cf_token *cur_token = *p_cur_token;
  588. bool success = false;
  589. def->macro = true;
  590. do {
  591. next_token(&cur_token, true);
  592. if (cur_token->type != CFTOKEN_NAME) {
  593. cf_adderror_expecting(pp, cur_token, "identifier");
  594. go_to_newline(&cur_token);
  595. goto exit;
  596. }
  597. cf_def_addparam(def, cur_token);
  598. next_token(&cur_token, true);
  599. if (cur_token->type != CFTOKEN_OTHER
  600. || (*cur_token->str.array != ','
  601. && *cur_token->str.array != ')')) {
  602. cf_adderror_expecting(pp, cur_token, "',' or ')'");
  603. go_to_newline(&cur_token);
  604. goto exit;
  605. }
  606. } while (*cur_token->str.array != ')');
  607. /* ended properly, now go to first define token (or newline) */
  608. next_token(&cur_token, true);
  609. success = true;
  610. exit:
  611. *p_cur_token = cur_token;
  612. return success;
  613. }
  614. #define INVALID_INDEX ((size_t)-1)
  615. static inline size_t cf_preprocess_get_def_idx(struct cf_preprocessor *pp,
  616. const struct strref *def_name)
  617. {
  618. struct cf_def *array = pp->defines.array;
  619. size_t i;
  620. for (i = 0; i < pp->defines.num; i++) {
  621. struct cf_def *cur_def = array+i;
  622. if (strref_cmp_strref(&cur_def->name.str, def_name) == 0)
  623. return i;
  624. }
  625. return INVALID_INDEX;
  626. }
  627. static inline struct cf_def *cf_preprocess_get_def(struct cf_preprocessor *pp,
  628. const struct strref *def_name)
  629. {
  630. size_t idx = cf_preprocess_get_def_idx(pp, def_name);
  631. if (idx == INVALID_INDEX)
  632. return NULL;
  633. return pp->defines.array+idx;
  634. }
  635. static char space_filler[2] = " ";
  636. static inline void append_space(struct cf_preprocessor *pp,
  637. struct darray *tokens, const struct cf_token *base)
  638. {
  639. struct cf_token token;
  640. strref_set(&token.str, space_filler, 1);
  641. token.type = CFTOKEN_SPACETAB;
  642. if (base) {
  643. token.lex = base->lex;
  644. strref_copy(&token.unmerged_str, &base->unmerged_str);
  645. } else {
  646. token.lex = pp->lex;
  647. strref_copy(&token.unmerged_str, &token.str);
  648. }
  649. darray_push_back(sizeof(struct cf_token), tokens, &token);
  650. }
  651. static inline void append_end_token(struct darray *tokens)
  652. {
  653. struct cf_token end;
  654. cf_token_clear(&end);
  655. darray_push_back(sizeof(struct cf_token), tokens, &end);
  656. }
  657. static void cf_preprocess_define(struct cf_preprocessor *pp,
  658. struct cf_token **p_cur_token)
  659. {
  660. struct cf_token *cur_token = *p_cur_token;
  661. struct cf_def def;
  662. if (pp->ignore_state) {
  663. go_to_newline(p_cur_token);
  664. return;
  665. }
  666. cf_def_init(&def);
  667. next_token(&cur_token, true);
  668. if (cur_token->type != CFTOKEN_NAME) {
  669. cf_adderror_expecting(pp, cur_token, "identifier");
  670. go_to_newline(&cur_token);
  671. goto exit;
  672. }
  673. append_space(pp, &def.tokens.da, NULL);
  674. cf_token_copy(&def.name, cur_token);
  675. if (!next_token(&cur_token, true))
  676. goto complete;
  677. /* process macro */
  678. if (*cur_token->str.array == '(') {
  679. if (!cf_preprocess_macro_params(pp, &def, &cur_token))
  680. goto error;
  681. }
  682. while (cur_token->type != CFTOKEN_NEWLINE &&
  683. cur_token->type != CFTOKEN_NONE)
  684. cf_def_addtoken(&def, cur_token++);
  685. complete:
  686. append_end_token(&def.tokens.da);
  687. append_space(pp, &def.tokens.da, NULL);
  688. da_push_back(pp->defines, &def);
  689. goto exit;
  690. error:
  691. cf_def_free(&def);
  692. exit:
  693. *p_cur_token = cur_token;
  694. }
  695. static inline void cf_preprocess_remove_def_strref(struct cf_preprocessor *pp,
  696. const struct strref *ref)
  697. {
  698. size_t def_idx = cf_preprocess_get_def_idx(pp, ref);
  699. if (def_idx != INVALID_INDEX) {
  700. struct cf_def *array = pp->defines.array;
  701. cf_def_free(array+def_idx);
  702. da_erase(pp->defines, def_idx);
  703. }
  704. }
  705. static void cf_preprocess_undef(struct cf_preprocessor *pp,
  706. struct cf_token **p_cur_token)
  707. {
  708. struct cf_token *cur_token = *p_cur_token;
  709. if (pp->ignore_state) {
  710. go_to_newline(p_cur_token);
  711. return;
  712. }
  713. next_token(&cur_token, true);
  714. if (cur_token->type != CFTOKEN_NAME) {
  715. cf_adderror_expecting(pp, cur_token, "identifier");
  716. go_to_newline(&cur_token);
  717. goto exit;
  718. }
  719. cf_preprocess_remove_def_strref(pp, &cur_token->str);
  720. cur_token++;
  721. exit:
  722. *p_cur_token = cur_token;
  723. }
  724. /* Processes an #ifdef/#ifndef/#if/#else/#elif sub block recursively */
  725. static inline bool cf_preprocess_subblock(struct cf_preprocessor *pp,
  726. bool ignore, struct cf_token **p_cur_token)
  727. {
  728. bool eof;
  729. if (!next_token(p_cur_token, true))
  730. return false;
  731. if (!pp->ignore_state) {
  732. pp->ignore_state = ignore;
  733. cf_preprocess_tokens(pp, true, p_cur_token);
  734. pp->ignore_state = false;
  735. } else {
  736. cf_preprocess_tokens(pp, true, p_cur_token);
  737. }
  738. eof = ((*p_cur_token)->type == CFTOKEN_NONE);
  739. if (eof)
  740. cf_adderror_unexpected_endif_eof(pp, *p_cur_token);
  741. return !eof;
  742. }
  743. static void cf_preprocess_ifdef(struct cf_preprocessor *pp,
  744. bool ifnot, struct cf_token **p_cur_token)
  745. {
  746. struct cf_token *cur_token = *p_cur_token;
  747. struct cf_def *def;
  748. bool is_true;
  749. next_token(&cur_token, true);
  750. if (cur_token->type != CFTOKEN_NAME) {
  751. cf_adderror_expecting(pp, cur_token, "identifier");
  752. go_to_newline(&cur_token);
  753. goto exit;
  754. }
  755. def = cf_preprocess_get_def(pp, &cur_token->str);
  756. is_true = (def == NULL) == ifnot;
  757. if (!cf_preprocess_subblock(pp, !is_true, &cur_token))
  758. goto exit;
  759. if (strref_cmp(&cur_token->str, "else") == 0) {
  760. if (!cf_preprocess_subblock(pp, is_true, &cur_token))
  761. goto exit;
  762. /*} else if (strref_cmp(&cur_token->str, "elif") == 0) {*/
  763. }
  764. cur_token++;
  765. exit:
  766. *p_cur_token = cur_token;
  767. }
  768. static bool cf_preprocessor(struct cf_preprocessor *pp,
  769. bool if_block, struct cf_token **p_cur_token)
  770. {
  771. struct cf_token *cur_token = *p_cur_token;
  772. if (strref_cmp(&cur_token->str, "include") == 0) {
  773. cf_preprocess_include(pp, p_cur_token);
  774. } else if (strref_cmp(&cur_token->str, "define") == 0) {
  775. cf_preprocess_define(pp, p_cur_token);
  776. } else if (strref_cmp(&cur_token->str, "undef") == 0) {
  777. cf_preprocess_undef(pp, p_cur_token);
  778. } else if (strref_cmp(&cur_token->str, "ifdef") == 0) {
  779. cf_preprocess_ifdef(pp, false, p_cur_token);
  780. } else if (strref_cmp(&cur_token->str, "ifndef") == 0) {
  781. cf_preprocess_ifdef(pp, true, p_cur_token);
  782. /*} else if (strref_cmp(&cur_token->str, "if") == 0) {
  783. TODO;*/
  784. } else if (strref_cmp(&cur_token->str, "else") == 0 ||
  785. /*strref_cmp(&cur_token->str, "elif") == 0 ||*/
  786. strref_cmp(&cur_token->str, "endif") == 0) {
  787. if (!if_block) {
  788. struct dstr name;
  789. dstr_init_copy_strref(&name, &cur_token->str);
  790. cf_adderror(pp, cur_token,"#$1 outside of "
  791. "#if/#ifdef/#ifndef block",
  792. name.array, NULL, NULL);
  793. dstr_free(&name);
  794. (*p_cur_token)++;
  795. return true;
  796. }
  797. return false;
  798. } else if (cur_token->type != CFTOKEN_NEWLINE &&
  799. cur_token->type != CFTOKEN_NONE) {
  800. /*
  801. * TODO: language-specific preprocessor stuff should be sent to
  802. * handler of some sort
  803. */
  804. (*p_cur_token)++;
  805. }
  806. return true;
  807. }
  808. static void cf_preprocess_addtoken(struct cf_preprocessor *pp,
  809. struct darray *dst, /* struct cf_token */
  810. struct cf_token **p_cur_token,
  811. const struct cf_token *base,
  812. const struct macro_params *params);
  813. /*
  814. * collects tokens for a macro parameter
  815. *
  816. * note that it is important to make sure that any usage of function calls
  817. * within a macro parameter is preserved, example MACRO(func(1, 2), 3), do not
  818. * let it stop on the comma at "1,"
  819. */
  820. static void cf_preprocess_save_macro_param(struct cf_preprocessor *pp,
  821. struct cf_token **p_cur_token, struct macro_param *param,
  822. const struct cf_token *base,
  823. const struct macro_params *cur_params)
  824. {
  825. struct cf_token *cur_token = *p_cur_token;
  826. int brace_count = 0;
  827. append_space(pp, &param->tokens.da, base);
  828. while (cur_token->type != CFTOKEN_NONE) {
  829. if (*cur_token->str.array == '(') {
  830. brace_count++;
  831. } else if (*cur_token->str.array == ')') {
  832. if (brace_count)
  833. brace_count--;
  834. else
  835. break;
  836. } else if (*cur_token->str.array == ',') {
  837. if (!brace_count)
  838. break;
  839. }
  840. cf_preprocess_addtoken(pp, &param->tokens.da, &cur_token, base,
  841. cur_params);
  842. }
  843. if (cur_token->type == CFTOKEN_NONE)
  844. cf_adderror_unexpected_eof(pp, cur_token);
  845. append_space(pp, &param->tokens.da, base);
  846. append_end_token(&param->tokens.da);
  847. *p_cur_token = cur_token;
  848. }
  849. static inline bool param_is_whitespace(const struct macro_param *param)
  850. {
  851. struct cf_token *array = param->tokens.array;
  852. size_t i;
  853. for (i = 0; i < param->tokens.num; i++)
  854. if (array[i].type != CFTOKEN_NONE &&
  855. array[i].type != CFTOKEN_SPACETAB &&
  856. array[i].type != CFTOKEN_NEWLINE)
  857. return false;
  858. return true;
  859. }
  860. /* collects parameter tokens of a used macro and stores them for the unwrap */
  861. static void cf_preprocess_save_macro_params(struct cf_preprocessor *pp,
  862. struct cf_token **p_cur_token, const struct cf_def *def,
  863. const struct cf_token *base,
  864. const struct macro_params *cur_params,
  865. struct macro_params *dst)
  866. {
  867. struct cf_token *cur_token = *p_cur_token;
  868. size_t count = 0;
  869. next_token(&cur_token, false);
  870. if (cur_token->type != CFTOKEN_OTHER || *cur_token->str.array != '(') {
  871. cf_adderror_expecting(pp, cur_token, "'('");
  872. goto exit;
  873. }
  874. do {
  875. struct macro_param param;
  876. macro_param_init(&param);
  877. cur_token++;
  878. count++;
  879. cf_preprocess_save_macro_param(pp, &cur_token, &param, base,
  880. cur_params);
  881. if (cur_token->type != CFTOKEN_OTHER
  882. || (*cur_token->str.array != ','
  883. && *cur_token->str.array != ')')) {
  884. macro_param_free(&param);
  885. cf_adderror_expecting(pp, cur_token, "',' or ')'");
  886. goto exit;
  887. }
  888. if (param_is_whitespace(&param)) {
  889. /* if 0-param macro, ignore first entry */
  890. if (count == 1 && !def->params.num &&
  891. *cur_token->str.array == ')') {
  892. macro_param_free(&param);
  893. break;
  894. }
  895. }
  896. if (count <= def->params.num) {
  897. cf_token_copy(&param.name,
  898. cf_def_getparam(def, count-1));
  899. da_push_back(dst->params, &param);
  900. } else {
  901. macro_param_free(&param);
  902. }
  903. } while (*cur_token->str.array != ')');
  904. if (count != def->params.num)
  905. cf_adderror(pp, cur_token,
  906. "Mismatching number of macro parameters",
  907. NULL, NULL, NULL);
  908. exit:
  909. *p_cur_token = cur_token;
  910. }
  911. static inline void cf_preprocess_unwrap_param(struct cf_preprocessor *pp,
  912. struct darray *dst, /* struct cf_token */
  913. struct cf_token **p_cur_token,
  914. const struct cf_token *base,
  915. const struct macro_param *param)
  916. {
  917. struct cf_token *cur_token = *p_cur_token;
  918. struct cf_token *cur_param_token = param->tokens.array;
  919. while (cur_param_token->type != CFTOKEN_NONE)
  920. cf_preprocess_addtoken(pp, dst, &cur_param_token, base, NULL);
  921. cur_token++;
  922. *p_cur_token = cur_token;
  923. }
  924. static inline void cf_preprocess_unwrap_define(struct cf_preprocessor *pp,
  925. struct darray *dst, /* struct cf_token */
  926. struct cf_token **p_cur_token,
  927. const struct cf_token *base,
  928. const struct cf_def *def,
  929. const struct macro_params *cur_params)
  930. {
  931. struct cf_token *cur_token = *p_cur_token;
  932. struct macro_params new_params;
  933. struct cf_token *cur_def_token = def->tokens.array;
  934. macro_params_init(&new_params);
  935. if (def->macro)
  936. cf_preprocess_save_macro_params(pp, &cur_token, def, base,
  937. cur_params, &new_params);
  938. while (cur_def_token->type != CFTOKEN_NONE)
  939. cf_preprocess_addtoken(pp, dst, &cur_def_token, base,
  940. &new_params);
  941. macro_params_free(&new_params);
  942. cur_token++;
  943. *p_cur_token = cur_token;
  944. }
  945. static void cf_preprocess_addtoken(struct cf_preprocessor *pp,
  946. struct darray *dst, /* struct cf_token */
  947. struct cf_token **p_cur_token,
  948. const struct cf_token *base,
  949. const struct macro_params *params)
  950. {
  951. struct cf_token *cur_token = *p_cur_token;
  952. if (pp->ignore_state)
  953. goto ignore;
  954. if (!base)
  955. base = cur_token;
  956. if (cur_token->type == CFTOKEN_NAME) {
  957. struct cf_def *def;
  958. struct macro_param *param;
  959. param = get_macro_param(params, &cur_token->str);
  960. if (param) {
  961. cf_preprocess_unwrap_param(pp, dst, &cur_token, base,
  962. param);
  963. goto exit;
  964. }
  965. def = cf_preprocess_get_def(pp, &cur_token->str);
  966. if (def) {
  967. cf_preprocess_unwrap_define(pp, dst, &cur_token, base,
  968. def, params);
  969. goto exit;
  970. }
  971. }
  972. darray_push_back(sizeof(struct cf_token), dst, cur_token);
  973. ignore:
  974. cur_token++;
  975. exit:
  976. *p_cur_token = cur_token;
  977. }
  978. static void cf_preprocess_tokens(struct cf_preprocessor *pp,
  979. bool if_block, struct cf_token **p_cur_token)
  980. {
  981. bool newline = true;
  982. bool preprocessor_line = if_block;
  983. struct cf_token *cur_token = *p_cur_token;
  984. while (cur_token->type != CFTOKEN_NONE) {
  985. if(cur_token->type != CFTOKEN_SPACETAB &&
  986. cur_token->type != CFTOKEN_NEWLINE) {
  987. if (preprocessor_line) {
  988. cf_adderror_expected_newline(pp, cur_token);
  989. if (!go_to_newline(&cur_token))
  990. break;
  991. }
  992. if (newline && *cur_token->str.array == '#') {
  993. next_token(&cur_token, true);
  994. preprocessor_line = true;
  995. if (!cf_preprocessor(pp, if_block, &cur_token))
  996. break;
  997. continue;
  998. }
  999. newline = false;
  1000. }
  1001. if (cur_token->type == CFTOKEN_NEWLINE) {
  1002. newline = true;
  1003. preprocessor_line = false;
  1004. } else if (cur_token->type == CFTOKEN_NONE) {
  1005. break;
  1006. }
  1007. cf_preprocess_addtoken(pp, &pp->tokens.da, &cur_token, NULL,
  1008. NULL);
  1009. }
  1010. *p_cur_token = cur_token;
  1011. }
  1012. void cf_preprocessor_init(struct cf_preprocessor *pp)
  1013. {
  1014. da_init(pp->defines);
  1015. da_init(pp->sys_include_dirs);
  1016. da_init(pp->dependencies);
  1017. da_init(pp->tokens);
  1018. pp->lex = NULL;
  1019. pp->ed = NULL;
  1020. pp->ignore_state = false;
  1021. }
  1022. void cf_preprocessor_free(struct cf_preprocessor *pp)
  1023. {
  1024. struct cf_lexer *dependencies = pp->dependencies.array;
  1025. char **sys_include_dirs = pp->sys_include_dirs.array;
  1026. struct cf_def *defs = pp->defines.array;
  1027. size_t i;
  1028. for (i = 0; i <pp->defines.num; i++)
  1029. cf_def_free(defs+i);
  1030. for (i = 0; i < pp->sys_include_dirs.num; i++)
  1031. bfree(sys_include_dirs[i]);
  1032. for (i = 0; i < pp->dependencies.num; i++)
  1033. cf_lexer_free(dependencies+i);
  1034. da_free(pp->defines);
  1035. da_free(pp->sys_include_dirs);
  1036. da_free(pp->dependencies);
  1037. da_free(pp->tokens);
  1038. pp->lex = NULL;
  1039. pp->ed = NULL;
  1040. pp->ignore_state = false;
  1041. }
  1042. bool cf_preprocess(struct cf_preprocessor *pp, struct cf_lexer *lex,
  1043. struct error_data *ed)
  1044. {
  1045. struct cf_token *token = cf_lexer_get_tokens(lex);
  1046. if (!token)
  1047. return false;
  1048. pp->ed = ed;
  1049. pp->lex = lex;
  1050. cf_preprocess_tokens(pp, false, &token);
  1051. da_push_back(pp->tokens, token);
  1052. return !lex->unexpected_eof;
  1053. }
  1054. void cf_preprocessor_add_def(struct cf_preprocessor *pp, struct cf_def *def)
  1055. {
  1056. struct cf_def *existing = cf_preprocess_get_def(pp, &def->name.str);
  1057. if (existing) {
  1058. struct dstr name;
  1059. dstr_init_copy_strref(&name, &def->name.str);
  1060. cf_addwarning(pp, &def->name, "Token $1 already defined",
  1061. name.array, NULL, NULL);
  1062. cf_addwarning(pp, &existing->name,
  1063. "Previous definition of $1 is here",
  1064. name.array, NULL, NULL);
  1065. cf_def_free(existing);
  1066. memcpy(existing, def, sizeof(struct cf_def));
  1067. } else {
  1068. da_push_back(pp->defines, def);
  1069. }
  1070. }
  1071. void cf_preprocessor_remove_def(struct cf_preprocessor *pp,
  1072. const char *def_name)
  1073. {
  1074. struct strref ref;
  1075. ref.array = def_name;
  1076. ref.len = strlen(def_name);
  1077. cf_preprocess_remove_def_strref(pp, &ref);
  1078. }