cf-lexer.c 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328
  1. /*
  2. * Copyright (c) 2013 Hugh Bailey <[email protected]>
  3. *
  4. * Permission to use, copy, modify, and distribute this software for any
  5. * purpose with or without fee is hereby granted, provided that the above
  6. * copyright notice and this permission notice appear in all copies.
  7. *
  8. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  10. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  11. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  12. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  13. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  14. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  15. */
  16. #include <ctype.h>
  17. #include <stdio.h>
  18. #include "platform.h"
  19. #include "cf-lexer.h"
  20. static inline void cf_convert_from_escape_literal(char **p_dst,
  21. const char **p_src)
  22. {
  23. char *dst = *p_dst;
  24. const char *src = *p_src;
  25. switch (*(src++)) {
  26. case '\'': *(dst++) = '\''; break;
  27. case '\"': *(dst++) = '\"'; break;
  28. case '\?': *(dst++) = '\?'; break;
  29. case '\\': *(dst++) = '\\'; break;
  30. case '0': *(dst++) = '\0'; break;
  31. case 'a': *(dst++) = '\a'; break;
  32. case 'b': *(dst++) = '\b'; break;
  33. case 'f': *(dst++) = '\f'; break;
  34. case 'n': *(dst++) = '\n'; break;
  35. case 'r': *(dst++) = '\r'; break;
  36. case 't': *(dst++) = '\t'; break;
  37. case 'v': *(dst++) = '\v'; break;
  38. /* hex */
  39. case 'X':
  40. case 'x':
  41. *(dst++) = (char)strtoul(src, NULL, 16);
  42. src += 2;
  43. break;
  44. /* oct */
  45. default:
  46. if (isdigit(*src)) {
  47. *(dst++) = (char)strtoul(src, NULL, 8);
  48. src += 3;
  49. }
  50. /* case 'u':
  51. case 'U': */
  52. }
  53. *p_dst = dst;
  54. *p_src = src;
  55. }
  56. char *cf_literal_to_str(const char *literal, size_t count)
  57. {
  58. const char *temp_src;
  59. char *str, *temp_dst;
  60. if (!count)
  61. count = strlen(literal);
  62. if (count < 2)
  63. return NULL;
  64. if (literal[0] != literal[count-1])
  65. return NULL;
  66. if (literal[0] != '\"' && literal[0] != '\'')
  67. return NULL;
  68. str = bmalloc(count - 1);
  69. temp_src = literal;
  70. temp_dst = str;
  71. while (*temp_src) {
  72. if (*temp_src == '\\') {
  73. temp_src++;
  74. cf_convert_from_escape_literal(&temp_dst, &temp_src);
  75. } else {
  76. *(temp_dst++) = *(temp_src++);
  77. }
  78. }
  79. *temp_dst = 0;
  80. return str;
  81. }
  82. static bool cf_is_token_break(struct base_token *start_token,
  83. const struct base_token *token)
  84. {
  85. switch (start_token->type) {
  86. case BASETOKEN_ALPHA:
  87. if (token->type == BASETOKEN_OTHER ||
  88. token->type == BASETOKEN_WHITESPACE)
  89. return true;
  90. break;
  91. case BASETOKEN_DIGIT:
  92. if (token->type == BASETOKEN_WHITESPACE
  93. || (token->type == BASETOKEN_OTHER
  94. && *token->text.array != '.'))
  95. return true;
  96. break;
  97. case BASETOKEN_WHITESPACE:
  98. /* lump all non-newline whitespace together when possible */
  99. if (is_space_or_tab(*start_token->text.array) &&
  100. is_space_or_tab(*token->text.array))
  101. break;
  102. return true;
  103. case BASETOKEN_OTHER:
  104. if (*start_token->text.array == '.' &&
  105. token->type == BASETOKEN_DIGIT) {
  106. start_token->type = BASETOKEN_DIGIT;
  107. break;
  108. }
  109. case BASETOKEN_NONE:
  110. return true;
  111. }
  112. return false;
  113. }
  114. static inline bool cf_is_splice(const char *array)
  115. {
  116. return (*array == '\\' && is_newline(array[1]));
  117. }
  118. static inline void cf_pass_any_splices(const char **parray)
  119. {
  120. while (cf_is_splice(*parray))
  121. *parray += 1 + newline_size((*parray)+1);
  122. }
  123. static inline bool cf_is_comment(const char *array)
  124. {
  125. const char *offset = array;
  126. if (*offset++ == '/') {
  127. cf_pass_any_splices(&offset);
  128. return (*offset == '*' || *offset == '/');
  129. }
  130. return false;
  131. }
  132. static bool cf_lexer_process_comment(struct cf_lexer *lex,
  133. struct cf_token *out_token)
  134. {
  135. const char *offset;
  136. if (!cf_is_comment(out_token->unmerged_str.array))
  137. return false;
  138. offset = lex->base_lexer.offset;
  139. cf_pass_any_splices(&offset);
  140. strcpy(lex->write_offset++, " ");
  141. out_token->str.len = 1;
  142. if (*offset == '/') {
  143. while (*++offset && !is_newline(*offset))
  144. cf_pass_any_splices(&offset);
  145. } else if (*offset == '*') {
  146. bool was_star = false;
  147. lex->unexpected_eof = true;
  148. while (*++offset) {
  149. cf_pass_any_splices(&offset);
  150. if (was_star && *offset == '/') {
  151. offset++;
  152. lex->unexpected_eof = false;
  153. break;
  154. } else {
  155. was_star = (*offset == '*');
  156. }
  157. }
  158. }
  159. out_token->unmerged_str.len +=
  160. (size_t)(offset - out_token->unmerged_str.array);
  161. out_token->type = CFTOKEN_SPACETAB;
  162. lex->base_lexer.offset = offset;
  163. return true;
  164. }
  165. static inline void cf_lexer_write_strref(struct cf_lexer *lex,
  166. const struct strref *ref)
  167. {
  168. strncpy(lex->write_offset, ref->array, ref->len);
  169. lex->write_offset[ref->len] = 0;
  170. lex->write_offset += ref->len;
  171. }
  172. static bool cf_lexer_is_include(struct cf_lexer *lex)
  173. {
  174. bool found_include_import = false;
  175. bool found_preprocessor = false;
  176. size_t i;
  177. for (i = lex->tokens.num; i > 0; i--) {
  178. struct cf_token *token = lex->tokens.array+(i-1);
  179. if (is_space_or_tab(*token->str.array))
  180. continue;
  181. if (!found_include_import) {
  182. if (strref_cmp(&token->str, "include") != 0 &&
  183. strref_cmp(&token->str, "import") != 0)
  184. break;
  185. found_include_import = true;
  186. } else if (!found_preprocessor) {
  187. if (*token->str.array != '#')
  188. break;
  189. found_preprocessor = true;
  190. } else {
  191. return is_newline(*token->str.array);
  192. }
  193. }
  194. /* if starting line */
  195. return found_preprocessor && found_include_import;
  196. }
  197. static void cf_lexer_getstrtoken(struct cf_lexer *lex,
  198. struct cf_token *out_token, char delimiter,
  199. bool allow_escaped_delimiters)
  200. {
  201. const char *offset = lex->base_lexer.offset;
  202. bool escaped = false;
  203. out_token->unmerged_str.len++;
  204. out_token->str.len++;
  205. cf_lexer_write_strref(lex, &out_token->unmerged_str);
  206. while (*offset) {
  207. cf_pass_any_splices(&offset);
  208. if (*offset == delimiter) {
  209. if (!escaped) {
  210. *lex->write_offset++ = *offset;
  211. out_token->str.len++;
  212. offset++;
  213. break;
  214. }
  215. } else if (is_newline(*offset)) {
  216. break;
  217. }
  218. *lex->write_offset++ = *offset;
  219. out_token->str.len++;
  220. escaped = (allow_escaped_delimiters && *offset == '\\');
  221. offset++;
  222. }
  223. *lex->write_offset = 0;
  224. out_token->unmerged_str.len +=
  225. (size_t)(offset - out_token->unmerged_str.array);
  226. out_token->type = CFTOKEN_STRING;
  227. lex->base_lexer.offset = offset;
  228. }
  229. static bool cf_lexer_process_string(struct cf_lexer *lex,
  230. struct cf_token *out_token)
  231. {
  232. char ch = *out_token->unmerged_str.array;
  233. if (ch == '<' && cf_lexer_is_include(lex)) {
  234. cf_lexer_getstrtoken(lex, out_token, '>', false);
  235. return true;
  236. } else if (ch == '"' || ch == '\'') {
  237. cf_lexer_getstrtoken(lex, out_token, ch,
  238. !cf_lexer_is_include(lex));
  239. return true;
  240. }
  241. return false;
  242. }
  243. static inline enum cf_token_type cf_get_token_type(const struct cf_token *token,
  244. const struct base_token *start_token)
  245. {
  246. switch (start_token->type) {
  247. case BASETOKEN_ALPHA:
  248. return CFTOKEN_NAME;
  249. case BASETOKEN_DIGIT:
  250. return CFTOKEN_NUM;
  251. case BASETOKEN_WHITESPACE:
  252. if (is_newline(*token->str.array))
  253. return CFTOKEN_NEWLINE;
  254. else
  255. return CFTOKEN_SPACETAB;
  256. case BASETOKEN_NONE:
  257. case BASETOKEN_OTHER:
  258. break;
  259. }
  260. return CFTOKEN_OTHER;
  261. }
  262. static bool cf_lexer_nexttoken(struct cf_lexer *lex, struct cf_token *out_token)
  263. {
  264. struct base_token token, start_token;
  265. bool wrote_data = false;
  266. base_token_clear(&token);
  267. base_token_clear(&start_token);
  268. cf_token_clear(out_token);
  269. while (lexer_getbasetoken(&lex->base_lexer, &token, PARSE_WHITESPACE)) {
  270. /* reclassify underscore as alpha for alnum tokens */
  271. if (*token.text.array == '_')
  272. token.type = BASETOKEN_ALPHA;
  273. /* ignore escaped newlines to merge spliced lines */
  274. if (cf_is_splice(token.text.array)) {
  275. lex->base_lexer.offset +=
  276. newline_size(token.text.array+1);
  277. continue;
  278. }
  279. if (!wrote_data) {
  280. out_token->unmerged_str.array = token.text.array;
  281. out_token->str.array = lex->write_offset;
  282. /* if comment then output a space */
  283. if (cf_lexer_process_comment(lex, out_token))
  284. return true;
  285. /* process string tokens if any */
  286. if (cf_lexer_process_string(lex, out_token))
  287. return true;
  288. base_token_copy(&start_token, &token);
  289. wrote_data = true;
  290. } else if (cf_is_token_break(&start_token, &token)) {
  291. lex->base_lexer.offset -= token.text.len;
  292. break;
  293. }
  294. /* write token to CF lexer to account for splicing/comments */
  295. cf_lexer_write_strref(lex, &token.text);
  296. out_token->str.len += token.text.len;
  297. }
  298. if (wrote_data) {
  299. out_token->unmerged_str.len = (size_t)(lex->base_lexer.offset -
  300. out_token->unmerged_str.array);
  301. out_token->type = cf_get_token_type(out_token, &start_token);
  302. }
  303. return wrote_data;
  304. }
  305. void cf_lexer_init(struct cf_lexer *lex)
  306. {
  307. lexer_init(&lex->base_lexer);
  308. da_init(lex->tokens);
  309. lex->file = NULL;
  310. lex->reformatted = NULL;
  311. lex->write_offset = NULL;
  312. lex->unexpected_eof = false;
  313. }
  314. void cf_lexer_free(struct cf_lexer *lex)
  315. {
  316. bfree(lex->file);
  317. bfree(lex->reformatted);
  318. lexer_free(&lex->base_lexer);
  319. da_free(lex->tokens);
  320. lex->file = NULL;
  321. lex->reformatted = NULL;
  322. lex->write_offset = NULL;
  323. lex->unexpected_eof = false;
  324. }
  325. bool cf_lexer_lex(struct cf_lexer *lex, const char *str, const char *file)
  326. {
  327. struct cf_token token;
  328. struct cf_token *last_token = NULL;
  329. cf_lexer_free(lex);
  330. if (!str || !*str)
  331. return false;
  332. if (file)
  333. lex->file = bstrdup(file);
  334. lexer_start(&lex->base_lexer, str);
  335. cf_token_clear(&token);
  336. lex->reformatted = bmalloc(strlen(str) + 1);
  337. lex->reformatted[0] = 0;
  338. lex->write_offset = lex->reformatted;
  339. while (cf_lexer_nexttoken(lex, &token)) {
  340. if (last_token &&
  341. is_space_or_tab(*last_token->str.array) &&
  342. is_space_or_tab(*token.str.array)) {
  343. cf_token_add(last_token, &token);
  344. continue;
  345. }
  346. token.lex = lex;
  347. last_token = da_push_back_new(lex->tokens);
  348. memcpy(last_token, &token, sizeof(struct cf_token));
  349. }
  350. cf_token_clear(&token);
  351. token.str.array = lex->write_offset;
  352. token.unmerged_str.array = lex->base_lexer.offset;
  353. token.lex = lex;
  354. da_push_back(lex->tokens, &token);
  355. return !lex->unexpected_eof;
  356. }
  357. /* ------------------------------------------------------------------------- */
  358. struct macro_param {
  359. struct cf_token name;
  360. DARRAY(struct cf_token) tokens;
  361. };
  362. static inline void macro_param_init(struct macro_param *param)
  363. {
  364. cf_token_clear(&param->name);
  365. da_init(param->tokens);
  366. }
  367. static inline void macro_param_free(struct macro_param *param)
  368. {
  369. cf_token_clear(&param->name);
  370. da_free(param->tokens);
  371. }
  372. /* ------------------------------------------------------------------------- */
  373. struct macro_params {
  374. DARRAY(struct macro_param) params;
  375. };
  376. static inline void macro_params_init(struct macro_params *params)
  377. {
  378. da_init(params->params);
  379. }
  380. static inline void macro_params_free(struct macro_params *params)
  381. {
  382. size_t i;
  383. for (i = 0; i < params->params.num; i++)
  384. macro_param_free(params->params.array+i);
  385. da_free(params->params);
  386. }
  387. static inline struct macro_param *get_macro_param(
  388. const struct macro_params *params,
  389. const struct strref *name)
  390. {
  391. size_t i;
  392. if (!params)
  393. return NULL;
  394. for (i = 0; i < params->params.num; i++) {
  395. struct macro_param *param = params->params.array+i;
  396. if (strref_cmp_strref(&param->name.str, name) == 0)
  397. return param;
  398. }
  399. return NULL;
  400. }
  401. /* ------------------------------------------------------------------------- */
  402. static bool cf_preprocessor(struct cf_preprocessor *pp,
  403. bool if_block, struct cf_token **p_cur_token);
  404. static void cf_preprocess_tokens(struct cf_preprocessor *pp,
  405. bool if_block, struct cf_token **p_cur_token);
  406. static inline bool go_to_newline(struct cf_token **p_cur_token)
  407. {
  408. struct cf_token *cur_token = *p_cur_token;
  409. while (cur_token->type != CFTOKEN_NEWLINE &&
  410. cur_token->type != CFTOKEN_NONE)
  411. cur_token++;
  412. *p_cur_token = cur_token;
  413. return cur_token->type != CFTOKEN_NONE;
  414. }
  415. static inline bool next_token(struct cf_token **p_cur_token, bool preprocessor)
  416. {
  417. struct cf_token *cur_token = *p_cur_token;
  418. if (cur_token->type != CFTOKEN_NONE)
  419. cur_token++;
  420. /* if preprocessor, stop at newline */
  421. while (cur_token->type == CFTOKEN_SPACETAB &&
  422. (preprocessor || cur_token->type == CFTOKEN_NEWLINE))
  423. cur_token++;
  424. *p_cur_token = cur_token;
  425. return cur_token->type != CFTOKEN_NONE;
  426. }
  427. static inline void cf_gettokenoffset(struct cf_preprocessor *pp,
  428. const struct cf_token *token, uint32_t *row, uint32_t *col)
  429. {
  430. lexer_getstroffset(&pp->lex->base_lexer,
  431. token->unmerged_str.array, row, col);
  432. }
  433. static void cf_addew(struct cf_preprocessor *pp, const struct cf_token *token,
  434. const char *message, int error_level,
  435. const char *val1, const char *val2, const char *val3)
  436. {
  437. uint32_t row, col;
  438. cf_gettokenoffset(pp, token, &row, &col);
  439. if (!val1 && !val2 && !val3) {
  440. error_data_add(pp->ed, token->lex->file, row, col,
  441. message, error_level);
  442. } else {
  443. struct dstr formatted;
  444. dstr_init(&formatted);
  445. dstr_safe_printf(&formatted, message, val1, val2, val3, NULL);
  446. error_data_add(pp->ed, token->lex->file, row, col,
  447. formatted.array, error_level);
  448. dstr_free(&formatted);
  449. }
  450. }
  451. static inline void cf_adderror(struct cf_preprocessor *pp,
  452. const struct cf_token *token, const char *error,
  453. const char *val1, const char *val2, const char *val3)
  454. {
  455. cf_addew(pp, token, error, LEX_ERROR, val1, val2, val3);
  456. }
  457. static inline void cf_addwarning(struct cf_preprocessor *pp,
  458. const struct cf_token *token, const char *warning,
  459. const char *val1, const char *val2, const char *val3)
  460. {
  461. cf_addew(pp, token, warning, LEX_WARNING, val1, val2, val3);
  462. }
  463. static inline void cf_adderror_expecting(struct cf_preprocessor *pp,
  464. const struct cf_token *token, const char *expecting)
  465. {
  466. cf_adderror(pp, token, "Expected $1", expecting,
  467. NULL, NULL);
  468. }
  469. static inline void cf_adderror_expected_newline(struct cf_preprocessor *pp,
  470. const struct cf_token *token)
  471. {
  472. cf_adderror(pp, token,
  473. "Unexpected token after preprocessor, expected "
  474. "newline",
  475. NULL, NULL, NULL);
  476. }
  477. static inline void cf_adderror_unexpected_endif_eof(struct cf_preprocessor *pp,
  478. const struct cf_token *token)
  479. {
  480. cf_adderror(pp, token, "Unexpected end of file before #endif",
  481. NULL, NULL, NULL);
  482. }
  483. static inline void cf_adderror_unexpected_eof(struct cf_preprocessor *pp,
  484. const struct cf_token *token)
  485. {
  486. cf_adderror(pp, token, "Unexpected end of file",
  487. NULL, NULL, NULL);
  488. }
  489. static inline void insert_path(struct cf_preprocessor *pp,
  490. struct dstr *str_file)
  491. {
  492. const char *file;
  493. const char *slash;
  494. if (pp && pp->lex && pp->lex->file) {
  495. file = pp->lex->file;
  496. slash = strrchr(file, '/');
  497. if (slash) {
  498. struct dstr path = {0};
  499. dstr_ncopy(&path, file, slash - file + 1);
  500. dstr_insert_dstr(str_file, 0, &path);
  501. dstr_free(&path);
  502. }
  503. }
  504. }
  505. static void cf_include_file(struct cf_preprocessor *pp,
  506. const struct cf_token *file_token)
  507. {
  508. struct cf_lexer new_lex;
  509. struct dstr str_file;
  510. FILE *file;
  511. char *file_data;
  512. struct cf_token *tokens;
  513. size_t i;
  514. dstr_init(&str_file);
  515. dstr_copy_strref(&str_file, &file_token->str);
  516. dstr_mid(&str_file, &str_file, 1, str_file.len-2);
  517. insert_path(pp, &str_file);
  518. /* if dependency already exists, run preprocessor on it */
  519. for (i = 0; i < pp->dependencies.num; i++) {
  520. struct cf_lexer *dep = pp->dependencies.array+i;
  521. if (strcmp(dep->file, str_file.array) == 0) {
  522. tokens = cf_lexer_get_tokens(dep);
  523. cf_preprocess_tokens(pp, false, &tokens);
  524. goto exit;
  525. }
  526. }
  527. file = os_fopen(str_file.array, "rb");
  528. if (!file) {
  529. cf_adderror(pp, file_token, "Could not open file '$1'",
  530. file_token->str.array, NULL, NULL);
  531. goto exit;
  532. }
  533. os_fread_utf8(file, &file_data);
  534. fclose(file);
  535. cf_lexer_init(&new_lex);
  536. cf_lexer_lex(&new_lex, file_data, str_file.array);
  537. tokens = cf_lexer_get_tokens(&new_lex);
  538. cf_preprocess_tokens(pp, false, &tokens);
  539. bfree(file_data);
  540. da_push_back(pp->dependencies, &new_lex);
  541. exit:
  542. dstr_free(&str_file);
  543. }
  544. static inline bool is_sys_include(struct strref *ref)
  545. {
  546. return ref->len >= 2 &&
  547. ref->array[0] == '<' && ref->array[ref->len-1] == '>';
  548. }
  549. static inline bool is_loc_include(struct strref *ref)
  550. {
  551. return ref->len >= 2 &&
  552. ref->array[0] == '"' && ref->array[ref->len-1] == '"';
  553. }
  554. static void cf_preprocess_include(struct cf_preprocessor *pp,
  555. struct cf_token **p_cur_token)
  556. {
  557. struct cf_token *cur_token = *p_cur_token;
  558. if (pp->ignore_state) {
  559. go_to_newline(p_cur_token);
  560. return;
  561. }
  562. next_token(&cur_token, true);
  563. if (cur_token->type != CFTOKEN_STRING) {
  564. cf_adderror_expecting(pp, cur_token, "string");
  565. go_to_newline(&cur_token);
  566. goto exit;
  567. }
  568. if (is_sys_include(&cur_token->str)) {
  569. /* TODO */
  570. } else if (is_loc_include(&cur_token->str)) {
  571. if (!pp->ignore_state)
  572. cf_include_file(pp, cur_token);
  573. } else {
  574. cf_adderror(pp, cur_token, "Invalid or incomplete string",
  575. NULL, NULL, NULL);
  576. go_to_newline(&cur_token);
  577. goto exit;
  578. }
  579. cur_token++;
  580. exit:
  581. *p_cur_token = cur_token;
  582. }
  583. static bool cf_preprocess_macro_params(struct cf_preprocessor *pp,
  584. struct cf_def *def, struct cf_token **p_cur_token)
  585. {
  586. struct cf_token *cur_token = *p_cur_token;
  587. bool success = false;
  588. def->macro = true;
  589. do {
  590. next_token(&cur_token, true);
  591. if (cur_token->type != CFTOKEN_NAME) {
  592. cf_adderror_expecting(pp, cur_token, "identifier");
  593. go_to_newline(&cur_token);
  594. goto exit;
  595. }
  596. cf_def_addparam(def, cur_token);
  597. next_token(&cur_token, true);
  598. if (cur_token->type != CFTOKEN_OTHER
  599. || (*cur_token->str.array != ','
  600. && *cur_token->str.array != ')')) {
  601. cf_adderror_expecting(pp, cur_token, "',' or ')'");
  602. go_to_newline(&cur_token);
  603. goto exit;
  604. }
  605. } while (*cur_token->str.array != ')');
  606. /* ended properly, now go to first define token (or newline) */
  607. next_token(&cur_token, true);
  608. success = true;
  609. exit:
  610. *p_cur_token = cur_token;
  611. return success;
  612. }
  613. #define INVALID_INDEX ((size_t)-1)
  614. static inline size_t cf_preprocess_get_def_idx(struct cf_preprocessor *pp,
  615. const struct strref *def_name)
  616. {
  617. struct cf_def *array = pp->defines.array;
  618. size_t i;
  619. for (i = 0; i < pp->defines.num; i++) {
  620. struct cf_def *cur_def = array+i;
  621. if (strref_cmp_strref(&cur_def->name.str, def_name) == 0)
  622. return i;
  623. }
  624. return INVALID_INDEX;
  625. }
  626. static inline struct cf_def *cf_preprocess_get_def(struct cf_preprocessor *pp,
  627. const struct strref *def_name)
  628. {
  629. size_t idx = cf_preprocess_get_def_idx(pp, def_name);
  630. if (idx == INVALID_INDEX)
  631. return NULL;
  632. return pp->defines.array+idx;
  633. }
  634. static char space_filler[2] = " ";
  635. static inline void append_space(struct cf_preprocessor *pp,
  636. struct darray *tokens, const struct cf_token *base)
  637. {
  638. struct cf_token token;
  639. strref_set(&token.str, space_filler, 1);
  640. token.type = CFTOKEN_SPACETAB;
  641. if (base) {
  642. token.lex = base->lex;
  643. strref_copy(&token.unmerged_str, &base->unmerged_str);
  644. } else {
  645. token.lex = pp->lex;
  646. strref_copy(&token.unmerged_str, &token.str);
  647. }
  648. darray_push_back(sizeof(struct cf_token), tokens, &token);
  649. }
  650. static inline void append_end_token(struct darray *tokens)
  651. {
  652. struct cf_token end;
  653. cf_token_clear(&end);
  654. darray_push_back(sizeof(struct cf_token), tokens, &end);
  655. }
  656. static void cf_preprocess_define(struct cf_preprocessor *pp,
  657. struct cf_token **p_cur_token)
  658. {
  659. struct cf_token *cur_token = *p_cur_token;
  660. struct cf_def def;
  661. if (pp->ignore_state) {
  662. go_to_newline(p_cur_token);
  663. return;
  664. }
  665. cf_def_init(&def);
  666. next_token(&cur_token, true);
  667. if (cur_token->type != CFTOKEN_NAME) {
  668. cf_adderror_expecting(pp, cur_token, "identifier");
  669. go_to_newline(&cur_token);
  670. goto exit;
  671. }
  672. append_space(pp, &def.tokens.da, NULL);
  673. cf_token_copy(&def.name, cur_token);
  674. if (!next_token(&cur_token, true))
  675. goto complete;
  676. /* process macro */
  677. if (*cur_token->str.array == '(') {
  678. if (!cf_preprocess_macro_params(pp, &def, &cur_token))
  679. goto error;
  680. }
  681. while (cur_token->type != CFTOKEN_NEWLINE &&
  682. cur_token->type != CFTOKEN_NONE)
  683. cf_def_addtoken(&def, cur_token++);
  684. complete:
  685. append_end_token(&def.tokens.da);
  686. append_space(pp, &def.tokens.da, NULL);
  687. da_push_back(pp->defines, &def);
  688. goto exit;
  689. error:
  690. cf_def_free(&def);
  691. exit:
  692. *p_cur_token = cur_token;
  693. }
  694. static inline void cf_preprocess_remove_def_strref(struct cf_preprocessor *pp,
  695. const struct strref *ref)
  696. {
  697. size_t def_idx = cf_preprocess_get_def_idx(pp, ref);
  698. if (def_idx != INVALID_INDEX) {
  699. struct cf_def *array = pp->defines.array;
  700. cf_def_free(array+def_idx);
  701. da_erase(pp->defines, def_idx);
  702. }
  703. }
  704. static void cf_preprocess_undef(struct cf_preprocessor *pp,
  705. struct cf_token **p_cur_token)
  706. {
  707. struct cf_token *cur_token = *p_cur_token;
  708. if (pp->ignore_state) {
  709. go_to_newline(p_cur_token);
  710. return;
  711. }
  712. next_token(&cur_token, true);
  713. if (cur_token->type != CFTOKEN_NAME) {
  714. cf_adderror_expecting(pp, cur_token, "identifier");
  715. go_to_newline(&cur_token);
  716. goto exit;
  717. }
  718. cf_preprocess_remove_def_strref(pp, &cur_token->str);
  719. cur_token++;
  720. exit:
  721. *p_cur_token = cur_token;
  722. }
  723. /* Processes an #ifdef/#ifndef/#if/#else/#elif sub block recursively */
  724. static inline bool cf_preprocess_subblock(struct cf_preprocessor *pp,
  725. bool ignore, struct cf_token **p_cur_token)
  726. {
  727. bool eof;
  728. if (!next_token(p_cur_token, true))
  729. return false;
  730. if (!pp->ignore_state) {
  731. pp->ignore_state = ignore;
  732. cf_preprocess_tokens(pp, true, p_cur_token);
  733. pp->ignore_state = false;
  734. } else {
  735. cf_preprocess_tokens(pp, true, p_cur_token);
  736. }
  737. eof = ((*p_cur_token)->type == CFTOKEN_NONE);
  738. if (eof)
  739. cf_adderror_unexpected_endif_eof(pp, *p_cur_token);
  740. return !eof;
  741. }
  742. static void cf_preprocess_ifdef(struct cf_preprocessor *pp,
  743. bool ifnot, struct cf_token **p_cur_token)
  744. {
  745. struct cf_token *cur_token = *p_cur_token;
  746. struct cf_def *def;
  747. bool is_true;
  748. next_token(&cur_token, true);
  749. if (cur_token->type != CFTOKEN_NAME) {
  750. cf_adderror_expecting(pp, cur_token, "identifier");
  751. go_to_newline(&cur_token);
  752. goto exit;
  753. }
  754. def = cf_preprocess_get_def(pp, &cur_token->str);
  755. is_true = (def == NULL) == ifnot;
  756. if (!cf_preprocess_subblock(pp, !is_true, &cur_token))
  757. goto exit;
  758. if (strref_cmp(&cur_token->str, "else") == 0) {
  759. if (!cf_preprocess_subblock(pp, is_true, &cur_token))
  760. goto exit;
  761. /*} else if (strref_cmp(&cur_token->str, "elif") == 0) {*/
  762. }
  763. cur_token++;
  764. exit:
  765. *p_cur_token = cur_token;
  766. }
  767. static bool cf_preprocessor(struct cf_preprocessor *pp,
  768. bool if_block, struct cf_token **p_cur_token)
  769. {
  770. struct cf_token *cur_token = *p_cur_token;
  771. if (strref_cmp(&cur_token->str, "include") == 0) {
  772. cf_preprocess_include(pp, p_cur_token);
  773. } else if (strref_cmp(&cur_token->str, "define") == 0) {
  774. cf_preprocess_define(pp, p_cur_token);
  775. } else if (strref_cmp(&cur_token->str, "undef") == 0) {
  776. cf_preprocess_undef(pp, p_cur_token);
  777. } else if (strref_cmp(&cur_token->str, "ifdef") == 0) {
  778. cf_preprocess_ifdef(pp, false, p_cur_token);
  779. } else if (strref_cmp(&cur_token->str, "ifndef") == 0) {
  780. cf_preprocess_ifdef(pp, true, p_cur_token);
  781. /*} else if (strref_cmp(&cur_token->str, "if") == 0) {
  782. TODO;*/
  783. } else if (strref_cmp(&cur_token->str, "else") == 0 ||
  784. /*strref_cmp(&cur_token->str, "elif") == 0 ||*/
  785. strref_cmp(&cur_token->str, "endif") == 0) {
  786. if (!if_block) {
  787. struct dstr name;
  788. dstr_init_copy_strref(&name, &cur_token->str);
  789. cf_adderror(pp, cur_token,"#$1 outside of "
  790. "#if/#ifdef/#ifndef block",
  791. name.array, NULL, NULL);
  792. dstr_free(&name);
  793. (*p_cur_token)++;
  794. return true;
  795. }
  796. return false;
  797. } else if (cur_token->type != CFTOKEN_NEWLINE &&
  798. cur_token->type != CFTOKEN_NONE) {
  799. /*
  800. * TODO: language-specific preprocessor stuff should be sent to
  801. * handler of some sort
  802. */
  803. (*p_cur_token)++;
  804. }
  805. return true;
  806. }
  807. static void cf_preprocess_addtoken(struct cf_preprocessor *pp,
  808. struct darray *dst, /* struct cf_token */
  809. struct cf_token **p_cur_token,
  810. const struct cf_token *base,
  811. const struct macro_params *params);
  812. /*
  813. * collects tokens for a macro parameter
  814. *
  815. * note that it is important to make sure that any usage of function calls
  816. * within a macro parameter is preserved, example MACRO(func(1, 2), 3), do not
  817. * let it stop on the comma at "1,"
  818. */
  819. static void cf_preprocess_save_macro_param(struct cf_preprocessor *pp,
  820. struct cf_token **p_cur_token, struct macro_param *param,
  821. const struct cf_token *base,
  822. const struct macro_params *cur_params)
  823. {
  824. struct cf_token *cur_token = *p_cur_token;
  825. int brace_count = 0;
  826. append_space(pp, &param->tokens.da, base);
  827. while (cur_token->type != CFTOKEN_NONE) {
  828. if (*cur_token->str.array == '(') {
  829. brace_count++;
  830. } else if (*cur_token->str.array == ')') {
  831. if (brace_count)
  832. brace_count--;
  833. else
  834. break;
  835. } else if (*cur_token->str.array == ',') {
  836. if (!brace_count)
  837. break;
  838. }
  839. cf_preprocess_addtoken(pp, &param->tokens.da, &cur_token, base,
  840. cur_params);
  841. }
  842. if (cur_token->type == CFTOKEN_NONE)
  843. cf_adderror_unexpected_eof(pp, cur_token);
  844. append_space(pp, &param->tokens.da, base);
  845. append_end_token(&param->tokens.da);
  846. *p_cur_token = cur_token;
  847. }
  848. static inline bool param_is_whitespace(const struct macro_param *param)
  849. {
  850. struct cf_token *array = param->tokens.array;
  851. size_t i;
  852. for (i = 0; i < param->tokens.num; i++)
  853. if (array[i].type != CFTOKEN_NONE &&
  854. array[i].type != CFTOKEN_SPACETAB &&
  855. array[i].type != CFTOKEN_NEWLINE)
  856. return false;
  857. return true;
  858. }
  859. /* collects parameter tokens of a used macro and stores them for the unwrap */
  860. static void cf_preprocess_save_macro_params(struct cf_preprocessor *pp,
  861. struct cf_token **p_cur_token, const struct cf_def *def,
  862. const struct cf_token *base,
  863. const struct macro_params *cur_params,
  864. struct macro_params *dst)
  865. {
  866. struct cf_token *cur_token = *p_cur_token;
  867. size_t count = 0;
  868. next_token(&cur_token, false);
  869. if (cur_token->type != CFTOKEN_OTHER || *cur_token->str.array != '(') {
  870. cf_adderror_expecting(pp, cur_token, "'('");
  871. goto exit;
  872. }
  873. do {
  874. struct macro_param param;
  875. macro_param_init(&param);
  876. cur_token++;
  877. count++;
  878. cf_preprocess_save_macro_param(pp, &cur_token, &param, base,
  879. cur_params);
  880. if (cur_token->type != CFTOKEN_OTHER
  881. || (*cur_token->str.array != ','
  882. && *cur_token->str.array != ')')) {
  883. macro_param_free(&param);
  884. cf_adderror_expecting(pp, cur_token, "',' or ')'");
  885. goto exit;
  886. }
  887. if (param_is_whitespace(&param)) {
  888. /* if 0-param macro, ignore first entry */
  889. if (count == 1 && !def->params.num &&
  890. *cur_token->str.array == ')') {
  891. macro_param_free(&param);
  892. break;
  893. }
  894. }
  895. if (count <= def->params.num) {
  896. cf_token_copy(&param.name,
  897. cf_def_getparam(def, count-1));
  898. da_push_back(dst->params, &param);
  899. } else {
  900. macro_param_free(&param);
  901. }
  902. } while (*cur_token->str.array != ')');
  903. if (count != def->params.num)
  904. cf_adderror(pp, cur_token,
  905. "Mismatching number of macro parameters",
  906. NULL, NULL, NULL);
  907. exit:
  908. *p_cur_token = cur_token;
  909. }
  910. static inline void cf_preprocess_unwrap_param(struct cf_preprocessor *pp,
  911. struct darray *dst, /* struct cf_token */
  912. struct cf_token **p_cur_token,
  913. const struct cf_token *base,
  914. const struct macro_param *param)
  915. {
  916. struct cf_token *cur_token = *p_cur_token;
  917. struct cf_token *cur_param_token = param->tokens.array;
  918. while (cur_param_token->type != CFTOKEN_NONE)
  919. cf_preprocess_addtoken(pp, dst, &cur_param_token, base, NULL);
  920. cur_token++;
  921. *p_cur_token = cur_token;
  922. }
  923. static inline void cf_preprocess_unwrap_define(struct cf_preprocessor *pp,
  924. struct darray *dst, /* struct cf_token */
  925. struct cf_token **p_cur_token,
  926. const struct cf_token *base,
  927. const struct cf_def *def,
  928. const struct macro_params *cur_params)
  929. {
  930. struct cf_token *cur_token = *p_cur_token;
  931. struct macro_params new_params;
  932. struct cf_token *cur_def_token = def->tokens.array;
  933. macro_params_init(&new_params);
  934. if (def->macro)
  935. cf_preprocess_save_macro_params(pp, &cur_token, def, base,
  936. cur_params, &new_params);
  937. while (cur_def_token->type != CFTOKEN_NONE)
  938. cf_preprocess_addtoken(pp, dst, &cur_def_token, base,
  939. &new_params);
  940. macro_params_free(&new_params);
  941. cur_token++;
  942. *p_cur_token = cur_token;
  943. }
  944. static void cf_preprocess_addtoken(struct cf_preprocessor *pp,
  945. struct darray *dst, /* struct cf_token */
  946. struct cf_token **p_cur_token,
  947. const struct cf_token *base,
  948. const struct macro_params *params)
  949. {
  950. struct cf_token *cur_token = *p_cur_token;
  951. if (pp->ignore_state)
  952. goto ignore;
  953. if (!base)
  954. base = cur_token;
  955. if (cur_token->type == CFTOKEN_NAME) {
  956. struct cf_def *def;
  957. struct macro_param *param;
  958. param = get_macro_param(params, &cur_token->str);
  959. if (param) {
  960. cf_preprocess_unwrap_param(pp, dst, &cur_token, base,
  961. param);
  962. goto exit;
  963. }
  964. def = cf_preprocess_get_def(pp, &cur_token->str);
  965. if (def) {
  966. cf_preprocess_unwrap_define(pp, dst, &cur_token, base,
  967. def, params);
  968. goto exit;
  969. }
  970. }
  971. darray_push_back(sizeof(struct cf_token), dst, cur_token);
  972. ignore:
  973. cur_token++;
  974. exit:
  975. *p_cur_token = cur_token;
  976. }
  977. static void cf_preprocess_tokens(struct cf_preprocessor *pp,
  978. bool if_block, struct cf_token **p_cur_token)
  979. {
  980. bool newline = true;
  981. bool preprocessor_line = if_block;
  982. struct cf_token *cur_token = *p_cur_token;
  983. while (cur_token->type != CFTOKEN_NONE) {
  984. if(cur_token->type != CFTOKEN_SPACETAB &&
  985. cur_token->type != CFTOKEN_NEWLINE) {
  986. if (preprocessor_line) {
  987. cf_adderror_expected_newline(pp, cur_token);
  988. if (!go_to_newline(&cur_token))
  989. break;
  990. }
  991. if (newline && *cur_token->str.array == '#') {
  992. next_token(&cur_token, true);
  993. preprocessor_line = true;
  994. if (!cf_preprocessor(pp, if_block, &cur_token))
  995. break;
  996. continue;
  997. }
  998. newline = false;
  999. }
  1000. if (cur_token->type == CFTOKEN_NEWLINE) {
  1001. newline = true;
  1002. preprocessor_line = false;
  1003. } else if (cur_token->type == CFTOKEN_NONE) {
  1004. break;
  1005. }
  1006. cf_preprocess_addtoken(pp, &pp->tokens.da, &cur_token, NULL,
  1007. NULL);
  1008. }
  1009. *p_cur_token = cur_token;
  1010. }
  1011. void cf_preprocessor_init(struct cf_preprocessor *pp)
  1012. {
  1013. da_init(pp->defines);
  1014. da_init(pp->sys_include_dirs);
  1015. da_init(pp->dependencies);
  1016. da_init(pp->tokens);
  1017. pp->lex = NULL;
  1018. pp->ed = NULL;
  1019. pp->ignore_state = false;
  1020. }
  1021. void cf_preprocessor_free(struct cf_preprocessor *pp)
  1022. {
  1023. struct cf_lexer *dependencies = pp->dependencies.array;
  1024. char **sys_include_dirs = pp->sys_include_dirs.array;
  1025. struct cf_def *defs = pp->defines.array;
  1026. size_t i;
  1027. for (i = 0; i <pp->defines.num; i++)
  1028. cf_def_free(defs+i);
  1029. for (i = 0; i < pp->sys_include_dirs.num; i++)
  1030. bfree(sys_include_dirs[i]);
  1031. for (i = 0; i < pp->dependencies.num; i++)
  1032. cf_lexer_free(dependencies+i);
  1033. da_free(pp->defines);
  1034. da_free(pp->sys_include_dirs);
  1035. da_free(pp->dependencies);
  1036. da_free(pp->tokens);
  1037. pp->lex = NULL;
  1038. pp->ed = NULL;
  1039. pp->ignore_state = false;
  1040. }
  1041. bool cf_preprocess(struct cf_preprocessor *pp, struct cf_lexer *lex,
  1042. struct error_data *ed)
  1043. {
  1044. struct cf_token *token = cf_lexer_get_tokens(lex);
  1045. if (!token)
  1046. return false;
  1047. pp->ed = ed;
  1048. pp->lex = lex;
  1049. cf_preprocess_tokens(pp, false, &token);
  1050. da_push_back(pp->tokens, token);
  1051. return !lex->unexpected_eof;
  1052. }
  1053. void cf_preprocessor_add_def(struct cf_preprocessor *pp, struct cf_def *def)
  1054. {
  1055. struct cf_def *existing = cf_preprocess_get_def(pp, &def->name.str);
  1056. if (existing) {
  1057. struct dstr name;
  1058. dstr_init_copy_strref(&name, &def->name.str);
  1059. cf_addwarning(pp, &def->name, "Token $1 already defined",
  1060. name.array, NULL, NULL);
  1061. cf_addwarning(pp, &existing->name,
  1062. "Previous definition of $1 is here",
  1063. name.array, NULL, NULL);
  1064. cf_def_free(existing);
  1065. memcpy(existing, def, sizeof(struct cf_def));
  1066. } else {
  1067. da_push_back(pp->defines, def);
  1068. }
  1069. }
  1070. void cf_preprocessor_remove_def(struct cf_preprocessor *pp,
  1071. const char *def_name)
  1072. {
  1073. struct strref ref;
  1074. ref.array = def_name;
  1075. ref.len = strlen(def_name);
  1076. cf_preprocess_remove_def_strref(pp, &ref);
  1077. }