cmListFileLexer.in.l 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564
  1. %{
  2. /* Distributed under the OSI-approved BSD 3-Clause License. See accompanying
  3. file Copyright.txt or https://cmake.org/licensing for details. */
  4. /*
  5. This file must be translated to C and modified to build everywhere.
  6. Run flex >= 2.6 like this:
  7. flex --nounistd -DFLEXINT_H --prefix=cmListFileLexer_yy -ocmListFileLexer.c cmListFileLexer.in.l
  8. Modify cmListFileLexer.c:
  9. - remove trailing whitespace: sed -i 's/\s*$//' cmListFileLexer.c
  10. - remove blank lines at end of file
  11. - #include "cmStandardLexer.h" at the top
  12. - add cast in yy_scan_bytes for loop condition of _yybytes_len to size_t
  13. - change type of variable yyl under yy_find_action from yy_size_t to int
  14. */
  15. /* IWYU pragma: no_forward_declare yyguts_t */
  16. #ifdef WIN32
  17. #include <cmsys/Encoding.h>
  18. #endif
/* Setup the proper cmListFileLexer_yylex declaration. */
/* The scanner's "extra" slot holds a pointer to the owning cmListFileLexer;
   YY_INPUT retrieves it with cmListFileLexer_yyget_extra. */
#define YY_EXTRA_TYPE cmListFileLexer*
/* The generated scanner function takes the lexer as an explicit second
   argument, which is why the rule actions can refer to `lexer` directly. */
#define YY_DECL int cmListFileLexer_yylex (yyscan_t yyscanner, cmListFileLexer* lexer)
  22. #include "cmListFileLexer.h"
  23. /*--------------------------------------------------------------------------*/
/* Complete state of one lexer instance.  Instances are created zero-filled
   by cmListFileLexer_New, which then sets line and column to 1. */
struct cmListFileLexer_s
{
  /* Token handed back to the caller by cmListFileLexer_Scan. */
  cmListFileLexer_Token token;
  /* Length of the text from the opening '[' up to (not including) the second
     '[' of the bracket currently being scanned, i.e. 1 + number of '='. */
  int bracket;
  /* Non-zero when the current bracket construct started with '#'. */
  int comment;
  /* Current line and column; both start at 1. */
  int line;
  int column;
  /* Allocated size in bytes of token.text (0 when there is no buffer). */
  int size;
  /* Input file, or null when no file is open. */
  FILE* file;
  /* 1 when the previous file read ended in a CR that was held back so a
     CRLF pair split across reads can still be collapsed; otherwise 0. */
  size_t cr;
  /* Owned copy of the input string set by cmListFileLexer_SetString. */
  char* string_buffer;
  /* Next unread byte inside string_buffer. */
  char* string_position;
  /* Number of bytes of string_buffer not yet handed to the scanner. */
  int string_left;
  /* Reentrant flex scanner; valid only while a file or string is set. */
  yyscan_t scanner;
};
/* Forward declarations of the helpers defined in the user-code section
   after the rules; the rule actions and YY_INPUT reference them. */

/* Replace the current token text and stamp it with the current position. */
static void cmListFileLexerSetToken(cmListFileLexer* lexer, const char* text,
                                    int length);
/* Append text to the current token, growing its buffer when needed. */
static void cmListFileLexerAppend(cmListFileLexer* lexer, const char* text,
                                  int length);
/* Supply input bytes to the scanner from the file or the string buffer. */
static int cmListFileLexerInput(cmListFileLexer* lexer, char* buffer,
                                size_t bufferSize);
/* Create the flex scanner when an input source is present. */
static void cmListFileLexerInit(cmListFileLexer* lexer);
/* Release the token, the flex scanner and the current input source. */
static void cmListFileLexerDestroy(cmListFileLexer* lexer);
/* Replace the lexer input function. */
/* The scanner pulls its input through cmListFileLexerInput, using the lexer
   pointer stored as the scanner's extra data; `result` receives the number
   of bytes placed in `buf` (0 signals end of input). */
#undef YY_INPUT
#define YY_INPUT(buf, result, max_size) \
  { result = cmListFileLexerInput(cmListFileLexer_yyget_extra(yyscanner), buf, max_size); }
  51. /*--------------------------------------------------------------------------*/
  52. %}
/* Scanner options: reentrant scanner (state lives in a yyscan_t), flex line
   tracking enabled, and no yywrap() callback at end of input. */
%option reentrant
%option yylineno
%option noyywrap

/* yytext is a pointer into the scan buffer rather than a copied array. */
%pointer

/* Exclusive start conditions: inside a quoted argument, inside a bracket
   argument/comment, after a candidate closing "]=*", and inside a line
   comment. */
%x STRING
%x BRACKET
%x BRACKETEND
%x COMMENT

/* MAKEVAR: "$(" + letters/digits/underscores (possibly none) + ")". */
MAKEVAR \$\([A-Za-z0-9_]*\)
/* UNQUOTED: one character that is not space, tab, CR, LF, a parenthesis,
   '#', backslash, double quote, '[' or '='; or a backslash followed by any
   character other than newline. */
UNQUOTED ([^ \t\r\n\(\)#\\\"[=]|\\.)
/* LEGACY: a MAKEVAR, an UNQUOTED element, or a double-quoted run of those
   that may also contain space, tab, '[' and '='. */
LEGACY {MAKEVAR}|{UNQUOTED}|\"({MAKEVAR}|{UNQUOTED}|[ \t[=])*\"
  64. %%
<INITIAL,COMMENT>\n {
  /* A newline outside strings/brackets (also ending a line comment) is
     returned as a Newline token; the position moves to column 1 of the next
     line and scanning resumes in INITIAL. */
  lexer->token.type = cmListFileLexer_Token_Newline;
  cmListFileLexerSetToken(lexer, yytext, yyleng);
  ++lexer->line;
  lexer->column = 1;
  BEGIN(INITIAL);
  return 1;
}

#?\[=*\[\n? {
  /* Opening of a bracket argument "[=*[" or bracket comment "#[=*[".  A
     newline directly after the opener is consumed here and therefore is not
     part of the token text. */
  const char* bracket = yytext;
  lexer->comment = yytext[0] == '#';
  if (lexer->comment) {
    lexer->token.type = cmListFileLexer_Token_CommentBracket;
    bracket += 1;
  } else {
    lexer->token.type = cmListFileLexer_Token_ArgumentBracket;
  }
  /* Start with an empty token; the content is appended in BRACKET state. */
  cmListFileLexerSetToken(lexer, "", 0);
  /* Distance from the first '[' to the second one: 1 + number of '='.  A
     closing "]=*" must have exactly this length. */
  lexer->bracket = strchr(bracket+1, '[') - bracket;
  if (yytext[yyleng-1] == '\n') {
    ++lexer->line;
    lexer->column = 1;
  } else {
    lexer->column += yyleng;
  }
  BEGIN(BRACKET);
}

# {
  /* Start of a line comment; no token is produced for it. */
  lexer->column += yyleng;
  BEGIN(COMMENT);
}

<COMMENT>.* {
  /* Skip the comment text up to (not including) the newline. */
  lexer->column += yyleng;
}

\( {
  lexer->token.type = cmListFileLexer_Token_ParenLeft;
  cmListFileLexerSetToken(lexer, yytext, yyleng);
  lexer->column += yyleng;
  return 1;
}

\) {
  lexer->token.type = cmListFileLexer_Token_ParenRight;
  cmListFileLexerSetToken(lexer, yytext, yyleng);
  lexer->column += yyleng;
  return 1;
}

[A-Za-z_][A-Za-z0-9_]* {
  lexer->token.type = cmListFileLexer_Token_Identifier;
  cmListFileLexerSetToken(lexer, yytext, yyleng);
  lexer->column += yyleng;
  return 1;
}

<BRACKET>\]=* {
  /* Handle ]]====]=======]*/
  /* The candidate closer is appended as content first; it is only a real
     closer if its length matches the opener and a ']' follows. */
  cmListFileLexerAppend(lexer, yytext, yyleng);
  lexer->column += yyleng;
  if (yyleng == lexer->bracket) {
    BEGIN(BRACKETEND);
  }
}

<BRACKETEND>\] {
  lexer->column += yyleng;
  /* Erase the partial bracket from the token. */
  lexer->token.length -= lexer->bracket;
  lexer->token.text[lexer->token.length] = 0;
  BEGIN(INITIAL);
  return 1;
}

<BRACKET>([^]\n])+ {
  /* Ordinary bracket content: anything except ']' and newline. */
  cmListFileLexerAppend(lexer, yytext, yyleng);
  lexer->column += yyleng;
}

<BRACKET,BRACKETEND>\n {
  /* Newlines are part of the bracket content; a pending candidate closer
     turns out to be plain content, so go back to BRACKET. */
  cmListFileLexerAppend(lexer, yytext, yyleng);
  ++lexer->line;
  lexer->column = 1;
  BEGIN(BRACKET);
}

<BRACKET,BRACKETEND>. {
  /* Any other single character is content; this also cancels a candidate
     closer that was not followed by ']'. */
  cmListFileLexerAppend(lexer, yytext, yyleng);
  lexer->column += yyleng;
  BEGIN(BRACKET);
}

<BRACKET,BRACKETEND><<EOF>> {
  /* Input ended inside a bracket: report it with the text gathered so far. */
  lexer->token.type = cmListFileLexer_Token_BadBracket;
  BEGIN(INITIAL);
  return 1;
}

({UNQUOTED}|=|\[=*{UNQUOTED})({UNQUOTED}|[[=])* {
  /* Unquoted argument made only of UNQUOTED elements, '=' and '['. */
  lexer->token.type = cmListFileLexer_Token_ArgumentUnquoted;
  cmListFileLexerSetToken(lexer, yytext, yyleng);
  lexer->column += yyleng;
  return 1;
}

({MAKEVAR}|{UNQUOTED}|=|\[=*{LEGACY})({LEGACY}|[[=])* {
  /* Unquoted argument that may also contain the LEGACY constructs defined
     above (MAKEVAR references and embedded double-quoted runs). */
  lexer->token.type = cmListFileLexer_Token_ArgumentUnquoted;
  cmListFileLexerSetToken(lexer, yytext, yyleng);
  lexer->column += yyleng;
  return 1;
}

\[ {
  /* A '[' matched by none of the longer rules above is returned as an
     unquoted argument on its own. */
  lexer->token.type = cmListFileLexer_Token_ArgumentUnquoted;
  cmListFileLexerSetToken(lexer, yytext, yyleng);
  lexer->column += yyleng;
  return 1;
}

\" {
  /* Opening quote: start an empty quoted-argument token and collect its
     content in STRING state; the quote itself is not part of the text. */
  lexer->token.type = cmListFileLexer_Token_ArgumentQuoted;
  cmListFileLexerSetToken(lexer, "", 0);
  lexer->column += yyleng;
  BEGIN(STRING);
}

<STRING>([^\\\n\"]|\\.)+ {
  /* Plain characters and backslash escapes are appended verbatim (the
     backslash is kept in the token text). */
  cmListFileLexerAppend(lexer, yytext, yyleng);
  lexer->column += yyleng;
}

<STRING>\\\n {
  /* Continuation: text is not part of string */
  ++lexer->line;
  lexer->column = 1;
}

<STRING>\n {
  /* An unescaped newline is part of the quoted argument. */
  cmListFileLexerAppend(lexer, yytext, yyleng);
  ++lexer->line;
  lexer->column = 1;
}

<STRING>\" {
  /* Closing quote: the token is complete. */
  lexer->column += yyleng;
  BEGIN(INITIAL);
  return 1;
}

<STRING>. {
  /* Any character not covered by the rules above (for example a backslash
     that is the last character of the input) is appended as-is. */
  cmListFileLexerAppend(lexer, yytext, yyleng);
  lexer->column += yyleng;
}

<STRING><<EOF>> {
  /* Input ended before the closing quote. */
  lexer->token.type = cmListFileLexer_Token_BadString;
  BEGIN(INITIAL);
  return 1;
}

[ \t\r]+ {
  /* A run of spaces, tabs and carriage returns is one Space token; each
     character advances the column by one. */
  lexer->token.type = cmListFileLexer_Token_Space;
  cmListFileLexerSetToken(lexer, yytext, yyleng);
  lexer->column += yyleng;
  return 1;
}

. {
  /* Fallback for a character no other INITIAL rule accepts. */
  lexer->token.type = cmListFileLexer_Token_BadCharacter;
  cmListFileLexerSetToken(lexer, yytext, yyleng);
  lexer->column += yyleng;
  return 1;
}

<<EOF>> {
  /* Normal end of input: clear the token and return 0 so that
     cmListFileLexer_Scan reports "no more tokens". */
  lexer->token.type = cmListFileLexer_Token_None;
  cmListFileLexerSetToken(lexer, 0, 0);
  return 0;
}
  222. %%
  223. /*--------------------------------------------------------------------------*/
  224. static void cmListFileLexerSetToken(cmListFileLexer* lexer, const char* text,
  225. int length)
  226. {
  227. /* Set the token line and column number. */
  228. lexer->token.line = lexer->line;
  229. lexer->token.column = lexer->column;
  230. /* Use the same buffer if possible. */
  231. if (lexer->token.text) {
  232. if (text && length < lexer->size) {
  233. strcpy(lexer->token.text, text);
  234. lexer->token.length = length;
  235. return;
  236. }
  237. free(lexer->token.text);
  238. lexer->token.text = 0;
  239. lexer->size = 0;
  240. }
  241. /* Need to extend the buffer. */
  242. if (text) {
  243. lexer->token.text = strdup(text);
  244. lexer->token.length = length;
  245. lexer->size = length + 1;
  246. } else {
  247. lexer->token.length = 0;
  248. }
  249. }
  250. /*--------------------------------------------------------------------------*/
  251. static void cmListFileLexerAppend(cmListFileLexer* lexer, const char* text,
  252. int length)
  253. {
  254. char* temp;
  255. int newSize;
  256. /* If the appended text will fit in the buffer, do not reallocate. */
  257. newSize = lexer->token.length + length + 1;
  258. if (lexer->token.text && newSize <= lexer->size) {
  259. strcpy(lexer->token.text + lexer->token.length, text);
  260. lexer->token.length += length;
  261. return;
  262. }
  263. /* We need to extend the buffer. */
  264. temp = malloc(newSize);
  265. if (lexer->token.text) {
  266. memcpy(temp, lexer->token.text, lexer->token.length);
  267. free(lexer->token.text);
  268. }
  269. memcpy(temp + lexer->token.length, text, length);
  270. temp[lexer->token.length + length] = 0;
  271. lexer->token.text = temp;
  272. lexer->token.length += length;
  273. lexer->size = newSize;
  274. }
  275. /*--------------------------------------------------------------------------*/
  276. static int cmListFileLexerInput(cmListFileLexer* lexer, char* buffer,
  277. size_t bufferSize)
  278. {
  279. if (lexer) {
  280. if (lexer->file) {
  281. /* Convert CRLF -> LF explicitly. The C FILE "t"ext mode
  282. does not convert newlines on all platforms. Move any
  283. trailing CR to the start of the buffer for the next read. */
  284. size_t cr = lexer->cr;
  285. size_t n;
  286. buffer[0] = '\r';
  287. n = fread(buffer + cr, 1, bufferSize - cr, lexer->file);
  288. if (n) {
  289. char* o = buffer;
  290. const char* i = buffer;
  291. const char* e;
  292. n += cr;
  293. cr = (buffer[n - 1] == '\r') ? 1 : 0;
  294. e = buffer + n - cr;
  295. while (i != e) {
  296. if (i[0] == '\r' && i[1] == '\n') {
  297. ++i;
  298. }
  299. *o++ = *i++;
  300. }
  301. n = o - buffer;
  302. } else {
  303. n = cr;
  304. cr = 0;
  305. }
  306. lexer->cr = cr;
  307. return n;
  308. } else if (lexer->string_left) {
  309. int length = lexer->string_left;
  310. if ((int)bufferSize < length) {
  311. length = (int)bufferSize;
  312. }
  313. memcpy(buffer, lexer->string_position, length);
  314. lexer->string_position += length;
  315. lexer->string_left -= length;
  316. return length;
  317. }
  318. }
  319. return 0;
  320. }
  321. /*--------------------------------------------------------------------------*/
  322. static void cmListFileLexerInit(cmListFileLexer* lexer)
  323. {
  324. if (lexer->file || lexer->string_buffer) {
  325. cmListFileLexer_yylex_init(&lexer->scanner);
  326. cmListFileLexer_yyset_extra(lexer, lexer->scanner);
  327. }
  328. }
  329. /*--------------------------------------------------------------------------*/
  330. static void cmListFileLexerDestroy(cmListFileLexer* lexer)
  331. {
  332. cmListFileLexerSetToken(lexer, 0, 0);
  333. if (lexer->file || lexer->string_buffer) {
  334. cmListFileLexer_yylex_destroy(lexer->scanner);
  335. if (lexer->file) {
  336. fclose(lexer->file);
  337. lexer->file = 0;
  338. }
  339. if (lexer->string_buffer) {
  340. free(lexer->string_buffer);
  341. lexer->string_buffer = 0;
  342. lexer->string_left = 0;
  343. lexer->string_position = 0;
  344. }
  345. }
  346. }
  347. /*--------------------------------------------------------------------------*/
  348. cmListFileLexer* cmListFileLexer_New()
  349. {
  350. cmListFileLexer* lexer = (cmListFileLexer*)malloc(sizeof(cmListFileLexer));
  351. if (!lexer) {
  352. return 0;
  353. }
  354. memset(lexer, 0, sizeof(*lexer));
  355. lexer->line = 1;
  356. lexer->column = 1;
  357. return lexer;
  358. }
  359. /*--------------------------------------------------------------------------*/
  360. void cmListFileLexer_Delete(cmListFileLexer* lexer)
  361. {
  362. cmListFileLexer_SetFileName(lexer, 0, 0);
  363. free(lexer);
  364. }
  365. /*--------------------------------------------------------------------------*/
  366. static cmListFileLexer_BOM cmListFileLexer_ReadBOM(FILE* f)
  367. {
  368. unsigned char b[2];
  369. if (fread(b, 1, 2, f) == 2) {
  370. if (b[0] == 0xEF && b[1] == 0xBB) {
  371. if (fread(b, 1, 1, f) == 1 && b[0] == 0xBF) {
  372. return cmListFileLexer_BOM_UTF8;
  373. }
  374. } else if (b[0] == 0xFE && b[1] == 0xFF) {
  375. /* UTF-16 BE */
  376. return cmListFileLexer_BOM_UTF16BE;
  377. } else if (b[0] == 0 && b[1] == 0) {
  378. if (fread(b, 1, 2, f) == 2 && b[0] == 0xFE && b[1] == 0xFF) {
  379. return cmListFileLexer_BOM_UTF32BE;
  380. }
  381. } else if (b[0] == 0xFF && b[1] == 0xFE) {
  382. fpos_t p;
  383. fgetpos(f, &p);
  384. if (fread(b, 1, 2, f) == 2 && b[0] == 0 && b[1] == 0) {
  385. return cmListFileLexer_BOM_UTF32LE;
  386. }
  387. fsetpos(f, &p);
  388. return cmListFileLexer_BOM_UTF16LE;
  389. }
  390. }
  391. rewind(f);
  392. return cmListFileLexer_BOM_None;
  393. }
  394. /*--------------------------------------------------------------------------*/
  395. int cmListFileLexer_SetFileName(cmListFileLexer* lexer, const char* name,
  396. cmListFileLexer_BOM* bom)
  397. {
  398. int result = 1;
  399. cmListFileLexerDestroy(lexer);
  400. if (name) {
  401. #ifdef _WIN32
  402. wchar_t* wname = cmsysEncoding_DupToWide(name);
  403. lexer->file = _wfopen(wname, L"rb");
  404. free(wname);
  405. #else
  406. lexer->file = fopen(name, "rb");
  407. #endif
  408. if (lexer->file) {
  409. if (bom) {
  410. *bom = cmListFileLexer_ReadBOM(lexer->file);
  411. }
  412. } else {
  413. result = 0;
  414. }
  415. }
  416. cmListFileLexerInit(lexer);
  417. return result;
  418. }
  419. /*--------------------------------------------------------------------------*/
  420. int cmListFileLexer_SetString(cmListFileLexer* lexer, const char* text)
  421. {
  422. int result = 1;
  423. cmListFileLexerDestroy(lexer);
  424. if (text) {
  425. int length = (int)strlen(text);
  426. lexer->string_buffer = (char*)malloc(length + 1);
  427. if (lexer->string_buffer) {
  428. strcpy(lexer->string_buffer, text);
  429. lexer->string_position = lexer->string_buffer;
  430. lexer->string_left = length;
  431. } else {
  432. result = 0;
  433. }
  434. }
  435. cmListFileLexerInit(lexer);
  436. return result;
  437. }
  438. /*--------------------------------------------------------------------------*/
  439. cmListFileLexer_Token* cmListFileLexer_Scan(cmListFileLexer* lexer)
  440. {
  441. if (!lexer->file) {
  442. return 0;
  443. }
  444. if (cmListFileLexer_yylex(lexer->scanner, lexer)) {
  445. return &lexer->token;
  446. } else {
  447. cmListFileLexer_SetFileName(lexer, 0, 0);
  448. return 0;
  449. }
  450. }
  451. /*--------------------------------------------------------------------------*/
  452. long cmListFileLexer_GetCurrentLine(cmListFileLexer* lexer)
  453. {
  454. if (lexer->file) {
  455. return lexer->line;
  456. } else {
  457. return 0;
  458. }
  459. }
  460. /*--------------------------------------------------------------------------*/
  461. long cmListFileLexer_GetCurrentColumn(cmListFileLexer* lexer)
  462. {
  463. if (lexer->file) {
  464. return lexer->column;
  465. } else {
  466. return 0;
  467. }
  468. }
  469. /*--------------------------------------------------------------------------*/
  470. const char* cmListFileLexer_GetTypeAsString(cmListFileLexer* lexer,
  471. cmListFileLexer_Type type)
  472. {
  473. (void)lexer;
  474. switch (type) {
  475. case cmListFileLexer_Token_None:
  476. return "nothing";
  477. case cmListFileLexer_Token_Space:
  478. return "space";
  479. case cmListFileLexer_Token_Newline:
  480. return "newline";
  481. case cmListFileLexer_Token_Identifier:
  482. return "identifier";
  483. case cmListFileLexer_Token_ParenLeft:
  484. return "left paren";
  485. case cmListFileLexer_Token_ParenRight:
  486. return "right paren";
  487. case cmListFileLexer_Token_ArgumentUnquoted:
  488. return "unquoted argument";
  489. case cmListFileLexer_Token_ArgumentQuoted:
  490. return "quoted argument";
  491. case cmListFileLexer_Token_ArgumentBracket:
  492. return "bracket argument";
  493. case cmListFileLexer_Token_CommentBracket:
  494. return "bracket comment";
  495. case cmListFileLexer_Token_BadCharacter:
  496. return "bad character";
  497. case cmListFileLexer_Token_BadBracket:
  498. return "unterminated bracket";
  499. case cmListFileLexer_Token_BadString:
  500. return "unterminated string";
  501. }
  502. return "unknown token";
  503. }