cmListFileLexer.in.l 15 KB


  1. %{
  2. /*============================================================================
  3. CMake - Cross Platform Makefile Generator
  4. Copyright 2000-2009 Kitware, Inc., Insight Software Consortium
  5. Distributed under the OSI-approved BSD License (the "License");
  6. see accompanying file Copyright.txt for details.
  7. This software is distributed WITHOUT ANY WARRANTY; without even the
  8. implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  9. See the License for more information.
  10. ============================================================================*/
  11. /*
  12. This file must be translated to C and modified to build everywhere.
  13. Run flex like this:
  14. flex --prefix=cmListFileLexer_yy -ocmListFileLexer.c cmListFileLexer.in.l
  15. Modify cmListFileLexer.c:
  16. - remove TABs
  17. - remove use of the 'register' storage class specifier
  18. - remove the yyunput function
  19. - add a statement "(void)yyscanner;" to the top of these methods:
  20. yy_fatal_error, cmListFileLexer_yyalloc, cmListFileLexer_yyrealloc, cmListFileLexer_yyfree
  21. - remove statement "yyscanner = NULL;" from cmListFileLexer_yylex_destroy
  22. - remove all YY_BREAK lines occurring right after return statements
  23. - remove the isatty forward declaration
  24. */
  25. #include "cmStandardLexer.h"
  26. #ifdef WIN32
  27. #include <cmsys/Encoding.h>
  28. #endif
  29. /* Setup the proper cmListFileLexer_yylex declaration. */
  30. #define YY_EXTRA_TYPE cmListFileLexer*
  31. #define YY_DECL int cmListFileLexer_yylex (yyscan_t yyscanner, cmListFileLexer* lexer)
  32. #include "cmListFileLexer.h"
  33. /*--------------------------------------------------------------------------*/
/* Complete state of one list-file lexer instance.  */
struct cmListFileLexer_s
{
  /* Token most recently produced; its text buffer is owned by the lexer. */
  cmListFileLexer_Token token;
  /* Offset of the second '[' within the opening bracket, i.e. the length
     of the "]=*" sequence that must precede the final ']' to close it.  */
  int bracket;
  /* Non-zero when the current bracket was introduced with '#'.  */
  int comment;
  /* Current position in the input; both start at 1.  */
  int line;
  int column;
  /* Allocated size in bytes of token.text (0 when there is no buffer).  */
  int size;
  /* Input file, or null when no file is attached.  */
  FILE* file;
  /* 1 when the previous file read ended in a CR that was held back so a
     CRLF pair split across two reads can still be recognized.  */
  size_t cr;
  /* Private copy of the input string, or null when reading from a file. */
  char* string_buffer;
  /* Next unread byte inside string_buffer.  */
  char* string_position;
  /* Number of bytes of string_buffer not yet handed to the scanner.  */
  int string_left;
  /* Reentrant flex scanner associated with this lexer.  */
  yyscan_t scanner;
};
/* Helpers defined after the rules section; declared here so the rule
   actions and the YY_INPUT replacement can call them.  */
static void cmListFileLexerSetToken(cmListFileLexer* lexer, const char* text,
                                    int length);
static void cmListFileLexerAppend(cmListFileLexer* lexer, const char* text,
                                  int length);
static int cmListFileLexerInput(cmListFileLexer* lexer, char* buffer,
                                size_t bufferSize);
static void cmListFileLexerInit(cmListFileLexer* lexer);
static void cmListFileLexerDestroy(cmListFileLexer* lexer);

/* Replace the lexer input function.  The scanner pulls its bytes from
   cmListFileLexerInput, using the lexer stored as the scanner's "extra"
   pointer, instead of reading a FILE directly.  */
#undef YY_INPUT
#define YY_INPUT(buf, result, max_size) \
  { result = cmListFileLexerInput(cmListFileLexer_yyget_extra(yyscanner), buf, max_size); }
  61. /*--------------------------------------------------------------------------*/
  62. %}
/* Scanner options: reentrant scanner state, line counting, and no
   yywrap() callback at end of input.  */
%option reentrant
%option yylineno
%option noyywrap
%pointer

/* Exclusive start conditions used while inside a quoted string, inside
   a bracket argument/comment, right after a candidate closing bracket
   prefix, and inside a line comment.  */
%x STRING
%x BRACKET
%x BRACKETEND
%x COMMENT

/* MAKEVAR:  a make-style reference of the form $(NAME).
   UNQUOTED: one character that is not whitespace, a parenthesis, '#',
             a backslash, a double quote, '[' or '=', or else a backslash
             followed by any single character.
   LEGACY:   a MAKEVAR, an UNQUOTED element, or a double-quoted run made
             of those plus spaces, tabs, '[' and '='.  */
MAKEVAR \$\([A-Za-z0-9_]*\)
UNQUOTED ([^ \t\r\n\(\)#\\\"[=]|\\.)
LEGACY {MAKEVAR}|{UNQUOTED}|\"({MAKEVAR}|{UNQUOTED}|[ \t[=])*\"
  74. %%
<INITIAL,COMMENT>\n {
  /* End of line, which also ends a line comment: emit a Newline token
     stamped with the position of the newline itself, then move to the
     first column of the next line and return to the INITIAL state.  */
  lexer->token.type = cmListFileLexer_Token_Newline;
  cmListFileLexerSetToken(lexer, yytext, yyleng);
  ++lexer->line;
  lexer->column = 1;
  BEGIN(INITIAL);
  return 1;
}
#?\[=*\[\n? {
  /* Opening of a bracket argument "[=*[" or bracket comment "#[=*[",
     optionally followed by a newline that is consumed here and does not
     become part of the token text.  */
  const char* bracket = yytext;
  lexer->comment = yytext[0] == '#';
  if(lexer->comment)
    {
    lexer->token.type = cmListFileLexer_Token_CommentBracket;
    /* Skip the '#' so that "bracket" points at the first '['.  */
    bracket += 1;
    }
  else
    {
    lexer->token.type = cmListFileLexer_Token_ArgumentBracket;
    }
  /* Start with an empty token; the content is appended in BRACKET state. */
  cmListFileLexerSetToken(lexer, "", 0);
  /* Distance from the first '[' to the second '['.  A closing "]=*" of
     exactly this length followed by ']' terminates the bracket.  */
  lexer->bracket = (int)(strchr(bracket+1, '[') - bracket);
  if(yytext[yyleng-1] == '\n')
    {
    ++lexer->line;
    lexer->column = 1;
    }
  else
    {
    lexer->column += yyleng;
    }
  BEGIN(BRACKET);
}
# {
  /* Start of a line comment: no token is produced, only the column is
     advanced and the scanner switches to COMMENT state.  */
  lexer->column += yyleng;
  BEGIN(COMMENT);
}
<COMMENT>.* {
  /* Comment text up to (not including) the newline is discarded.  */
  lexer->column += yyleng;
}
\( {
  /* Left parenthesis token.  The token is stamped with the current
     column before the column is advanced past the matched text.  */
  lexer->token.type = cmListFileLexer_Token_ParenLeft;
  cmListFileLexerSetToken(lexer, yytext, yyleng);
  lexer->column += yyleng;
  return 1;
}
\) {
  /* Right parenthesis token.  */
  lexer->token.type = cmListFileLexer_Token_ParenRight;
  cmListFileLexerSetToken(lexer, yytext, yyleng);
  lexer->column += yyleng;
  return 1;
}
[A-Za-z_][A-Za-z0-9_]* {
  /* Identifier: a letter or underscore followed by letters, digits or
     underscores.  */
  lexer->token.type = cmListFileLexer_Token_Identifier;
  cmListFileLexerSetToken(lexer, yytext, yyleng);
  lexer->column += yyleng;
  return 1;
}
<BRACKET>\]=* {
  /* A ']' followed by any number of '=' may be the start of the closing
     bracket, e.g. while scanning input such as ]]====]=======].  The text
     is appended tentatively; if its length matches the opening bracket,
     BRACKETEND checks whether the very next character is the final ']'. */
  cmListFileLexerAppend(lexer, yytext, yyleng);
  lexer->column += yyleng;
  if(yyleng == lexer->bracket)
    {
    BEGIN(BRACKETEND);
    }
}
<BRACKETEND>\] {
  /* The closing bracket is complete: emit the token whose type was
     chosen when the bracket was opened.  */
  lexer->column += yyleng;
  /* Erase the partial bracket from the token. */
  lexer->token.length -= lexer->bracket;
  lexer->token.text[lexer->token.length] = 0;
  BEGIN(INITIAL);
  return 1;
}
<BRACKET>([^]\n])+ {
  /* Ordinary bracket content on a single line.  */
  cmListFileLexerAppend(lexer, yytext, yyleng);
  lexer->column += yyleng;
}
<BRACKET,BRACKETEND>\n {
  /* Newlines are part of the bracket content.  Seeing one in BRACKETEND
     means the candidate closing sequence was ordinary content.  */
  cmListFileLexerAppend(lexer, yytext, yyleng);
  ++lexer->line;
  lexer->column = 1;
  BEGIN(BRACKET);
}
<BRACKET,BRACKETEND>. {
  /* Any other single character is content; in BRACKETEND this also
     abandons the candidate closing sequence.  */
  cmListFileLexerAppend(lexer, yytext, yyleng);
  lexer->column += yyleng;
  BEGIN(BRACKET);
}
<BRACKET,BRACKETEND><<EOF>> {
  /* Input ended before the bracket was closed.  */
  lexer->token.type = cmListFileLexer_Token_BadBracket;
  BEGIN(INITIAL);
  return 1;
}
({UNQUOTED}|=|\[=*{UNQUOTED})({UNQUOTED}|[[=])* {
  /* Unquoted argument built only from UNQUOTED elements, '[' and '='.  */
  lexer->token.type = cmListFileLexer_Token_ArgumentUnquoted;
  cmListFileLexerSetToken(lexer, yytext, yyleng);
  lexer->column += yyleng;
  return 1;
}
({MAKEVAR}|{UNQUOTED}|=|\[=*{LEGACY})({LEGACY}|[[=])* {
  /* Unquoted argument that may additionally contain $(NAME) references
     and embedded double-quoted runs (see the LEGACY definition).  It
     yields the same token type as the rule above.  */
  lexer->token.type = cmListFileLexer_Token_ArgumentUnquoted;
  cmListFileLexerSetToken(lexer, yytext, yyleng);
  lexer->column += yyleng;
  return 1;
}
\" {
  /* Opening quote: start an empty quoted-argument token and collect its
     content in STRING state.  The quote itself is not stored.  */
  lexer->token.type = cmListFileLexer_Token_ArgumentQuoted;
  cmListFileLexerSetToken(lexer, "", 0);
  lexer->column += yyleng;
  BEGIN(STRING);
}
<STRING>([^\\\n\"]|\\.)+ {
  /* Run of ordinary characters and backslash escapes; escapes are stored
     verbatim, backslash included.  */
  cmListFileLexerAppend(lexer, yytext, yyleng);
  lexer->column += yyleng;
}
<STRING>\\\n {
  /* Continuation: text is not part of string */
  ++lexer->line;
  lexer->column = 1;
}
<STRING>\n {
  /* A plain newline inside the quotes is kept in the string.  */
  cmListFileLexerAppend(lexer, yytext, yyleng);
  ++lexer->line;
  lexer->column = 1;
}
<STRING>\" {
  /* Closing quote: the accumulated token is complete.  */
  lexer->column += yyleng;
  BEGIN(INITIAL);
  return 1;
}
<STRING>. {
  /* Any character not matched by the rules above is kept as content.  */
  cmListFileLexerAppend(lexer, yytext, yyleng);
  lexer->column += yyleng;
}
<STRING><<EOF>> {
  /* Input ended before the closing quote.  */
  lexer->token.type = cmListFileLexer_Token_BadString;
  BEGIN(INITIAL);
  return 1;
}
[ \t\r]+ {
  /* Run of spaces, tabs and carriage returns.  */
  lexer->token.type = cmListFileLexer_Token_Space;
  cmListFileLexerSetToken(lexer, yytext, yyleng);
  lexer->column += yyleng;
  return 1;
}
. {
  /* Any character no other rule accepts is reported as a bad character. */
  lexer->token.type = cmListFileLexer_Token_BadCharacter;
  cmListFileLexerSetToken(lexer, yytext, yyleng);
  lexer->column += yyleng;
  return 1;
}
<<EOF>> {
  /* End of input: clear the token and return 0 so the caller stops.  */
  lexer->token.type = cmListFileLexer_Token_None;
  cmListFileLexerSetToken(lexer, 0, 0);
  return 0;
}
  233. %%
  234. /*--------------------------------------------------------------------------*/
  235. static void cmListFileLexerSetToken(cmListFileLexer* lexer, const char* text,
  236. int length)
  237. {
  238. /* Set the token line and column number. */
  239. lexer->token.line = lexer->line;
  240. lexer->token.column = lexer->column;
  241. /* Use the same buffer if possible. */
  242. if(lexer->token.text)
  243. {
  244. if(text && length < lexer->size)
  245. {
  246. strcpy(lexer->token.text, text);
  247. lexer->token.length = length;
  248. return;
  249. }
  250. free(lexer->token.text);
  251. lexer->token.text = 0;
  252. lexer->size = 0;
  253. }
  254. /* Need to extend the buffer. */
  255. if(text)
  256. {
  257. lexer->token.text = strdup(text);
  258. lexer->token.length = length;
  259. lexer->size = length+1;
  260. }
  261. else
  262. {
  263. lexer->token.length = 0;
  264. }
  265. }
  266. /*--------------------------------------------------------------------------*/
  267. static void cmListFileLexerAppend(cmListFileLexer* lexer, const char* text,
  268. int length)
  269. {
  270. char* temp;
  271. int newSize;
  272. /* If the appended text will fit in the buffer, do not reallocate. */
  273. newSize = lexer->token.length + length + 1;
  274. if(lexer->token.text && newSize <= lexer->size)
  275. {
  276. strcpy(lexer->token.text+lexer->token.length, text);
  277. lexer->token.length += length;
  278. return;
  279. }
  280. /* We need to extend the buffer. */
  281. temp = malloc(newSize);
  282. if(lexer->token.text)
  283. {
  284. memcpy(temp, lexer->token.text, lexer->token.length);
  285. free(lexer->token.text);
  286. }
  287. memcpy(temp+lexer->token.length, text, length);
  288. temp[lexer->token.length+length] = 0;
  289. lexer->token.text = temp;
  290. lexer->token.length += length;
  291. lexer->size = newSize;
  292. }
  293. /*--------------------------------------------------------------------------*/
/* Input callback used by YY_INPUT: fill "buffer" with up to "bufferSize"
   bytes from the lexer's file or string and return how many bytes were
   stored.  Returns 0 when there is no lexer, no input, or no more data. */
static int cmListFileLexerInput(cmListFileLexer* lexer, char* buffer,
                                size_t bufferSize)
{
  if(lexer)
    {
    if(lexer->file)
      {
      /* Convert CRLF -> LF explicitly.  The C FILE "t"ext mode
         does not convert newlines on all platforms.  Move any
         trailing CR to the start of the buffer for the next read.  */
      size_t cr = lexer->cr;
      size_t n;
      /* Pre-place the CR held back by the previous call.  When cr is 0
         this byte is simply overwritten by the read below.  */
      buffer[0] = '\r';
      n = fread(buffer+cr, 1, bufferSize-cr, lexer->file);
      if(n)
        {
        char* o = buffer;        /* write cursor of the in-place copy */
        const char* i = buffer;  /* read cursor */
        const char* e;           /* end of the bytes to process now */
        n += cr;
        /* Hold back a trailing CR: its LF may arrive in the next read.  */
        cr = (buffer[n-1] == '\r')? 1:0;
        e = buffer + n - cr;
        while(i != e)
          {
          /* Drop the CR of each CRLF pair, keeping only the LF.  */
          if(i[0] == '\r' && i[1] == '\n')
            {
            ++i;
            }
          *o++ = *i++;
          }
        n = o - buffer;
        }
      else
        {
        /* Nothing more was read: flush a held-back CR, if any.  */
        n = cr;
        cr = 0;
        }
      lexer->cr = cr;
      return n;
      }
    else if(lexer->string_left)
      {
      /* String input: hand out the next chunk, at most bufferSize bytes. */
      int length = lexer->string_left;
      if((int)bufferSize < length) { length = (int)bufferSize; }
      memcpy(buffer, lexer->string_position, length);
      lexer->string_position += length;
      lexer->string_left -= length;
      return length;
      }
    }
  return 0;
}
  346. /*--------------------------------------------------------------------------*/
  347. static void cmListFileLexerInit(cmListFileLexer* lexer)
  348. {
  349. if(lexer->file || lexer->string_buffer)
  350. {
  351. cmListFileLexer_yylex_init(&lexer->scanner);
  352. cmListFileLexer_yyset_extra(lexer, lexer->scanner);
  353. }
  354. }
  355. /*--------------------------------------------------------------------------*/
  356. static void cmListFileLexerDestroy(cmListFileLexer* lexer)
  357. {
  358. cmListFileLexerSetToken(lexer, 0, 0);
  359. if(lexer->file || lexer->string_buffer)
  360. {
  361. cmListFileLexer_yylex_destroy(lexer->scanner);
  362. if(lexer->file)
  363. {
  364. fclose(lexer->file);
  365. lexer->file = 0;
  366. }
  367. if(lexer->string_buffer)
  368. {
  369. free(lexer->string_buffer);
  370. lexer->string_buffer = 0;
  371. lexer->string_left = 0;
  372. lexer->string_position = 0;
  373. }
  374. }
  375. }
  376. /*--------------------------------------------------------------------------*/
  377. cmListFileLexer* cmListFileLexer_New()
  378. {
  379. cmListFileLexer* lexer = (cmListFileLexer*)malloc(sizeof(cmListFileLexer));
  380. if(!lexer)
  381. {
  382. return 0;
  383. }
  384. memset(lexer, 0, sizeof(*lexer));
  385. lexer->line = 1;
  386. lexer->column = 1;
  387. return lexer;
  388. }
  389. /*--------------------------------------------------------------------------*/
  390. void cmListFileLexer_Delete(cmListFileLexer* lexer)
  391. {
  392. cmListFileLexer_SetFileName(lexer, 0, 0);
  393. free(lexer);
  394. }
  395. /*--------------------------------------------------------------------------*/
  396. static cmListFileLexer_BOM cmListFileLexer_ReadBOM(FILE* f)
  397. {
  398. unsigned char b[2];
  399. if(fread(b, 1, 2, f) == 2)
  400. {
  401. if(b[0] == 0xEF && b[1] == 0xBB)
  402. {
  403. if(fread(b, 1, 1, f) == 1 && b[0] == 0xBF)
  404. {
  405. return cmListFileLexer_BOM_UTF8;
  406. }
  407. }
  408. else if(b[0] == 0xFE && b[1] == 0xFF)
  409. {
  410. /* UTF-16 BE */
  411. return cmListFileLexer_BOM_UTF16BE;
  412. }
  413. else if(b[0] == 0 && b[1] == 0)
  414. {
  415. if(fread(b, 1, 2, f) == 2 && b[0] == 0xFE && b[1] == 0xFF)
  416. {
  417. return cmListFileLexer_BOM_UTF32BE;
  418. }
  419. }
  420. else if(b[0] == 0xFF && b[1] == 0xFE)
  421. {
  422. fpos_t p;
  423. fgetpos(f, &p);
  424. if(fread(b, 1, 2, f) == 2 && b[0] == 0 && b[1] == 0)
  425. {
  426. return cmListFileLexer_BOM_UTF32LE;
  427. }
  428. fsetpos(f, &p);
  429. return cmListFileLexer_BOM_UTF16LE;
  430. }
  431. }
  432. rewind(f);
  433. return cmListFileLexer_BOM_None;
  434. }
  435. /*--------------------------------------------------------------------------*/
  436. int cmListFileLexer_SetFileName(cmListFileLexer* lexer, const char* name,
  437. cmListFileLexer_BOM* bom)
  438. {
  439. int result = 1;
  440. cmListFileLexerDestroy(lexer);
  441. if(name)
  442. {
  443. #ifdef _WIN32
  444. wchar_t* wname = cmsysEncoding_DupToWide(name);
  445. lexer->file = _wfopen(wname, L"rb");
  446. free(wname);
  447. #else
  448. lexer->file = fopen(name, "rb");
  449. #endif
  450. if(lexer->file)
  451. {
  452. if(bom)
  453. {
  454. *bom = cmListFileLexer_ReadBOM(lexer->file);
  455. }
  456. }
  457. else
  458. {
  459. result = 0;
  460. }
  461. }
  462. cmListFileLexerInit(lexer);
  463. return result;
  464. }
  465. /*--------------------------------------------------------------------------*/
  466. int cmListFileLexer_SetString(cmListFileLexer* lexer, const char* text)
  467. {
  468. int result = 1;
  469. cmListFileLexerDestroy(lexer);
  470. if(text)
  471. {
  472. int length = (int)strlen(text);
  473. lexer->string_buffer = (char*)malloc(length+1);
  474. if(lexer->string_buffer)
  475. {
  476. strcpy(lexer->string_buffer, text);
  477. lexer->string_position = lexer->string_buffer;
  478. lexer->string_left = length;
  479. }
  480. else
  481. {
  482. result = 0;
  483. }
  484. }
  485. cmListFileLexerInit(lexer);
  486. return result;
  487. }
  488. /*--------------------------------------------------------------------------*/
  489. cmListFileLexer_Token* cmListFileLexer_Scan(cmListFileLexer* lexer)
  490. {
  491. if(!lexer->file)
  492. {
  493. return 0;
  494. }
  495. if(cmListFileLexer_yylex(lexer->scanner, lexer))
  496. {
  497. return &lexer->token;
  498. }
  499. else
  500. {
  501. cmListFileLexer_SetFileName(lexer, 0, 0);
  502. return 0;
  503. }
  504. }
  505. /*--------------------------------------------------------------------------*/
  506. long cmListFileLexer_GetCurrentLine(cmListFileLexer* lexer)
  507. {
  508. if(lexer->file)
  509. {
  510. return lexer->line;
  511. }
  512. else
  513. {
  514. return 0;
  515. }
  516. }
  517. /*--------------------------------------------------------------------------*/
  518. long cmListFileLexer_GetCurrentColumn(cmListFileLexer* lexer)
  519. {
  520. if(lexer->file)
  521. {
  522. return lexer->column;
  523. }
  524. else
  525. {
  526. return 0;
  527. }
  528. }
  529. /*--------------------------------------------------------------------------*/
  530. const char* cmListFileLexer_GetTypeAsString(cmListFileLexer* lexer,
  531. cmListFileLexer_Type type)
  532. {
  533. (void)lexer;
  534. switch(type)
  535. {
  536. case cmListFileLexer_Token_None: return "nothing";
  537. case cmListFileLexer_Token_Space: return "space";
  538. case cmListFileLexer_Token_Newline: return "newline";
  539. case cmListFileLexer_Token_Identifier: return "identifier";
  540. case cmListFileLexer_Token_ParenLeft: return "left paren";
  541. case cmListFileLexer_Token_ParenRight: return "right paren";
  542. case cmListFileLexer_Token_ArgumentUnquoted: return "unquoted argument";
  543. case cmListFileLexer_Token_ArgumentQuoted: return "quoted argument";
  544. case cmListFileLexer_Token_ArgumentBracket: return "bracket argument";
  545. case cmListFileLexer_Token_CommentBracket: return "bracket comment";
  546. case cmListFileLexer_Token_BadCharacter: return "bad character";
  547. case cmListFileLexer_Token_BadBracket: return "unterminated bracket";
  548. case cmListFileLexer_Token_BadString: return "unterminated string";
  549. }
  550. return "unknown token";
  551. }