cf-lexer.h 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213
  1. /******************************************************************************
  2. Copyright (c) 2013 by Hugh Bailey <[email protected]>
  3. This software is provided 'as-is', without any express or implied
  4. warranty. In no event will the authors be held liable for any damages
  5. arising from the use of this software.
  6. Permission is granted to anyone to use this software for any purpose,
  7. including commercial applications, and to alter it and redistribute it
  8. freely, subject to the following restrictions:
  9. 1. The origin of this software must not be misrepresented; you must not
  10. claim that you wrote the original software. If you use this software
  11. in a product, an acknowledgment in the product documentation would be
  12. appreciated but is not required.
  13. 2. Altered source versions must be plainly marked as such, and must not be
  14. misrepresented as being the original software.
  15. 3. This notice may not be removed or altered from any source
  16. distribution.
  17. ******************************************************************************/
  18. #pragma once
  19. #include "lexer.h"
  20. #ifdef __cplusplus
  21. extern "C" {
  22. #endif
  23. EXPORT char *cf_literal_to_str(const char *literal, size_t count);
  24. /* ------------------------------------------------------------------------- */
  25. /*
  26. * A C-family lexer token is defined as:
  27. * 1.) A generic 'name' token. (abc123_def456)
  28. * 2.) A numeric sequence (usually starting with a number)
  29. * 3.) A sequence of generic whitespace defined as spaces and tabs
  30. * 4.) A newline
  31. * 5.) A string or character sequence (surrounded by single or double quotes)
  32. * 6.) A single character of a type not specified above
  33. */
  /* Categories a C-family lexer token can fall into. */
  enum cf_token_type {
      CFTOKEN_NONE = 0, /* zero value; no category assigned */
      CFTOKEN_NAME,     /* generic 'name' token (abc123_def456) */
      CFTOKEN_NUM,      /* numeric sequence */
      CFTOKEN_SPACETAB, /* run of generic whitespace (spaces and tabs) */
      CFTOKEN_NEWLINE,  /* a newline */
      CFTOKEN_STRING,   /* string or character sequence in quotes */
      CFTOKEN_OTHER     /* single character of a type not listed above */
  };
  43. struct cf_token {
  44. const struct cf_lexer *lex;
  45. struct strref str;
  46. struct strref unmerged_str;
  47. enum cf_token_type type;
  48. };
  49. static inline void cf_token_clear(struct cf_token *t)
  50. {
  51. memset(t, 0, sizeof(struct cf_token));
  52. }
  53. static inline void cf_token_copy(struct cf_token *dst,
  54. const struct cf_token *src)
  55. {
  56. memcpy(dst, src, sizeof(struct cf_token));
  57. }
  58. static inline void cf_token_add(struct cf_token *dst,
  59. const struct cf_token *add)
  60. {
  61. strref_add(&dst->str, &add->str);
  62. strref_add(&dst->unmerged_str, &add->unmerged_str);
  63. }
  64. /* ------------------------------------------------------------------------- */
  65. /*
  66. * The c-family lexer is a base lexer for generating a list of string
  67. * reference tokens to be used with c-style languages.
  68. *
  69. * This base lexer is meant to be used as a stepping stone for an actual
  70. * language lexer/parser.
  71. *
  72. * It reformats the text in the two following ways:
  73. * 1.) Spliced lines (escaped newlines) are merged
  74. * 2.) All comments are converted to a single space
  75. */
  76. struct cf_lexer {
  77. char *file;
  78. struct lexer base_lexer;
  79. char *reformatted, *write_offset;
  80. DARRAY(struct cf_token) tokens;
  81. bool unexpected_eof; /* unexpected multi-line comment eof */
  82. };
  83. EXPORT void cf_lexer_init(struct cf_lexer *lex);
  84. EXPORT void cf_lexer_free(struct cf_lexer *lex);
  85. static inline struct cf_token *cf_lexer_gettokens(struct cf_lexer *lex)
  86. {
  87. return lex->tokens.array;
  88. }
  89. EXPORT bool cf_lexer_lex(struct cf_lexer *lex, const char *str,
  90. const char *file);
  91. /* ------------------------------------------------------------------------- */
  92. /* c-family preprocessor definition */
  93. struct cf_def {
  94. struct cf_token name;
  95. DARRAY(struct cf_token) params;
  96. DARRAY(struct cf_token) tokens;
  97. bool macro;
  98. };
  99. static inline void cf_def_init(struct cf_def *cfd)
  100. {
  101. cf_token_clear(&cfd->name);
  102. da_init(cfd->params);
  103. da_init(cfd->tokens);
  104. cfd->macro = false;
  105. }
  106. static inline void cf_def_addparam(struct cf_def *cfd, struct cf_token *param)
  107. {
  108. da_push_back(cfd->params, param);
  109. }
  110. static inline void cf_def_addtoken(struct cf_def *cfd, struct cf_token *token)
  111. {
  112. da_push_back(cfd->tokens, token);
  113. }
  114. static inline struct cf_token *cf_def_getparam(const struct cf_def *cfd,
  115. size_t idx)
  116. {
  117. return cfd->params.array+idx;
  118. }
  119. static inline void cf_def_free(struct cf_def *cfd)
  120. {
  121. cf_token_clear(&cfd->name);
  122. da_free(cfd->params);
  123. da_free(cfd->tokens);
  124. }
  125. /* ------------------------------------------------------------------------- */
  126. /*
  127. * C-family preprocessor
  128. *
  129. * This preprocessor allows for standard c-style preprocessor directives
  130. * to be applied to source text, such as:
  131. *
  132. * + #include
  133. * + #define/#undef
  134. * + #ifdef/#ifndef/#else/#endif
  135. *
  136. * Still left to implement (TODO):
  137. * + #if/#elif
  138. * + "defined" preprocessor keyword
  139. * + system includes
  140. * + variadic macros
  141. * + custom callbacks (for things like pragma)
  142. * + option to exclude features such as #import, variadic macros, and other
  143. * features for certain language implementations
  144. * + macro parameter string operator #
  145. * + macro parameter token concatenation operator ##
  146. * + predefined macros
  147. * + restricted macros
  148. */
  149. struct cf_preprocessor {
  150. struct cf_lexer *lex;
  151. struct error_data *ed;
  152. DARRAY(struct cf_def) defines;
  153. DARRAY(char*) sys_include_dirs;
  154. DARRAY(struct cf_lexer) dependencies;
  155. DARRAY(struct cf_token) tokens;
  156. bool ignore_state;
  157. };
  158. EXPORT void cf_preprocessor_init(struct cf_preprocessor *pp);
  159. EXPORT void cf_preprocessor_free(struct cf_preprocessor *pp);
  160. EXPORT bool cf_preprocess(struct cf_preprocessor *pp, struct cf_lexer *lex,
  161. struct error_data *ed);
  162. static inline void cf_preprocessor_add_sys_include_dir(
  163. struct cf_preprocessor *pp, const char *include_dir)
  164. {
  165. if (include_dir)
  166. da_push_back(pp->sys_include_dirs, bstrdup(include_dir));
  167. }
  168. EXPORT void cf_preprocessor_add_def(struct cf_preprocessor *pp,
  169. struct cf_def *def);
  170. EXPORT void cf_preprocessor_remove_def(struct cf_preprocessor *pp,
  171. const char *def_name);
  172. static inline struct cf_token *cf_preprocessor_gettokens(
  173. struct cf_preprocessor *pp)
  174. {
  175. return pp->tokens.array;
  176. }
  177. #ifdef __cplusplus
  178. }
  179. #endif