cf-lexer.h 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208
  1. /*
  2. * Copyright (c) 2013 Hugh Bailey <[email protected]>
  3. *
  4. * Permission to use, copy, modify, and distribute this software for any
  5. * purpose with or without fee is hereby granted, provided that the above
  6. * copyright notice and this permission notice appear in all copies.
  7. *
  8. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  10. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  11. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  12. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  13. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  14. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  15. */
  16. #pragma once
  17. #include "lexer.h"
  18. #ifdef __cplusplus
  19. extern "C" {
  20. #endif
  21. EXPORT char *cf_literal_to_str(const char *literal, size_t count);
  22. /* ------------------------------------------------------------------------- */
  23. /*
  24. * A C-family lexer token is defined as:
  25. * 1.) A generic 'name' token. (abc123_def456)
  26. * 2.) A numeric sequence (usually starting with a number)
  27. * 3.) A sequence of generic whitespace defined as spaces and tabs
  28. * 4.) A newline
  29. * 5.) A string or character sequence (surrounded by single or double quotes)
  30. * 6.) A single character of a type not specified above
  31. */
  32. enum cf_token_type {
  33. CFTOKEN_NONE,
  34. CFTOKEN_NAME,
  35. CFTOKEN_NUM,
  36. CFTOKEN_SPACETAB,
  37. CFTOKEN_NEWLINE,
  38. CFTOKEN_STRING,
  39. CFTOKEN_OTHER
  40. };
  41. struct cf_token {
  42. const struct cf_lexer *lex;
  43. struct strref str;
  44. struct strref unmerged_str;
  45. enum cf_token_type type;
  46. };
  47. static inline void cf_token_clear(struct cf_token *t)
  48. {
  49. memset(t, 0, sizeof(struct cf_token));
  50. }
  51. static inline void cf_token_copy(struct cf_token *dst,
  52. const struct cf_token *src)
  53. {
  54. memcpy(dst, src, sizeof(struct cf_token));
  55. }
  56. static inline void cf_token_add(struct cf_token *dst,
  57. const struct cf_token *add)
  58. {
  59. strref_add(&dst->str, &add->str);
  60. strref_add(&dst->unmerged_str, &add->unmerged_str);
  61. }
  62. /* ------------------------------------------------------------------------- */
  63. /*
  64. * The c-family lexer is a base lexer for generating a list of string
  65. * reference tokens to be used with c-style languages.
  66. *
  67. * This base lexer is meant to be used as a stepping stone for an actual
  68. * language lexer/parser.
  69. *
  70. * It reformats the text in the two following ways:
  71. * 1.) Spliced lines (escaped newlines) are merged
  72. * 2.) All comments are converted to a single space
  73. */
  74. struct cf_lexer {
  75. char *file;
  76. struct lexer base_lexer;
  77. char *reformatted, *write_offset;
  78. DARRAY(struct cf_token) tokens;
  79. bool unexpected_eof; /* unexpected multi-line comment eof */
  80. };
  81. EXPORT void cf_lexer_init(struct cf_lexer *lex);
  82. EXPORT void cf_lexer_free(struct cf_lexer *lex);
  83. static inline struct cf_token *cf_lexer_get_tokens(struct cf_lexer *lex)
  84. {
  85. return lex->tokens.array;
  86. }
  87. EXPORT bool cf_lexer_lex(struct cf_lexer *lex, const char *str,
  88. const char *file);
  89. /* ------------------------------------------------------------------------- */
  90. /* c-family preprocessor definition */
  91. struct cf_def {
  92. struct cf_token name;
  93. DARRAY(struct cf_token) params;
  94. DARRAY(struct cf_token) tokens;
  95. bool macro;
  96. };
  97. static inline void cf_def_init(struct cf_def *cfd)
  98. {
  99. cf_token_clear(&cfd->name);
  100. da_init(cfd->params);
  101. da_init(cfd->tokens);
  102. cfd->macro = false;
  103. }
  104. static inline void cf_def_addparam(struct cf_def *cfd, struct cf_token *param)
  105. {
  106. da_push_back(cfd->params, param);
  107. }
  108. static inline void cf_def_addtoken(struct cf_def *cfd, struct cf_token *token)
  109. {
  110. da_push_back(cfd->tokens, token);
  111. }
  112. static inline struct cf_token *cf_def_getparam(const struct cf_def *cfd,
  113. size_t idx)
  114. {
  115. return cfd->params.array + idx;
  116. }
  117. static inline void cf_def_free(struct cf_def *cfd)
  118. {
  119. cf_token_clear(&cfd->name);
  120. da_free(cfd->params);
  121. da_free(cfd->tokens);
  122. }
  123. /* ------------------------------------------------------------------------- */
/*
 * C-family preprocessor
 *
 * This preprocessor allows for standard c-style preprocessor directives
 * to be applied to source text, such as:
 *
 *   + #include
 *   + #define/#undef
 *   + #ifdef/#ifndef/#else/#endif
 *
 * Still left to implement (TODO):
 *   + #if/#elif
 *   + "defined" preprocessor keyword
 *   + system includes
 *   + variadic macros
 *   + custom callbacks (for things like pragma)
 *   + option to exclude features such as #import, variadic macros, and
 *     other features for certain language implementations
 *   + macro parameter string operator #
 *   + macro parameter token concatenation operator ##
 *   + predefined macros
 *   + restricted macros
 */
  147. struct cf_preprocessor {
  148. struct cf_lexer *lex;
  149. struct error_data *ed;
  150. DARRAY(struct cf_def) defines;
  151. DARRAY(char *) sys_include_dirs;
  152. DARRAY(struct cf_lexer) dependencies;
  153. DARRAY(struct cf_token) tokens;
  154. bool ignore_state;
  155. };
  156. EXPORT void cf_preprocessor_init(struct cf_preprocessor *pp);
  157. EXPORT void cf_preprocessor_free(struct cf_preprocessor *pp);
  158. EXPORT bool cf_preprocess(struct cf_preprocessor *pp, struct cf_lexer *lex,
  159. struct error_data *ed);
  160. static inline void
  161. cf_preprocessor_add_sys_include_dir(struct cf_preprocessor *pp,
  162. const char *include_dir)
  163. {
  164. char *str = bstrdup(include_dir);
  165. if (include_dir)
  166. da_push_back(pp->sys_include_dirs, &str);
  167. }
  168. EXPORT void cf_preprocessor_add_def(struct cf_preprocessor *pp,
  169. struct cf_def *def);
  170. EXPORT void cf_preprocessor_remove_def(struct cf_preprocessor *pp,
  171. const char *def_name);
  172. static inline struct cf_token *
  173. cf_preprocessor_get_tokens(struct cf_preprocessor *pp)
  174. {
  175. return pp->tokens.array;
  176. }
  177. #ifdef __cplusplus
  178. }
  179. #endif