123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208 |
- /*
- * Copyright (c) 2013 Hugh Bailey <[email protected]>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
- #pragma once
- #include "lexer.h"
- #ifdef __cplusplus
- extern "C" {
- #endif
- EXPORT char *cf_literal_to_str(const char *literal, size_t count); /* defined outside this header; NOTE(review): presumably turns `count` chars of a quoted literal into a new string -- confirm behavior and who frees the result */
- /* ------------------------------------------------------------------------- */
- /*
- * A C-family lexer token is defined as:
- * 1.) A generic 'name' token. (abc123_def456)
- * 2.) A numeric sequence (usually starting with a number)
- * 3.) A sequence of generic whitespace defined as spaces and tabs
- * 4.) A newline
- * 5.) A string or character sequence (surrounded by single or double quotes)
- * 6.) A single character of a type not specified above
- */
- enum cf_token_type { /* category stored in cf_token.type; mirrors the numbered list above */
- CFTOKEN_NONE, /* first enumerator (value 0), so a zero-filled token carries this type */
- CFTOKEN_NAME, /* generic 'name' token, e.g. abc123_def456 */
- CFTOKEN_NUM, /* numeric sequence (usually starting with a number) */
- CFTOKEN_SPACETAB, /* run of generic whitespace: spaces and tabs */
- CFTOKEN_NEWLINE, /* a newline */
- CFTOKEN_STRING, /* string or character sequence in single or double quotes */
- CFTOKEN_OTHER /* a single character of a type not listed above */
- };
- struct cf_token { /* one token of the c-family lexer */
- const struct cf_lexer *lex; /* read-only pointer to a lexer; NOTE(review): presumably the lexer that produced this token -- confirm */
- struct strref str; /* token text as a strref; extended by cf_token_add */
- struct strref unmerged_str; /* second strref, also extended by cf_token_add; NOTE(review): presumably the text before spliced lines were merged -- confirm */
- enum cf_token_type type; /* token category */
- };
- static inline void cf_token_clear(struct cf_token *t)
- {
- memset(t, 0, sizeof(struct cf_token));
- }
- static inline void cf_token_copy(struct cf_token *dst,
- const struct cf_token *src)
- {
- memcpy(dst, src, sizeof(struct cf_token));
- }
- static inline void cf_token_add(struct cf_token *dst,
- const struct cf_token *add)
- {
- strref_add(&dst->str, &add->str);
- strref_add(&dst->unmerged_str, &add->unmerged_str);
- }
- /* ------------------------------------------------------------------------- */
- /*
- * The c-family lexer is a base lexer for generating a list of string
- * reference tokens to be used with c-style languages.
- *
- * This base lexer is meant to be used as a stepping stone for an actual
- * language lexer/parser.
- *
- * It reformats the text in the two following ways:
- * 1.) Spliced lines (escaped newlines) are merged
- * 2.) All comments are converted to a single space
- */
- struct cf_lexer { /* state of the c-family base lexer described above */
- char *file; /* NOTE(review): presumably the file name given to cf_lexer_lex -- confirm ownership in the implementation */
- struct lexer base_lexer; /* generic lexer from lexer.h embedded by value */
- char *reformatted, *write_offset; /* NOTE(review): presumably the reformatted text buffer and the current write position inside it -- confirm */
- DARRAY(struct cf_token) tokens; /* dynamic array of tokens; its storage is returned by cf_lexer_get_tokens */
- bool unexpected_eof; /* unexpected multi-line comment eof */
- };
- EXPORT void cf_lexer_init(struct cf_lexer *lex); /* defined outside this header; NOTE(review): presumably must be called before any other use of *lex -- confirm */
- EXPORT void cf_lexer_free(struct cf_lexer *lex); /* defined outside this header; NOTE(review): presumably releases what the lexer owns, not *lex itself -- confirm */
- static inline struct cf_token *cf_lexer_get_tokens(struct cf_lexer *lex)
- {
- return lex->tokens.array;
- }
- EXPORT bool cf_lexer_lex(struct cf_lexer *lex, const char *str,
- const char *file); /* defined outside this header; NOTE(review): presumably lexes `str` into lex->tokens, uses `file` as its name, and returns true on success -- confirm */
- /* ------------------------------------------------------------------------- */
- /* c-family preprocessor definition */
- struct cf_def { /* one preprocessor definition */
- struct cf_token name; /* name token; zeroed by cf_def_init and cf_def_free */
- DARRAY(struct cf_token) params; /* parameter tokens; appended by cf_def_addparam, indexed by cf_def_getparam */
- DARRAY(struct cf_token) tokens; /* tokens appended by cf_def_addtoken; NOTE(review): presumably the replacement list -- confirm */
- bool macro; /* false after cf_def_init; NOTE(review): presumably true for definitions that take parameters -- confirm */
- };
- static inline void cf_def_init(struct cf_def *cfd)
- {
- cf_token_clear(&cfd->name);
- da_init(cfd->params);
- da_init(cfd->tokens);
- cfd->macro = false;
- }
- static inline void cf_def_addparam(struct cf_def *cfd, struct cf_token *param)
- {
- da_push_back(cfd->params, param);
- }
- static inline void cf_def_addtoken(struct cf_def *cfd, struct cf_token *token)
- {
- da_push_back(cfd->tokens, token);
- }
- static inline struct cf_token *cf_def_getparam(const struct cf_def *cfd,
- size_t idx)
- {
- return cfd->params.array + idx;
- }
- static inline void cf_def_free(struct cf_def *cfd)
- {
- cf_token_clear(&cfd->name);
- da_free(cfd->params);
- da_free(cfd->tokens);
- }
- /* ------------------------------------------------------------------------- */
- /*
- * C-family preprocessor
- *
- * This preprocessor allows for standard c-style preprocessor directives
- * to be applied to source text, such as:
- *
- * + #include
- * + #define/#undef
- * + #ifdef/#ifndef/#else/#endif (#if/#elif are not implemented yet, see TODO)
- *
- * Still left to implement (TODO):
- * + #if/#elif
- * + "defined" preprocessor keyword
- * + system includes
- * + variadic macros
- * + custom callbacks (for things like pragma)
- * + option to exclude features such as #import, variadic macros, and other
- * features for certain language implementations
- * + macro parameter string operator #
- * + macro parameter token concatenation operator ##
- * + predefined macros
- * + restricted macros
- */
- struct cf_preprocessor { /* state of the c-family preprocessor described above */
- struct cf_lexer *lex; /* NOTE(review): presumably the lexer passed to cf_preprocess -- confirm */
- struct error_data *ed; /* NOTE(review): presumably the error sink passed to cf_preprocess -- confirm */
- DARRAY(struct cf_def) defines; /* dynamic array of definitions; NOTE(review): presumably maintained by cf_preprocessor_add_def/remove_def -- confirm */
- DARRAY(char *) sys_include_dirs; /* directory strings; cf_preprocessor_add_sys_include_dir appends bstrdup() copies here */
- DARRAY(struct cf_lexer) dependencies; /* NOTE(review): presumably lexers for files pulled in by #include -- confirm */
- DARRAY(struct cf_token) tokens; /* token array; its storage is returned by cf_preprocessor_get_tokens */
- bool ignore_state; /* NOTE(review): meaning not visible in this header -- see the implementation */
- };
- EXPORT void cf_preprocessor_init(struct cf_preprocessor *pp); /* defined outside this header; NOTE(review): presumably required before any other use of *pp -- confirm */
- EXPORT void cf_preprocessor_free(struct cf_preprocessor *pp); /* defined outside this header; NOTE(review): presumably releases everything *pp owns -- confirm */
- EXPORT bool cf_preprocess(struct cf_preprocessor *pp, struct cf_lexer *lex,
- struct error_data *ed); /* defined outside this header; NOTE(review): presumably preprocesses the tokens of `lex` into pp->tokens, reports problems through `ed`, and returns true on success -- confirm */
- static inline void
- cf_preprocessor_add_sys_include_dir(struct cf_preprocessor *pp,
- const char *include_dir)
- {
- char *str = bstrdup(include_dir);
- if (include_dir)
- da_push_back(pp->sys_include_dirs, &str);
- }
- EXPORT void cf_preprocessor_add_def(struct cf_preprocessor *pp,
- struct cf_def *def); /* defined outside this header; NOTE(review): presumably adds *def to pp->defines -- confirm whether ownership of def's arrays is transferred */
- EXPORT void cf_preprocessor_remove_def(struct cf_preprocessor *pp,
- const char *def_name); /* defined outside this header; NOTE(review): presumably removes the definition whose name equals def_name -- confirm */
- static inline struct cf_token *
- cf_preprocessor_get_tokens(struct cf_preprocessor *pp)
- {
- return pp->tokens.array;
- }
- #ifdef __cplusplus
- }
- #endif
|