LexTXT.cpp 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339
  1. #include <stdlib.h>
  2. #include <string.h>
  3. #include <stdio.h>
  4. #include <stdarg.h>
  5. #include <assert.h>
  6. #include <ctype.h>
  7. #include <string>
  8. #include <vector>
  9. #include <map>
  10. #include <algorithm>
  11. #include "ILexer.h"
  12. #include "Scintilla.h"
  13. #include "SciLexer.h"
  14. #include "StringCopy.h"
  15. #include "WordList.h"
  16. #include "LexAccessor.h"
  17. #include "Accessor.h"
  18. #include "StyleContext.h"
  19. #include "CharacterSet.h"
  20. #include "CharacterCategory.h"
  21. #include "LexerModule.h"
  22. #include "OptionSet.h"
  23. #include "SubStyles.h"
  24. #include "DefaultLexer.h"
  25. #include "LexAccessor.h"
  26. #include "UniConversion.h"
  27. using namespace Scintilla;
  28. //const char styleSubable[] = { SCE_P_IDENTIFIER, 0 };
  29. //Default = 0,//中文
  30. //Ascii = 1,//英文
  31. //Keyword = 2, //关键字,只有以TXT为母版的
  32. LexicalClass lexicalClasses[] = {
  33. // Lexer Python SCLEX_PYTHON SCE_P_:
  34. 0, "SCE_TXT_DEFAULT", "default", "utf8 char",
  35. 1, "SCE_TXT_ASCII", "Ascii", "Ascii",
  36. 2, "SCE_TXT_KEYWORD", "keyword", "keyword",
  37. };
  38. enum literalsAllowed { litNone = 0, litU = 1, litB = 2, litF = 4 };
  39. // Options used for LexerPython
  40. struct OptionsTxt {
  41. bool ascii;
  42. bool utf8;
  43. OptionsTxt() {
  44. ascii = true;
  45. utf8 = true;
  46. }
  47. };
  48. struct OptionSetTxt : public OptionSet<OptionsTxt> {
  49. OptionSetTxt() {
  50. DefineProperty("lexer.txt.utf8string", &OptionsTxt::utf8,
  51. "text Unicode string");
  52. DefineProperty("lexer.txt.asciistring", &OptionsTxt::ascii,
  53. "text literals ascii string");
  54. }
  55. };
  56. class LexTXT :public DefaultLexer
  57. {
  58. WordList keywords;
  59. //SubStyles subStyles;
  60. OptionsTxt options;
  61. OptionSetTxt osTxt;
  62. public:
  63. explicit LexTXT() :
  64. DefaultLexer(lexicalClasses, ELEMENTS(lexicalClasses))/*,
  65. subStyles(styleSubable, 0x80, 0x40, 0)*/ {
  66. }
  67. virtual ~LexTXT() {}
  68. void SCI_METHOD Release() override {
  69. delete this;
  70. }
  71. int SCI_METHOD Version() const override {
  72. return lvSubStyles;
  73. }
  74. const char *SCI_METHOD PropertyNames() override {
  75. return osTxt.PropertyNames();
  76. }
  77. int SCI_METHOD PropertyType(const char *name) override {
  78. return osTxt.PropertyType(name);
  79. }
  80. const char *SCI_METHOD DescribeProperty(const char *name) override {
  81. return osTxt.DescribeProperty(name);
  82. }
  83. Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
  84. const char *SCI_METHOD DescribeWordListSets() override {
  85. return "";
  86. }
  87. Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
  88. void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
  89. void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
  90. void *SCI_METHOD PrivateCall(int, void *) override {
  91. return 0;
  92. }
  93. int SCI_METHOD LineEndTypesSupported() override {
  94. return SC_LINE_END_TYPE_UNICODE;
  95. }
  96. //int SCI_METHOD AllocateSubStyles(int styleBase, int numberStyles) override {
  97. // return subStyles.Allocate(styleBase, numberStyles);
  98. //}
  99. //int SCI_METHOD SubStylesStart(int styleBase) override {
  100. // return subStyles.Start(styleBase);
  101. //}
  102. //int SCI_METHOD SubStylesLength(int styleBase) override {
  103. // return subStyles.Length(styleBase);
  104. //}
  105. //int SCI_METHOD StyleFromSubStyle(int subStyle) override {
  106. // const int styleBase = subStyles.BaseStyle(subStyle);
  107. // return styleBase;
  108. //}
  109. int SCI_METHOD PrimaryStyleFromStyle(int style) override {
  110. return style;
  111. }
  112. //void SCI_METHOD FreeSubStyles() override {
  113. // subStyles.Free();
  114. //}
  115. //void SCI_METHOD SetIdentifiers(int style, const char *identifiers) override {
  116. // subStyles.SetIdentifiers(style, identifiers);
  117. //}
  118. int SCI_METHOD DistanceToSecondaryStyles() override {
  119. return 0;
  120. }
  121. //const char *SCI_METHOD GetSubStyleBases() override {
  122. // return styleSubable;
  123. //}
  124. static ILexer *LexerFactoryTxt() {
  125. return new LexTXT();
  126. }
  127. };
  128. Sci_Position SCI_METHOD LexTXT::PropertySet(const char *key, const char *val) {
  129. if (osTxt.PropertySet(&options, key, val)) {
  130. return 0;
  131. }
  132. return -1;
  133. }
  134. Sci_Position SCI_METHOD LexTXT::WordListSet(int n, const char *wl) {
  135. WordList *wordListN = 0;
  136. switch (n) {
  137. case 0:
  138. wordListN = &keywords;
  139. break;
  140. }
  141. Sci_Position firstModification = -1;
  142. if (wordListN) {
  143. WordList wlNew;
  144. wlNew.Set(wl);
  145. if (*wordListN != wlNew) {
  146. wordListN->Set(wl);
  147. firstModification = 0;
  148. }
  149. }
  150. return firstModification;
  151. }
  152. const int indicatorWhitespace = 1;
  153. //inline bool IsAWordChar(int ch, bool unicodeIdentifiers) {
  154. // if (ch < 0x80)
  155. // return (isalnum(ch) || ch == '.' || ch == '_');
  156. //
  157. // if (!unicodeIdentifiers)
  158. // return false;
  159. //
  160. // // Python uses the XID_Continue set from unicode data
  161. // return IsXidContinue(ch);
  162. //}
  163. inline bool IsAAsciiChar(int ch) {
  164. return (ch < 0x80);
  165. }
  166. inline bool IsAWordStart(int ch, bool unicodeIdentifiers) {
  167. if (ch < 0x80)
  168. return (isalpha(ch) || ch == '_');
  169. if (!unicodeIdentifiers)
  170. return false;
  171. // Python uses the XID_Start set from unicode data
  172. return IsXidStart(ch);
  173. }
  174. //只识别中文和英文两种单词的状态
  175. void SCI_METHOD LexTXT::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
  176. Accessor styler(pAccess, NULL);
  177. const Sci_Position endPos = startPos + length;
  178. //// Backtrack to previous line in case need to fix its tab whinging
  179. //Sci_Position lineCurrent = styler.GetLine(startPos);
  180. //if (startPos > 0) {
  181. // if (lineCurrent > 0) {
  182. // lineCurrent--;
  183. // // Look for backslash-continued lines
  184. // while (lineCurrent > 0) {
  185. // Sci_Position eolPos = styler.LineStart(lineCurrent) - 1;
  186. // const int eolStyle = styler.StyleAt(eolPos);
  187. // if (eolStyle == SCE_P_STRING
  188. // || eolStyle == SCE_P_CHARACTER
  189. // || eolStyle == SCE_P_STRINGEOL) {
  190. // lineCurrent -= 1;
  191. // }
  192. // else {
  193. // break;
  194. // }
  195. // }
  196. // startPos = styler.LineStart(lineCurrent);
  197. // }
  198. // initStyle = (startPos == 0 ? SCE_P_DEFAULT : styler.StyleAt(startPos - 1));
  199. //}
  200. //initStyle = initStyle & 31;
  201. //if (initStyle == SCE_P_STRINGEOL) {
  202. // initStyle = SCE_P_DEFAULT;
  203. //}
  204. StyleContext sc(startPos, endPos - startPos, initStyle, styler);
  205. Sci_Position startIndicator = sc.currentPos;
  206. for (; sc.More();) {
  207. // Check for a new state starting character
  208. if (sc.state == SCE_TXT_DEFAULT)
  209. {
  210. //遇到下一个ASCII字符的时候,进入识别状态
  211. if (IsAAsciiChar(sc.ch))
  212. {
  213. sc.SetState(SCE_TXT_IDENTIFIER);
  214. }
  215. }
  216. else if (sc.state == SCE_TXT_ASCII)
  217. {
  218. //遇到下一个非ASCII字符的时候,进入识别状态
  219. if (!IsAAsciiChar(sc.ch))
  220. {
  221. sc.SetState(SCE_TXT_IDENTIFIER);
  222. }
  223. }
  224. if (sc.state == SCE_TXT_IDENTIFIER) {
  225. //txt就三种状态、英文、中文、自定义关键字。默认是中文。
  226. //遇到非字符和非数字,开始检测单词,是关键字则识别为关键字;若不是关键字,则肯定是英文字符
  227. //如果遇到非ASCII字符,则开始检查
  228. if (!IsAAsciiChar(sc.ch)) {
  229. char s[1000];
  230. sc.GetCurrent(s, sizeof(s));
  231. int style = SCE_TXT_IDENTIFIER;
  232. if (keywords.InList(s))
  233. {
  234. style = SCE_TXT_KEYWORD;
  235. }
  236. else
  237. {
  238. //不是关键字,就是普通的英文单词
  239. style = SCE_TXT_ASCII;
  240. }
  241. sc.ChangeState(style);
  242. //下面函数运行就已经把关键字或英文给单独设置风格了。此时默认进入中文风格状态
  243. sc.SetState(SCE_TXT_DEFAULT);
  244. }
  245. }
  246. sc.Forward();
  247. }
  248. //最后一段不能遗漏,也需要识别
  249. if (IsAAsciiChar(sc.ch))
  250. {
  251. sc.ChangeState(SCE_TXT_ASCII);
  252. }
  253. else
  254. {
  255. sc.ChangeState(SCE_TXT_DEFAULT);
  256. }
  257. sc.SetState(SCE_TXT_DEFAULT);
  258. styler.IndicatorFill(startIndicator, sc.currentPos, indicatorWhitespace, 0);
  259. sc.Complete();
  260. }
  261. static bool IsCommentLine(Sci_Position line, Accessor &styler) {
  262. Sci_Position pos = styler.LineStart(line);
  263. const Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
  264. for (Sci_Position i = pos; i < eol_pos; i++) {
  265. const char ch = styler[i];
  266. if (ch == '#')
  267. return true;
  268. else if (ch != ' ' && ch != '\t')
  269. return false;
  270. }
  271. return false;
  272. }
  273. static bool IsQuoteLine(Sci_Position line, const Accessor &styler) {
  274. const int style = styler.StyleAt(styler.LineStart(line)) & 31;
  275. return false;
  276. }
  277. //不处理任何折叠
  278. void SCI_METHOD LexTXT::Fold(Sci_PositionU startPos, Sci_Position length, int /*initStyle - unused*/, IDocument *pAccess) {
  279. return;
  280. }
  281. static const char *const txtWordListDesc[] = {
  282. 0
  283. };
  284. LexerModule lmTxt(SCLEX_TXT, LexTXT::LexerFactoryTxt, "txt", txtWordListDesc);