LexLog.cpp 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362
  1. #include <stdlib.h>
  2. #include <string.h>
  3. #include <stdio.h>
  4. #include <stdarg.h>
  5. #include <assert.h>
  6. #include <ctype.h>
  7. #include <string>
  8. #include <vector>
  9. #include <map>
  10. #include <algorithm>
  11. #include <regex>
  12. #include "ILexer.h"
  13. #include "Scintilla.h"
  14. #include "SciLexer.h"
  15. #include "StringCopy.h"
  16. #include "WordList.h"
  17. #include "LexAccessor.h"
  18. #include "Accessor.h"
  19. #include "StyleContext.h"
  20. #include "CharacterSet.h"
  21. #include "CharacterCategory.h"
  22. #include "LexerModule.h"
  23. #include "OptionSet.h"
  24. #include "SubStyles.h"
  25. #include "DefaultLexer.h"
  26. #include "LexAccessor.h"
  27. #include "UniConversion.h"
  28. using namespace Scintilla;
  29. //const char styleSubable[] = { SCE_P_IDENTIFIER, 0 };
//Default = 0, // Chinese text
//Ascii = 1, // English text
//Keyword = 2, // keywords; only for lexers based on the TXT template
  33. static LexicalClass lexicalClasses[] = {
  34. // Lexer Python SCLEX_PYTHON SCE_P_:
  35. 0, "SCE_LOG_DEFAULT", "default", "default",
  36. 1, "SCE_LOG_NUM", "Num", "Num",
  37. 2, "SCE_LOG_DATE", "Date", "Date",
  38. 3, "SCE_LOG_KEYWORD", "keyword", "keyword",
  39. };
  40. enum LOG_STATUS {
  41. SCE_LOG_DEFAULT=0,
  42. SCE_LOG_NUM ,
  43. SCE_LOG_DATE,
  44. SCE_LOG_KEYWORD,
  45. SCE_LOG_IDENTIFIER,
  46. SCE_LOG_HEX
  47. };
  48. // Options used for LexerPython
  49. struct OptionsTxt {
  50. bool num;
  51. bool data;
  52. bool keyword;
  53. OptionsTxt() {
  54. num = true;
  55. data = true;
  56. keyword = true;
  57. }
  58. };
  59. struct OptionSetTxt : public OptionSet<OptionsTxt> {
  60. OptionSetTxt() {
  61. DefineProperty("lexer.log.num", &OptionsTxt::num,
  62. "text Unicode string");
  63. DefineProperty("lexer.txt.data", &OptionsTxt::data,
  64. "text literals ascii string");
  65. DefineProperty("lexer.txt.keyword", &OptionsTxt::keyword,
  66. "text keyword");
  67. }
  68. };
  69. class LexLOG :public DefaultLexer
  70. {
  71. WordList keywords;
  72. OptionsTxt options;
  73. OptionSetTxt osTxt;
  74. CharacterSet setWord;
  75. CharacterSet setWordStart;
  76. CharacterSet hexWord;
  77. public:
  78. explicit LexLOG() :
  79. DefaultLexer(lexicalClasses, ELEMENTS(lexicalClasses)),
  80. setWord(CharacterSet::setAlphaNum, "._", 0x80, true),//字母、数字、下划线、. 单词的字符集。
  81. setWordStart(CharacterSet::setAlpha, "_", 0x80, true),
  82. hexWord(CharacterSet::setDigits, "abcdefABCDEF", 0x80, true)
  83. {
  84. }
  85. virtual ~LexLOG() {}
  86. void SCI_METHOD Release() override {
  87. delete this;
  88. }
  89. int SCI_METHOD Version() const override {
  90. return lvSubStyles;
  91. }
  92. const char *SCI_METHOD PropertyNames() override {
  93. return osTxt.PropertyNames();
  94. }
  95. int SCI_METHOD PropertyType(const char *name) override {
  96. return osTxt.PropertyType(name);
  97. }
  98. const char *SCI_METHOD DescribeProperty(const char *name) override {
  99. return osTxt.DescribeProperty(name);
  100. }
  101. Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
  102. const char *SCI_METHOD DescribeWordListSets() override {
  103. return "";
  104. }
  105. Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
  106. void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
  107. void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
  108. void *SCI_METHOD PrivateCall(int, void *) override {
  109. return 0;
  110. }
  111. int SCI_METHOD LineEndTypesSupported() override {
  112. return SC_LINE_END_TYPE_UNICODE;
  113. }
  114. int SCI_METHOD PrimaryStyleFromStyle(int style) override {
  115. return style;
  116. }
  117. int SCI_METHOD DistanceToSecondaryStyles() override {
  118. return 0;
  119. }
  120. static ILexer *LexerFactoryLog() {
  121. return new LexLOG();
  122. }
  123. };
  124. Sci_Position SCI_METHOD LexLOG::PropertySet(const char *key, const char *val) {
  125. if (osTxt.PropertySet(&options, key, val)) {
  126. return 0;
  127. }
  128. return -1;
  129. }
  130. Sci_Position SCI_METHOD LexLOG::WordListSet(int n, const char *wl) {
  131. WordList *wordListN = 0;
  132. switch (n) {
  133. case 0:
  134. wordListN = &keywords;
  135. break;
  136. }
  137. Sci_Position firstModification = -1;
  138. if (wordListN) {
  139. WordList wlNew;
  140. wlNew.Set(wl);
  141. if (*wordListN != wlNew) {
  142. wordListN->Set(wl);
  143. firstModification = 0;
  144. }
  145. }
  146. return firstModification;
  147. }
  148. const int indicatorWhitespace = 1;
  149. inline bool IsAAsciiChar(int ch) {
  150. return (ch < 0x80);
  151. }
  152. inline bool IsAWordStart(int ch, bool unicodeIdentifiers) {
  153. if (ch < 0x80)
  154. return (isalpha(ch) || ch == '_');
  155. if (!unicodeIdentifiers)
  156. return false;
  157. return IsXidStart(ch);
  158. }
  159. //是否是单词分割符号,对于ascii中的数字和字母以外的字符,比如 . @ \t 等,都作为一个单词的分割符号。
  160. inline bool IsWordSplitChar(int ch) noexcept {
  161. return (ch < 0x80) && !isalnum(ch);
  162. }
  163. inline bool IsLineEol(int ch) noexcept {
  164. return ch == '\n' || ch == '\r';
  165. }
  166. //获取行,最多获取128个字符串
  167. static std::string GetLineContents(LexAccessor& styler, Sci_Position start, const int len=128) {
  168. std::string lineContent;
  169. Sci_Position i = 0;
  170. char ch = styler.SafeGetCharAt(start, '\n');
  171. const Sci_Position endLine = styler.LineEnd(styler.GetLine(start));
  172. while (((start + i) < endLine) && ( i < len)) {
  173. const char chNext = styler.SafeGetCharAt(start + i + 1, '\n');
  174. lineContent += ch;
  175. i++;
  176. ch = chNext;
  177. }
  178. return lineContent;
  179. }
  180. //只识别中文和英文两种单词的状态
  181. void SCI_METHOD LexLOG::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
  182. Accessor styler(pAccess, NULL);
  183. const Sci_Position endPos = startPos + length;
  184. StyleContext sc(startPos, endPos - startPos, initStyle, styler);
  185. std::regex patternData ("\\d{4}[-/]\\d{2}[-/]\\d{2} \\d{2}:\\d{2}:\\d{2}[.:]\\d{1,3}");
  186. Sci_Position startIndicator = sc.currentPos;
  187. bool isDataMatch = false;
  188. for (; sc.More();) {
  189. //如果是在行开头,则获取64字节,识别出其中的日期。
  190. if (sc.atLineStart) {
  191. std::string lineContens = GetLineContents(styler, sc.currentPos,64);
  192. //匹配里面的日期时间值
  193. std::smatch result;
  194. std::string::const_iterator iterStart = lineContens.begin();
  195. std::string::const_iterator iterEnd = lineContens.end();
  196. //匹配到日期
  197. if (std::regex_search(iterStart, iterEnd, result, patternData))
  198. {
  199. int start = result[0].first - iterStart;
  200. int end = result[0].second - iterStart;
  201. int i = 0;
  202. while (i < start)
  203. {
  204. sc.Forward();
  205. ++i;
  206. }
  207. sc.SetState(SCE_LOG_DATE);
  208. while (i < end)
  209. {
  210. sc.Forward();
  211. ++i;
  212. }
  213. sc.SetState(SCE_LOG_DEFAULT);
  214. }
  215. }
  216. /*if (!sc.atLineEnd && !setWordStart.Contains(sc.chPrev) && setWordStart.Contains(sc.ch)) {
  217. sc.SetState(SCE_LOG_DEFAULT);
  218. sc.ChangeState(SCE_LOG_IDENTIFIER);
  219. }*/
  220. switch (sc.state)
  221. {
  222. case SCE_LOG_HEX:
  223. {
  224. if (!hexWord.Contains(sc.ch))
  225. {
  226. sc.ChangeState(SCE_LOG_NUM);//16进制和10进制一样的格式
  227. sc.SetState(SCE_LOG_DEFAULT);
  228. }
  229. }
  230. break;
  231. case SCE_LOG_NUM:
  232. {
  233. if (!IsADigit(sc.ch))
  234. {
  235. sc.SetState(SCE_LOG_DEFAULT);
  236. }
  237. }
  238. break;
  239. case SCE_LOG_DEFAULT:
  240. {
  241. //注意顺序,要先判断数字。
  242. if (sc.Match('0','x'))
  243. {
  244. sc.SetState(SCE_LOG_HEX);
  245. sc.Forward();
  246. }
  247. else if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
  248. //如果当前字符是数字,或者是.数字,则直接设置当前状态为数字。
  249. sc.SetState(SCE_LOG_NUM);
  250. }
  251. else if (!sc.atLineEnd && setWordStart.Contains(sc.ch)) {
  252. //如果不在行尾,而且当前字符是一个单词的开头标识字符,则进入标识符识别状态。
  253. sc.SetState(SCE_LOG_IDENTIFIER);
  254. }
  255. }
  256. break;
  257. case SCE_LOG_IDENTIFIER:
  258. {
  259. if (sc.atLineStart || sc.atLineEnd || !setWord.Contains(sc.ch))
  260. {
  261. char s[1000];
  262. //取出当前标识符,注意标识符不会超过1000,这是预计,不会有人傻到取名一个超过1000的字符串变量。
  263. sc.GetCurrentLowered(s, sizeof(s));
  264. if (keywords.InList(s)) {
  265. sc.ChangeState(SCE_LOG_KEYWORD);
  266. }
  267. else
  268. {
  269. sc.ChangeState(SCE_LOG_DEFAULT);
  270. }
  271. sc.SetState(SCE_LOG_DEFAULT);
  272. }
  273. }
  274. }
  275. sc.Forward();
  276. }
  277. styler.IndicatorFill(startIndicator, sc.currentPos, indicatorWhitespace, 0);
  278. sc.Complete();
  279. }
  280. static bool IsCommentLine(Sci_Position line, Accessor &styler) {
  281. Sci_Position pos = styler.LineStart(line);
  282. const Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
  283. for (Sci_Position i = pos; i < eol_pos; i++) {
  284. const char ch = styler[i];
  285. if (ch == '#')
  286. return true;
  287. else if (ch != ' ' && ch != '\t')
  288. return false;
  289. }
  290. return false;
  291. }
  292. static bool IsQuoteLine(Sci_Position line, const Accessor &styler) {
  293. const int style = styler.StyleAt(styler.LineStart(line)) & 31;
  294. return false;
  295. }
  296. //不处理任何折叠
  297. void SCI_METHOD LexLOG::Fold(Sci_PositionU startPos, Sci_Position length, int /*initStyle - unused*/, IDocument *pAccess) {
  298. return;
  299. }
  300. static const char *const txtWordListDesc[] = {
  301. 0
  302. };
  303. LexerModule lmLog(SCLEX_LOG, LexLOG::LexerFactoryLog, "log", txtWordListDesc);