浏览代码

cmCommandArgumentParserHelper: rework input handling

The old implementation uses an involved Flex input management technique
that requires the obsolete YY_INPUT macro. This causes many needless
allocations and byte-by-byte scanning. The new implementation avoids those
hacks by using the yy_scan_string() API to set up Flex input. It also fixes
the reported syntax error position and updates the corresponding tests.
Oleksandr Koval 5 年之前
父节点
当前提交
62d7acc6d4

+ 8 - 14
Source/LexerParser/cmCommandArgumentLexer.cxx

@@ -653,7 +653,7 @@ This file must be translated to C++ and modified to build everywhere.
 
 Run flex >= 2.6 like this:
 
-  flex --nounistd -DFLEXINT_H --noline --header-file=cmCommandArgumentLexer.h -ocmCommandArgumentLexer.cxx cmCommandArgumentLexer.in.l
+  flex --nounistd --never-interactive --batch -DFLEXINT_H --noline --header-file=cmCommandArgumentLexer.h -ocmCommandArgumentLexer.cxx cmCommandArgumentLexer.in.l
 
 Modify cmCommandArgumentLexer.cxx:
   - remove trailing whitespace:              sed -i 's/\s*$//' cmCommandArgumentLexer.h cmCommandArgumentLexer.cxx
@@ -668,10 +668,7 @@ Modify cmCommandArgumentLexer.cxx:
 
 #include "cmCommandArgumentParserHelper.h"
 
-/* Replace the lexer input function.  */
-#undef YY_INPUT
-#define YY_INPUT(buf, result, max_size) \
-  do { result = yyextra->LexInput(buf, max_size); } while (0)
+#define YY_USER_ACTION  yyextra->UpdateInputPosition(yyleng);
 
 /* Include the set of tokens from the parser.  */
 #include "cmCommandArgumentParserTokens.h"
@@ -967,16 +964,12 @@ yy_match:
 			yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c];
 			++yy_cp;
 			}
-		while ( yy_base[yy_current_state] != 41 );
+		while ( yy_current_state != 29 );
+		yy_cp = yyg->yy_last_accepting_cpos;
+		yy_current_state = yyg->yy_last_accepting_state;
 
 yy_find_action:
 		yy_act = yy_accept[yy_current_state];
-		if ( yy_act == 0 )
-			{ /* have to back up */
-			yy_cp = yyg->yy_last_accepting_cpos;
-			yy_current_state = yyg->yy_last_accepting_state;
-			yy_act = yy_accept[yy_current_state];
-			}
 
 		YY_DO_BEFORE_ACTION;
 
@@ -1173,7 +1166,8 @@ case YY_STATE_EOF(NOESCAPES):
 
 			else
 				{
-				yy_cp = yyg->yy_c_buf_p;
+				yy_cp = yyg->yy_last_accepting_cpos;
+				yy_current_state = yyg->yy_last_accepting_state;
 				goto yy_find_action;
 				}
 			}
@@ -1661,7 +1655,7 @@ static void yy_load_buffer_state  (yyscan_t yyscanner)
         b->yy_bs_column = 0;
     }
 
-        b->yy_is_interactive = file ? (isatty( fileno(file) ) > 0) : 0;
+        b->yy_is_interactive = 0;
 
 	errno = oerrno;
 }

+ 2 - 5
Source/LexerParser/cmCommandArgumentLexer.in.l

@@ -7,7 +7,7 @@ This file must be translated to C++ and modified to build everywhere.
 
 Run flex >= 2.6 like this:
 
-  flex --nounistd -DFLEXINT_H --noline --header-file=cmCommandArgumentLexer.h -ocmCommandArgumentLexer.cxx cmCommandArgumentLexer.in.l
+  flex --nounistd --never-interactive --batch -DFLEXINT_H --noline --header-file=cmCommandArgumentLexer.h -ocmCommandArgumentLexer.cxx cmCommandArgumentLexer.in.l
 
 Modify cmCommandArgumentLexer.cxx:
   - remove trailing whitespace:              sed -i 's/\s*$//' cmCommandArgumentLexer.h cmCommandArgumentLexer.cxx
@@ -22,10 +22,7 @@ Modify cmCommandArgumentLexer.cxx:
 
 #include "cmCommandArgumentParserHelper.h"
 
-/* Replace the lexer input function.  */
-#undef YY_INPUT
-#define YY_INPUT(buf, result, max_size) \
-  do { result = yyextra->LexInput(buf, max_size); } while (0)
+#define YY_USER_ACTION  yyextra->UpdateInputPosition(yyleng);
 
 /* Include the set of tokens from the parser.  */
 #include "cmCommandArgumentParserTokens.h"

+ 17 - 21
Source/cmCommandArgumentParserHelper.cxx

@@ -205,23 +205,24 @@ bool cmCommandArgumentParserHelper::HandleEscapeSymbol(
 
 void cmCommandArgument_SetupEscapes(yyscan_t yyscanner, bool noEscapes);
 
-int cmCommandArgumentParserHelper::ParseString(const char* str, int verb)
+int cmCommandArgumentParserHelper::ParseString(std::string const& str,
+                                               int verb)
 {
-  if (!str) {
+  if (str.empty()) {
     return 0;
   }
+  this->InputSize = str.size();
   this->Verbose = verb;
-  this->InputBuffer = str;
-  this->InputBufferPos = 0;
-  this->CurrentLine = 0;
 
   this->Result.clear();
 
   yyscan_t yyscanner;
   cmCommandArgument_yylex_init(&yyscanner);
+  auto scanBuf = cmCommandArgument_yy_scan_string(str.c_str(), yyscanner);
   cmCommandArgument_yyset_extra(this, yyscanner);
   cmCommandArgument_SetupEscapes(yyscanner, this->NoEscapeMode);
   int res = cmCommandArgument_yyparse(yyscanner);
+  cmCommandArgument_yy_delete_buffer(scanBuf, yyscanner);
   cmCommandArgument_yylex_destroy(yyscanner);
   if (res != 0) {
     return 0;
@@ -241,25 +242,14 @@ void cmCommandArgumentParserHelper::CleanupParser()
   this->Variables.clear();
 }
 
-int cmCommandArgumentParserHelper::LexInput(char* buf, int maxlen)
+void cmCommandArgumentParserHelper::Error(const char* str)
 {
-  if (maxlen < 1) {
-    return 0;
+  auto pos = this->InputBufferPos;
+  auto const isEof = (this->InputSize < this->InputBufferPos);
+  if (!isEof) {
+    pos -= this->LastTokenLength;
   }
-  if (this->InputBufferPos < this->InputBuffer.size()) {
-    buf[0] = this->InputBuffer[this->InputBufferPos++];
-    if (buf[0] == '\n') {
-      this->CurrentLine++;
-    }
-    return (1);
-  }
-  buf[0] = '\n';
-  return (0);
-}
 
-void cmCommandArgumentParserHelper::Error(const char* str)
-{
-  unsigned long pos = static_cast<unsigned long>(this->InputBufferPos);
   std::ostringstream ostr;
   ostr << str << " (" << pos << ")";
   this->SetError(ostr.str());
@@ -286,3 +276,9 @@ void cmCommandArgumentParserHelper::SetError(std::string const& msg)
     this->ErrorString = msg;
   }
 }
+
+void cmCommandArgumentParserHelper::UpdateInputPosition(int const tokenLength)
+{
+  this->InputBufferPos += tokenLength;
+  this->LastTokenLength = tokenLength;
+}

+ 6 - 5
Source/cmCommandArgumentParserHelper.h

@@ -25,7 +25,7 @@ public:
   cmCommandArgumentParserHelper& operator=(
     cmCommandArgumentParserHelper const&) = delete;
 
-  int ParseString(const char* str, int verb);
+  int ParseString(std::string const& str, int verb);
 
   // For the lexer:
   void AllocateParserType(cmCommandArgumentParserHelper::ParserType* pt,
@@ -33,7 +33,6 @@ public:
   bool HandleEscapeSymbol(cmCommandArgumentParserHelper::ParserType* pt,
                           char symbol);
 
-  int LexInput(char* buf, int maxlen);
   void Error(const char* str);
 
   // For yacc
@@ -46,6 +45,8 @@ public:
 
   void SetMakefile(const cmMakefile* mf);
 
+  void UpdateInputPosition(int tokenLength);
+
   std::string& GetResult() { return this->Result; }
 
   void SetLineFile(long line, const char* file);
@@ -57,8 +58,9 @@ public:
   const char* GetError() { return this->ErrorString.c_str(); }
 
 private:
-  std::string::size_type InputBufferPos;
-  std::string InputBuffer;
+  std::string::size_type InputBufferPos{ 1 };
+  std::string::size_type LastTokenLength{};
+  std::string::size_type InputSize{};
   std::vector<char> OutputBuffer;
 
   void Print(const char* place, const char* str);
@@ -75,7 +77,6 @@ private:
   std::string ErrorString;
   const char* FileName;
   long FileLine;
-  int CurrentLine;
   int Verbose;
   bool EscapeQuotes;
   bool NoEscapeMode;

+ 1 - 1
Source/cmMakefile.cxx

@@ -2898,7 +2898,7 @@ MessageType cmMakefile::ExpandVariablesInStringOld(
   parser.SetNoEscapeMode(noEscapes);
   parser.SetReplaceAtSyntax(replaceAt);
   parser.SetRemoveEmpty(removeEmpty);
-  int res = parser.ParseString(source.c_str(), 0);
+  int res = parser.ParseString(source, 0);
   const char* emsg = parser.GetError();
   MessageType mtype = MessageType::LOG;
   if (res && !emsg[0]) {

+ 1 - 1
Tests/RunCMake/Syntax/CMP0053-Dollar-OLD-stderr.txt

@@ -19,6 +19,6 @@ CMake Error at CMP0053-Dollar-OLD.cmake:6 \(message\):
 
     -->\${\$}<--
 
-  syntax error, unexpected \$, expecting } \(7\)
+  syntax error, unexpected \$, expecting } \(6\)
 Call Stack \(most recent call first\):
   CMakeLists.txt:3 \(include\)$

+ 1 - 1
Tests/RunCMake/Syntax/NameWithCarriageReturn-stderr.txt

@@ -7,6 +7,6 @@
 
     \${var\\rwith\\rcarriagereturn}
 
-  syntax error, unexpected cal_SYMBOL, expecting } \(7\)
+  syntax error, unexpected cal_SYMBOL, expecting } \(6\)
 Call Stack \(most recent call first\):
   CMakeLists.txt:3 \(include\)$

+ 1 - 1
Tests/RunCMake/Syntax/NameWithCarriageReturnQuoted-stderr.txt

@@ -7,6 +7,6 @@
 
     \${var\\rwith\\rcarriagereturn}
 
-  syntax error, unexpected cal_SYMBOL, expecting } \(7\)
+  syntax error, unexpected cal_SYMBOL, expecting } \(6\)
 Call Stack \(most recent call first\):
   CMakeLists.txt:3 \(include\)$

+ 1 - 1
Tests/RunCMake/Syntax/NameWithEscapedSpaces-stderr.txt

@@ -7,6 +7,6 @@
 
     \${var\\ with\\ escaped\\ space}
 
-  syntax error, unexpected cal_SYMBOL, expecting } \(7\)
+  syntax error, unexpected cal_SYMBOL, expecting } \(6\)
 Call Stack \(most recent call first\):
   CMakeLists.txt:3 \(include\)$

+ 1 - 1
Tests/RunCMake/Syntax/NameWithEscapedSpacesQuoted-stderr.txt

@@ -7,6 +7,6 @@
 
     \${var\\ with\\ escaped\\ space}
 
-  syntax error, unexpected cal_SYMBOL, expecting } \(7\)
+  syntax error, unexpected cal_SYMBOL, expecting } \(6\)
 Call Stack \(most recent call first\):
   CMakeLists.txt:3 \(include\)$

+ 1 - 1
Tests/RunCMake/Syntax/NameWithNewline-stderr.txt

@@ -7,6 +7,6 @@
 
     \${var\\nwith\\nnewline}
 
-  syntax error, unexpected cal_SYMBOL, expecting } \(7\)
+  syntax error, unexpected cal_SYMBOL, expecting } \(6\)
 Call Stack \(most recent call first\):
   CMakeLists.txt:3 \(include\)$

+ 1 - 1
Tests/RunCMake/Syntax/NameWithNewlineQuoted-stderr.txt

@@ -7,6 +7,6 @@
 
     \${var\\nwith\\nnewline}
 
-  syntax error, unexpected cal_SYMBOL, expecting } \(7\)
+  syntax error, unexpected cal_SYMBOL, expecting } \(6\)
 Call Stack \(most recent call first\):
   CMakeLists.txt:3 \(include\)$

+ 1 - 1
Tests/RunCMake/Syntax/NameWithSpaces-stderr.txt

@@ -7,6 +7,6 @@
 
     \${var
 
-  syntax error, unexpected \$end, expecting } \(5\)
+  syntax error, unexpected \$end, expecting } \(6\)
 Call Stack \(most recent call first\):
   CMakeLists.txt:3 \(include\)$

+ 1 - 1
Tests/RunCMake/Syntax/NameWithSpacesQuoted-stderr.txt

@@ -7,6 +7,6 @@
 
     \${var with space}
 
-  syntax error, unexpected cal_SYMBOL, expecting } \(17\)
+  syntax error, unexpected cal_SYMBOL, expecting } \(3\)
 Call Stack \(most recent call first\):
   CMakeLists.txt:3 \(include\)$

+ 1 - 1
Tests/RunCMake/Syntax/NameWithTabs-stderr.txt

@@ -7,6 +7,6 @@
 
     \${var
 
-  syntax error, unexpected \$end, expecting } \(5\)
+  syntax error, unexpected \$end, expecting } \(6\)
 Call Stack \(most recent call first\):
   CMakeLists.txt:3 \(include\)$

+ 1 - 1
Tests/RunCMake/Syntax/NameWithTabsQuoted-stderr.txt

@@ -7,6 +7,6 @@
 
     \${var	with	tab}
 
-  syntax error, unexpected cal_SYMBOL, expecting } \(15\)
+  syntax error, unexpected cal_SYMBOL, expecting } \(3\)
 Call Stack \(most recent call first\):
   CMakeLists.txt:3 \(include\)$

+ 1 - 1
Tests/RunCMake/Syntax/ParenInENV-stderr.txt

@@ -15,6 +15,6 @@ CMake Error at ParenInENV.cmake:2 \(message\):
 
     -->\$ENV{e
 
-  syntax error, unexpected \$end, expecting } \(9\)
+  syntax error, unexpected \$end, expecting } \(10\)
 Call Stack \(most recent call first\):
   CMakeLists.txt:3 \(include\)

+ 1 - 1
Tests/RunCMake/Syntax/ParenInVarName1-stderr.txt

@@ -7,6 +7,6 @@ CMake Error at ParenInVarName1.cmake:4 \(message\):
 
     -->\${e\(x\)}<--
 
-  syntax error, unexpected cal_SYMBOL, expecting } \(10\)
+  syntax error, unexpected cal_SYMBOL, expecting } \(6\)
 Call Stack \(most recent call first\):
   CMakeLists.txt:3 \(include\)

+ 1 - 1
Tests/RunCMake/Syntax/UnterminatedBrace0-stderr.txt

@@ -7,6 +7,6 @@ CMake Error at UnterminatedBrace0.cmake:2 \(set\):
 
     \${
 
-  syntax error, unexpected \$end, expecting } \(2\)
+  syntax error, unexpected \$end, expecting } \(3\)
 Call Stack \(most recent call first\):
   CMakeLists.txt:3 \(include\)

+ 1 - 1
Tests/RunCMake/Syntax/UnterminatedBrace1-stderr.txt

@@ -7,7 +7,7 @@ CMake Warning \(dev\) at UnterminatedBrace1.cmake:3 \(set\):
 
     \${
 
-  syntax error, unexpected \$end, expecting } \(2\)
+  syntax error, unexpected \$end, expecting } \(3\)
 Call Stack \(most recent call first\):
   CMakeLists.txt:3 \(include\)
 This warning is for project developers.  Use -Wno-dev to suppress it.