/*
 * ERMParser.cpp, part of VCMI engine
 *
 * Authors: listed in file AUTHORS in main folder
 *
 * License: GNU General Public License v2.0 or later
 * Full text of license available in license.txt file, in main folder
 *
 */
  10. #include "StdInc.h"
  11. #include "ERMParser.h"
  12. #include <boost/spirit/include/qi.hpp>
  13. #include <boost/spirit/include/phoenix_core.hpp>
  14. #include <boost/spirit/include/phoenix_operator.hpp>
  15. #include <boost/spirit/include/phoenix_fusion.hpp>
  16. #include <boost/spirit/include/phoenix_stl.hpp>
  17. #include <boost/spirit/include/phoenix_object.hpp>
  18. #include <boost/fusion/include/adapt_struct.hpp>
  19. namespace qi = boost::spirit::qi;
  20. namespace ascii = spirit::ascii;
  21. namespace phoenix = boost::phoenix;
//Greenspun's Tenth Rule of Programming:
//Any sufficiently complicated C or Fortran program contains an ad hoc, informally-specified,
//bug-ridden, slow implementation of half of Common Lisp.
//actually these macros help in dealing with boost::variant
  26. CERMPreprocessor::CERMPreprocessor(const std::string & source)
  27. : sourceStream(source),
  28. lineNo(0),
  29. version(Version::INVALID)
  30. {
  31. //check header
  32. std::string header;
  33. getline(header);
  34. if(header == "ZVSE")
  35. version = Version::ERM;
  36. else if(header == "VERM")
  37. version = Version::VERM;
  38. else
  39. logGlobal->error("File %s has wrong header", fname);
  40. }
  /// Thrown when a script cannot be split into commands or a command does not match the grammar.
  ///
  /// Carries no payload; the details are written to the log at the throw site.
  class ParseErrorException : public std::exception
  {
  public:
      /// Gives generic std::exception handlers a meaningful description
      /// instead of the implementation-defined default.
      const char * what() const noexcept override
      {
          return "ERM parse error";
      }
  };
  44. std::string CERMPreprocessor::retrieveCommandLine()
  45. {
  46. std::string wholeCommand;
  47. //parse file
  48. bool verm = false;
  49. bool openedString = false;
  50. int openedBraces = 0;
  51. while(sourceStream.good())
  52. {
  53. std::string line ;
  54. getline(line); //reading line
  55. size_t dash = line.find_first_of('^');
  56. bool inTheMiddle = openedBraces || openedString;
  57. if(!inTheMiddle)
  58. {
  59. if(line.size() < 2)
  60. continue;
  61. if(line[0] != '!' ) //command lines must begin with ! -> otherwise treat as comment
  62. continue;
  63. verm = line[1] == '[';
  64. }
  65. if(openedString)
  66. {
  67. wholeCommand += "\n";
  68. if(dash != std::string::npos)
  69. {
  70. wholeCommand += line.substr(0, dash + 1);
  71. line.erase(0,dash + 1);
  72. openedString = false;
  73. }
  74. else //no closing marker -> the whole line is further part of string
  75. {
  76. wholeCommand += line;
  77. continue;
  78. }
  79. }
  80. int i = 0;
  81. for(; i < line.length(); i++)
  82. {
  83. char c = line[i];
  84. if(!openedString)
  85. {
  86. if(c == '[')
  87. openedBraces++;
  88. else if(c == ']')
  89. {
  90. openedBraces--;
  91. if(!openedBraces) //the last brace has been matched -> stop "parsing", everything else in the line is comment
  92. {
  93. i++;
  94. break;
  95. }
  96. }
  97. else if(c == '^')
  98. openedString = true;
  99. else if(c == ';' && !verm) //do not allow comments inside VExp for now
  100. {
  101. line.erase(i+!verm, line.length() - i - !verm); //leave ';' at the end only at ERM commands
  102. break;
  103. }
  104. // else if(c == ';') // a ';' that is in command line (and not in string) ends the command -> throw away rest
  105. // {
  106. // line.erase(i+!verm, line.length() - i - !verm); //leave ';' at the end only at ERM commands
  107. // break;
  108. // }
  109. }
  110. else if(c == '^')
  111. openedString = false;
  112. }
  113. if(verm && !openedBraces && i < line.length())
  114. {
  115. line.erase(i, line.length() - i);
  116. }
  117. if(wholeCommand.size()) //separate lines with a space
  118. wholeCommand += " ";
  119. wholeCommand += line;
  120. if(!openedBraces && !openedString)
  121. return wholeCommand;
  122. //loop end
  123. }
  124. if(openedBraces || openedString)
  125. {
  126. logGlobal->error("Ill-formed file: %s", fname);
  127. throw ParseErrorException();
  128. }
  129. return "";
  130. }
  131. void CERMPreprocessor::getline(std::string &ret)
  132. {
  133. lineNo++;
  134. std::getline(sourceStream, ret);
  135. boost::trim(ret); //get rid of wspace
  136. }
  137. ERMParser::ERMParser()
  138. {
  139. ERMgrammar = std::make_shared<ERM::ERM_grammar<std::string::const_iterator>>();
  140. }
  141. ERMParser::~ERMParser() = default;
  142. std::vector<LineInfo> ERMParser::parseFile(CERMPreprocessor & preproc)
  143. {
  144. std::vector<LineInfo> ret;
  145. try
  146. {
  147. while(1)
  148. {
  149. std::string command = preproc.retrieveCommandLine();
  150. if(command.length() == 0)
  151. break;
  152. repairEncoding(command);
  153. LineInfo li;
  154. li.realLineNum = preproc.getCurLineNo();
  155. li.tl = parseLine(command, li.realLineNum);
  156. ret.push_back(li);
  157. }
  158. }
  159. catch (ParseErrorException & e)
  160. {
  161. logGlobal->error("Stopped parsing file.");
  162. throw;
  163. }
  164. return ret;
  165. }
  166. BOOST_FUSION_ADAPT_STRUCT(
  167. ERM::TStringConstant,
  168. (std::string, str)
  169. )
  170. BOOST_FUSION_ADAPT_STRUCT(
  171. ERM::TMacroUsage,
  172. (std::string, macro)
  173. )
  174. // BOOST_FUSION_ADAPT_STRUCT(
  175. // ERM::TQMacroUsage,
  176. // (std::string, qmacro)
  177. // )
  178. BOOST_FUSION_ADAPT_STRUCT(
  179. ERM::TMacroDef,
  180. (std::string, macro)
  181. )
  182. BOOST_FUSION_ADAPT_STRUCT(
  183. ERM::TVarExpNotMacro,
  184. (boost::optional<char>, questionMark)
  185. (std::string, varsym)
  186. (ERM::TVarExpNotMacro::Tval, val)
  187. )
  188. BOOST_FUSION_ADAPT_STRUCT(
  189. ERM::TArithmeticOp,
  190. (ERM::TIexp, lhs)
  191. (char, opcode)
  192. (ERM::TIexp, rhs)
  193. )
  194. BOOST_FUSION_ADAPT_STRUCT(
  195. ERM::TVarpExp,
  196. (ERM::TVarExp, var)
  197. )
  198. BOOST_FUSION_ADAPT_STRUCT(
  199. ERM::TVRLogic,
  200. (char, opcode)
  201. (ERM::TIexp, var)
  202. )
  203. BOOST_FUSION_ADAPT_STRUCT(
  204. ERM::TVRArithmetic,
  205. (char, opcode)
  206. (ERM::TIexp, rhs)
  207. )
  208. BOOST_FUSION_ADAPT_STRUCT(
  209. ERM::TNormalBodyOption,
  210. (char, optionCode)
  211. (boost::optional<ERM::TNormalBodyOptionList>, params)
  212. )
  213. BOOST_FUSION_ADAPT_STRUCT(
  214. ERM::Ttrigger,
  215. (ERM::TCmdName, name)
  216. (boost::optional<ERM::Tidentifier>, identifier)
  217. (boost::optional<ERM::Tcondition>, condition)
  218. )
  219. BOOST_FUSION_ADAPT_STRUCT(
  220. ERM::TComparison,
  221. (ERM::TIexp, lhs)
  222. (std::string, compSign)
  223. (ERM::TIexp, rhs)
  224. )
  225. BOOST_FUSION_ADAPT_STRUCT(
  226. ERM::TSemiCompare,
  227. (std::string, compSign)
  228. (ERM::TIexp, rhs)
  229. )
  230. BOOST_FUSION_ADAPT_STRUCT(
  231. ERM::TCurriedString,
  232. (ERM::TIexp, iexp)
  233. (ERM::TStringConstant, string)
  234. )
  235. BOOST_FUSION_ADAPT_STRUCT(
  236. ERM::TVarConcatString,
  237. (ERM::TVarExp, var)
  238. (ERM::TStringConstant, string)
  239. )
  240. BOOST_FUSION_ADAPT_STRUCT(
  241. ERM::Tcondition,
  242. (char, ctype)
  243. (ERM::Tcondition::Tcond, cond)
  244. (ERM::TconditionNode, rhs)
  245. )
  246. BOOST_FUSION_ADAPT_STRUCT(
  247. ERM::Tinstruction,
  248. (ERM::TCmdName, name)
  249. (boost::optional<ERM::Tidentifier>, identifier)
  250. (boost::optional<ERM::Tcondition>, condition)
  251. (ERM::Tbody, body)
  252. )
  253. BOOST_FUSION_ADAPT_STRUCT(
  254. ERM::Treceiver,
  255. (ERM::TCmdName, name)
  256. (boost::optional<ERM::Tidentifier>, identifier)
  257. (boost::optional<ERM::Tcondition>, condition)
  258. (boost::optional<ERM::Tbody>, body)
  259. )
  260. BOOST_FUSION_ADAPT_STRUCT(
  261. ERM::TPostTrigger,
  262. (ERM::TCmdName, name)
  263. (boost::optional<ERM::Tidentifier>, identifier)
  264. (boost::optional<ERM::Tcondition>, condition)
  265. )
  266. //BOOST_FUSION_ADAPT_STRUCT(
  267. // ERM::Tcommand,
  268. // (ERM::Tcommand::Tcmd, cmd)
  269. // (std::string, comment)
  270. // )
  271. BOOST_FUSION_ADAPT_STRUCT(
  272. ERM::Tcommand,
  273. (ERM::Tcommand::Tcmd, cmd)
  274. )
  275. BOOST_FUSION_ADAPT_STRUCT(
  276. ERM::TVExp,
  277. (std::vector<ERM::TVModifier>, modifier)
  278. (std::vector<ERM::TVOption>, children)
  279. )
  280. BOOST_FUSION_ADAPT_STRUCT(
  281. ERM::TSymbol,
  282. (std::vector<ERM::TVModifier>, symModifier)
  283. (std::string, sym)
  284. )
  285. namespace ERM
  286. {
  287. template<typename Iterator>
  288. struct ERM_grammar : qi::grammar<Iterator, TLine(), ascii::space_type>
  289. {
  290. ERM_grammar() : ERM_grammar::base_type(vline, "VERM script line")
  291. {
  292. //do not build too complicated expressions, e.g. (a >> b) | c, qi has problems with them
  293. ERMmacroUsage %= qi::lexeme[qi::lit('$') >> *(qi::char_ - '$') >> qi::lit('$')];
  294. ERMmacroDef %= qi::lexeme[qi::lit('@') >> *(qi::char_ - '@') >> qi::lit('@')];
  295. varExpNotMacro %= -qi::char_("?") >> (+(qi::char_("a-z") - 'u')) >> -qi::int_;
  296. //TODO: mixed var/macro expressions like in !!HE-1&407:Id$cost$; [script 13]
  297. /*qERMMacroUsage %= qi::lexeme[qi::lit("?$") >> *(qi::char_ - '$') >> qi::lit('$')];*/
  298. varExp %= varExpNotMacro | ERMmacroUsage;
  299. iexp %= varExp | qi::int_;
  300. varp %= qi::lit("?") >> varExp;
  301. comment %= *qi::char_;
  302. commentLine %= (~qi::char_("!") >> comment | (qi::char_('!') >> (~qi::char_("?!$#[")) >> comment ));
  303. cmdName %= qi::lexeme[qi::repeat(2)[qi::char_]];
  304. arithmeticOp %= iexp >> qi::char_ >> iexp;
  305. //???
  306. //identifier is usually a vector of i-expressions but VR receiver performs arithmetic operations on it
  307. //identifier %= (iexp | arithmeticOp) % qi::lit('/');
  308. identifier %= iexp % qi::lit('/');
  309. comparison %= iexp >> (*qi::char_("<=>")) >> iexp;
  310. condition %= qi::char_("&|/") >> (comparison | qi::int_) >> -condition;
  311. trigger %= cmdName >> -identifier >> -condition > qi::lit(";"); /////
  312. string %= qi::lexeme['^' >> *(qi::char_ - '^') >> '^'];
  313. VRLogic %= qi::char_("&|") >> iexp;
  314. VRarithmetic %= qi::char_("+*:/%-") >> iexp;
  315. semiCompare %= +qi::char_("<=>") >> iexp;
  316. curStr %= iexp >> string;
  317. varConcatString %= varExp >> qi::lit("+") >> string;
  318. bodyOptionItem %= varConcatString | curStr | string | semiCompare | ERMmacroDef | varp | iexp ;
  319. exactBodyOptionList %= (bodyOptionItem % qi::lit("/"));
  320. normalBodyOption = qi::char_("A-Z") > -(exactBodyOptionList);
  321. bodyOption %= VRLogic | VRarithmetic | normalBodyOption;
  322. body %= qi::lit(":") >> *(bodyOption) > qi::lit(";");
  323. instruction %= cmdName >> -identifier >> -condition >> body;
  324. receiver %= cmdName >> -identifier >> -condition >> body;
  325. postTrigger %= cmdName >> -identifier >> -condition > qi::lit(";");
  326. command %= (qi::lit("!") >>
  327. (
  328. (qi::lit("?") >> trigger) |
  329. (qi::lit("!") >> receiver) |
  330. (qi::lit("#") >> instruction) |
  331. (qi::lit("$") >> postTrigger)
  332. ) //>> comment
  333. );
  334. rline %=
  335. (
  336. command | commentLine | spirit::eps
  337. );
  338. vmod %= qi::string("`") | qi::string(",!") | qi::string(",") | qi::string("#'") | qi::string("'");
  339. vsym %= *vmod >> qi::lexeme[+qi::char_("+*/$%&_=<>~a-zA-Z0-9-")];
  340. qi::real_parser<double, qi::strict_real_policies<double> > strict_double;
  341. vopt %= qi::lexeme[(qi::lit("!") >> qi::char_ >> qi::lit("!"))] | qi::lexeme[strict_double] | qi::lexeme[qi::int_] | command | vexp | string | vsym;
  342. vexp %= *vmod >> qi::lit("[") >> *(vopt) >> qi::lit("]");
  343. vline %= (( qi::lit("!") >>vexp) | rline ) > spirit::eoi;
  344. //error handling
  345. string.name("string constant");
  346. ERMmacroUsage.name("macro usage");
  347. /*qERMMacroUsage.name("macro usage with ?");*/
  348. ERMmacroDef.name("macro definition");
  349. varExpNotMacro.name("variable expression (not macro)");
  350. varExp.name("variable expression");
  351. iexp.name("i-expression");
  352. comment.name("comment");
  353. commentLine.name("comment line");
  354. cmdName.name("name of a command");
  355. identifier.name("identifier");
  356. condition.name("condition");
  357. trigger.name("trigger");
  358. body.name("body");
  359. instruction.name("instruction");
  360. receiver.name("receiver");
  361. postTrigger.name("post trigger");
  362. command.name("command");
  363. rline.name("ERM script line");
  364. vsym.name("V symbol");
  365. vopt.name("V option");
  366. vexp.name("V expression");
  367. vline.name("VERM line");
  368. qi::on_error<qi::fail>
  369. (
  370. vline
  371. , std::cout //or phoenix::ref(std::count), is there any difference?
  372. << phoenix::val("Error! Expecting ")
  373. << qi::_4 // what failed?
  374. << phoenix::val(" here: \"")
  375. << phoenix::construct<std::string>(qi::_3, qi::_2) // iterators to error-pos, end
  376. << phoenix::val("\"")
  377. );
  378. }
  379. qi::rule<Iterator, TStringConstant(), ascii::space_type> string;
  380. qi::rule<Iterator, TMacroUsage(), ascii::space_type> ERMmacroUsage;
  381. /*qi::rule<Iterator, TQMacroUsage(), ascii::space_type> qERMMacroUsage;*/
  382. qi::rule<Iterator, TMacroDef(), ascii::space_type> ERMmacroDef;
  383. qi::rule<Iterator, TVarExpNotMacro(), ascii::space_type> varExpNotMacro;
  384. qi::rule<Iterator, TVarExp(), ascii::space_type> varExp;
  385. qi::rule<Iterator, TIexp(), ascii::space_type> iexp;
  386. qi::rule<Iterator, TVarpExp(), ascii::space_type> varp;
  387. qi::rule<Iterator, TArithmeticOp(), ascii::space_type> arithmeticOp;
  388. qi::rule<Iterator, std::string(), ascii::space_type> comment;
  389. qi::rule<Iterator, std::string(), ascii::space_type> commentLine;
  390. qi::rule<Iterator, TCmdName(), ascii::space_type> cmdName;
  391. qi::rule<Iterator, Tidentifier(), ascii::space_type> identifier;
  392. qi::rule<Iterator, TComparison(), ascii::space_type> comparison;
  393. qi::rule<Iterator, Tcondition(), ascii::space_type> condition;
  394. qi::rule<Iterator, TVRLogic(), ascii::space_type> VRLogic;
  395. qi::rule<Iterator, TVRArithmetic(), ascii::space_type> VRarithmetic;
  396. qi::rule<Iterator, TSemiCompare(), ascii::space_type> semiCompare;
  397. qi::rule<Iterator, TCurriedString(), ascii::space_type> curStr;
  398. qi::rule<Iterator, TVarConcatString(), ascii::space_type> varConcatString;
  399. qi::rule<Iterator, TBodyOptionItem(), ascii::space_type> bodyOptionItem;
  400. qi::rule<Iterator, TNormalBodyOptionList(), ascii::space_type> exactBodyOptionList;
  401. qi::rule<Iterator, TNormalBodyOption(), ascii::space_type> normalBodyOption;
  402. qi::rule<Iterator, TBodyOption(), ascii::space_type> bodyOption;
  403. qi::rule<Iterator, Ttrigger(), ascii::space_type> trigger;
  404. qi::rule<Iterator, Tbody(), ascii::space_type> body;
  405. qi::rule<Iterator, Tinstruction(), ascii::space_type> instruction;
  406. qi::rule<Iterator, Treceiver(), ascii::space_type> receiver;
  407. qi::rule<Iterator, TPostTrigger(), ascii::space_type> postTrigger;
  408. qi::rule<Iterator, Tcommand(), ascii::space_type> command;
  409. qi::rule<Iterator, TERMline(), ascii::space_type> rline;
  410. qi::rule<Iterator, TSymbol(), ascii::space_type> vsym;
  411. qi::rule<Iterator, TVModifier(), ascii::space_type> vmod;
  412. qi::rule<Iterator, TVOption(), ascii::space_type> vopt;
  413. qi::rule<Iterator, TVExp(), ascii::space_type> vexp;
  414. qi::rule<Iterator, TLine(), ascii::space_type> vline;
  415. };
  416. }
  417. ERM::TLine ERMParser::parseLine(const std::string & line, int realLineNo)
  418. {
  419. try
  420. {
  421. return parseLine(line);
  422. }
  423. catch(...)
  424. {
  425. //logGlobal->error("Parse error occurred in file %s (line %d): %s", fname, realLineNo, line);
  426. throw;
  427. }
  428. }
  429. ERM::TLine ERMParser::parseLine(const std::string & line)
  430. {
  431. auto beg = line.begin();
  432. auto end = line.end();
  433. ERM::TLine AST;
  434. bool r = qi::phrase_parse(beg, end, *ERMgrammar.get(), ascii::space, AST);
  435. if(!r || beg != end)
  436. {
  437. logGlobal->error("Parse error: cannot parse: %s", std::string(beg, end));
  438. throw ParseErrorException();
  439. }
  440. return AST;
  441. }
  442. void ERMParser::repairEncoding(std::string & str) const
  443. {
  444. for(int g=0; g<str.size(); ++g)
  445. if(str[g] & 0x80)
  446. str[g] = '|';
  447. }
  448. void ERMParser::repairEncoding(char * str, int len) const
  449. {
  450. for(int g=0; g<len; ++g)
  451. if(str[g] & 0x80)
  452. str[g] = '|';
  453. }