ERMParser.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564
  1. #define VCMI_DLL
  2. #include "ERMParser.h"
  3. #include <boost/version.hpp>
//Allows compilation with older Boost versions: the parser is only built for Boost 1.46 or newer.
//The exact minimum version is unknown - 1.46 works while 1.42 does not.
  6. #if BOOST_VERSION >= 104600
  7. #include <boost/spirit/include/qi.hpp>
  8. #include <boost/bind.hpp>
  9. #include <boost/spirit/include/phoenix_core.hpp>
  10. #include <boost/spirit/include/phoenix_operator.hpp>
  11. #include <boost/spirit/include/phoenix_fusion.hpp>
  12. #include <boost/spirit/include/phoenix_stl.hpp>
  13. #include <boost/spirit/include/phoenix_object.hpp>
  14. #include <boost/fusion/include/adapt_struct.hpp>
  15. #include <fstream>
  16. #include <boost/algorithm/string/trim.hpp>
  17. namespace spirit = boost::spirit;
  18. namespace qi = boost::spirit::qi;
  19. namespace ascii = spirit::ascii;
  20. namespace phoenix = boost::phoenix;
  21. /*
  22. * ERMParser.cpp, part of VCMI engine
  23. *
  24. * Authors: listed in file AUTHORS in main folder
  25. *
  26. * License: GNU General Public License v2.0 or later
  27. * Full text of license available in license.txt file, in main folder
  28. *
  29. */
  30. //Greenspun's Tenth Rule of Programming:
  31. //Any sufficiently complicated C or Fortran program contains an ad hoc, informally-specified,
  32. //bug-ridden, slow implementation of half of Common Lisp.
  33. //actually these macros help in dealing with boost::variant
//Opens a visitor struct named LinePrinterVisitor that derives from boost::static_visitor<>.
//It is meant to be followed by FOR_TYPE(...) { ... } handlers and closed by DO_TYPE_CASE.
#define BEGIN_TYPE_CASE(LinePrinterVisitor) struct LinePrinterVisitor : boost::static_visitor<> \
{
//Declares one handler: a const operator() overload that receives a TYPE by const reference as VAR.
#define FOR_TYPE(TYPE, VAR) void operator()(TYPE const& VAR) const
//Closes the visitor struct, declares an instance of it named ___UN and applies it to VAR.
//NOTE(review): the LinePrinterVisitor argument is not used in this expansion, and ___UN contains
//a double underscore, which makes it an identifier reserved for the implementation.
#define DO_TYPE_CASE(LinePrinterVisitor, VAR) } ___UN; boost::apply_visitor(___UN, VAR);
  38. CERMPreprocessor::CERMPreprocessor(const std::string &Fname) : fname(Fname), file(Fname), lineNo(0), version(INVALID)
  39. {
  40. if(!file.is_open())
  41. {
  42. tlog1 << "File " << Fname << " not found or unable to open\n";
  43. return;
  44. }
  45. //check header
  46. std::string header;
  47. getline(header);
  48. if(header == "ZVSE")
  49. version = ERM;
  50. else if(header == "VERM")
  51. version = VERM;
  52. else
  53. {
  54. tlog1 << "File " << fname << " has wrong header\n";
  55. return;
  56. }
  57. }
  58. class ParseErrorException : public std::exception
  59. {
  60. };
  61. std::string CERMPreprocessor::retreiveCommandLine()
  62. {
  63. std::string wholeCommand;
  64. //parse file
  65. bool verm = false;
  66. bool openedString = false;
  67. int openedBraces = 0;
  68. while(file.good())
  69. {
  70. std::string line ;
  71. getline(line); //reading line
  72. int dash = line.find_first_of('^');
  73. bool inTheMiddle = openedBraces || openedString;
  74. if(!inTheMiddle)
  75. {
  76. if(line.size() < 2)
  77. continue;
  78. if(line[0] != '!' ) //command lines must begin with ! -> otherwise treat as comment
  79. continue;
  80. verm = line[1] == '[';
  81. }
  82. if(openedString)
  83. {
  84. wholeCommand += "\\n";
  85. if(dash != std::string::npos)
  86. {
  87. wholeCommand += line.substr(0, dash);
  88. line.erase(0,dash);
  89. }
  90. else //no closing marker -> the whole line is further part of string
  91. {
  92. wholeCommand += line;
  93. continue;
  94. }
  95. }
  96. int i = 0;
  97. for(; i < line.length(); i++)
  98. {
  99. char c = line[i];
  100. if(!openedString)
  101. {
  102. if(c == '[')
  103. openedBraces++;
  104. else if(c == ']')
  105. {
  106. openedBraces--;
  107. if(!openedBraces) //the last brace has been matched -> stop "parsing", everything else in the line is comment
  108. {
  109. i++;
  110. break;
  111. }
  112. }
  113. else if(c == '^')
  114. openedString = true;
  115. else if(c == ';') // a ';' that is in command line (and not in string) ends the command -> throw away rest
  116. {
  117. line.erase(i+!verm, line.length() - i - !verm); //leave ';' at the end only at ERM commands
  118. break;
  119. }
  120. }
  121. else if(c == '^')
  122. openedString = false;
  123. }
  124. if(verm && !openedBraces && i < line.length())
  125. {
  126. line.erase(i, line.length() - i);
  127. }
  128. if(wholeCommand.size()) //separate lines with a space
  129. wholeCommand += " ";
  130. wholeCommand += line;
  131. if(!openedBraces && !openedString)
  132. return wholeCommand;
  133. //loop end
  134. }
  135. if(openedBraces || openedString)
  136. tlog1 << "Ill-formed file: " << fname << std::endl;
  137. return "";
  138. }
  139. void CERMPreprocessor::getline(std::string &ret)
  140. {
  141. lineNo++;
  142. std::getline(file, ret);
  143. boost::trim(ret); //get rid of wspace
  144. }
  145. ERMParser::ERMParser(std::string file)
  146. :srcFile(file)
  147. {}
  148. std::vector<ERM::TLine> ERMParser::parseFile()
  149. {
  150. CERMPreprocessor preproc(srcFile);
  151. std::vector<ERM::TLine> ret;
  152. try
  153. {
  154. while(1)
  155. {
  156. std::string command = preproc.retreiveCommandLine();
  157. if(command.length() == 0)
  158. break;
  159. repairEncoding(command);
  160. ret.push_back(parseLine(command));
  161. }
  162. }
  163. catch (ParseErrorException & e)
  164. {
  165. tlog1 << "stopped parsing file" << std::endl;
  166. }
  167. return ret;
  168. }
//Boost.Fusion adaptations of the ERM syntax tree structs.
//Each invocation lists the members of one struct as (type, name) pairs. The grammar below
//declares rules whose attribute types are these structs, so the member order given here has to
//stay in step with the order of the sub-parsers in the corresponding rule.
BOOST_FUSION_ADAPT_STRUCT(
	ERM::TStringConstant,
	(std::string, str)
	)
BOOST_FUSION_ADAPT_STRUCT(
	ERM::TMacroUsage,
	(std::string, macro)
	)
BOOST_FUSION_ADAPT_STRUCT(
	ERM::TQMacroUsage,
	(std::string, qmacro)
	)
BOOST_FUSION_ADAPT_STRUCT(
	ERM::TMacroDef,
	(std::string, macro)
	)
BOOST_FUSION_ADAPT_STRUCT(
	ERM::TVarExpNotMacro,
	(boost::optional<char>, questionMark)
	(std::string, varsym)
	(ERM::TVarExpNotMacro::Tval, val)
	)
BOOST_FUSION_ADAPT_STRUCT(
	ERM::TArithmeticOp,
	(ERM::TIexp, lhs)
	(char, opcode)
	(ERM::TIexp, rhs)
	)
BOOST_FUSION_ADAPT_STRUCT(
	ERM::TVarpExp,
	(ERM::TVarpExp::Tvartype, var)
	)
BOOST_FUSION_ADAPT_STRUCT(
	ERM::TVRLogic,
	(char, opcode)
	(ERM::TIexp, var)
	)
BOOST_FUSION_ADAPT_STRUCT(
	ERM::TVRArithmetic,
	(char, opcode)
	(ERM::TIexp, rhs)
	)
BOOST_FUSION_ADAPT_STRUCT(
	ERM::TNormalBodyOption,
	(char, optionCode)
	(ERM::TNormalBodyOptionList, params)
	)
BOOST_FUSION_ADAPT_STRUCT(
	ERM::Ttrigger,
	(ERM::TCmdName, name)
	(boost::optional<ERM::Tidentifier>, identifier)
	(boost::optional<ERM::Tcondition>, condition)
	)
BOOST_FUSION_ADAPT_STRUCT(
	ERM::TComparison,
	(ERM::TIexp, lhs)
	(std::string, compSign)
	(ERM::TIexp, rhs)
	)
BOOST_FUSION_ADAPT_STRUCT(
	ERM::TSemiCompare,
	(std::string, compSign)
	(ERM::TIexp, rhs)
	)
BOOST_FUSION_ADAPT_STRUCT(
	ERM::TCurriedString,
	(ERM::TIexp, iexp)
	(ERM::TStringConstant, string)
	)
BOOST_FUSION_ADAPT_STRUCT(
	ERM::TVarConcatString,
	(ERM::TVarExp, var)
	(ERM::TStringConstant, string)
	)
BOOST_FUSION_ADAPT_STRUCT(
	ERM::Tcondition,
	(char, ctype)
	(ERM::Tcondition::Tcond, cond)
	(ERM::TconditionNode, rhs)
	)
BOOST_FUSION_ADAPT_STRUCT(
	ERM::Tinstruction,
	(ERM::TCmdName, name)
	(boost::optional<ERM::Tidentifier>, identifier)
	(boost::optional<ERM::Tcondition>, condition)
	(ERM::Tbody, body)
	)
BOOST_FUSION_ADAPT_STRUCT(
	ERM::Treceiver,
	(ERM::TCmdName, name)
	(boost::optional<ERM::Tidentifier>, identifier)
	(boost::optional<ERM::Tcondition>, condition)
	(boost::optional<ERM::Tbody>, body)
	)
BOOST_FUSION_ADAPT_STRUCT(
	ERM::TPostTrigger,
	(ERM::TCmdName, name)
	(boost::optional<ERM::Tidentifier>, identifier)
	(boost::optional<ERM::Tcondition>, condition)
	)
BOOST_FUSION_ADAPT_STRUCT(
	ERM::Tcommand,
	(ERM::Tcommand::Tcmd, cmd)
	(std::string, comment)
	)
BOOST_FUSION_ADAPT_STRUCT(
	ERM::TVExp,
	(std::vector<ERM::TVModifier>, modifier)
	(std::vector<ERM::TVOption>, children)
	)
BOOST_FUSION_ADAPT_STRUCT(
	ERM::TSymbol,
	(std::vector<ERM::TVModifier>, symModifier)
	(std::string, sym)
	)
namespace ERM
{
	//Spirit.Qi grammar for one script line (an ERM command, a comment line or a VERM expression).
	//The start rule is vline and the produced attribute is a TLine. All rules are declared with
	//an ascii::space_type skipper; qi::lexeme[] is used where whitespace must not be skipped.
	template<typename Iterator>
	struct ERM_grammar : qi::grammar<Iterator, TLine(), ascii::space_type>
	{
		ERM_grammar() : ERM_grammar::base_type(vline, "VERM script line")
		{
			//do not build too complicated expressions, e.g. (a >> b) | c, qi has problems with them

			//macro usage: any text enclosed in a pair of '$'
			ERMmacroUsage %= qi::lexeme[qi::lit('$') >> *(qi::char_ - '$') >> qi::lit('$')];
			//macro definition: any text enclosed in a pair of '@'
			ERMmacroDef %= qi::lexeme[qi::lit('@') >> *(qi::char_ - '@') >> qi::lit('@')];
			//optional '?', then one or more lowercase letters other than 'u', then an optional integer
			varExpNotMacro %= -qi::char_("?") >> (+(qi::char_("a-z") - 'u')) >> -qi::int_;
			//same as macro usage but introduced by "?$"
			qERMMacroUsage %= qi::lexeme[qi::lit("?$") >> *(qi::char_ - '$') >> qi::lit('$')];
			varExp %= varExpNotMacro | ERMmacroUsage;
			//i-expression: a variable expression or an integer literal
			iexp %= varExp | qi::int_;
			varp %=/* qi::lit("?") >> */(varExpNotMacro | qERMMacroUsage);
			//comment: whatever is left of the line
			comment %= *qi::char_;
			//comment line: does not start with '!', or starts with '!' followed by a character other than ? ! $ # [
			commentLine %= (~qi::char_("!") >> comment | (qi::char_('!') >> (~qi::char_("?!$#[")) >> comment ));
			//command name: exactly two characters
			cmdName %= qi::lexeme[qi::repeat(2)[qi::char_]];
			arithmeticOp %= iexp >> qi::char_ >> iexp;
			//identifier is usually a vector of i-expressions but VR receiver performs arithmetic operations on it
			identifier %= (iexp | arithmeticOp) % qi::lit('/');
			comparison %= iexp >> (*qi::char_("<=>")) >> iexp;
			//condition: one of & | X /, then a comparison or an integer, optionally followed by a further condition
			condition %= qi::char_("&|X/") >> (comparison | qi::int_) >> -condition;
			//operator> is Qi's expectation operator: the terminating ';' is mandatory here
			trigger %= cmdName >> -identifier >> -condition > qi::lit(";"); /////
			//string constant: any text enclosed in a pair of '^'
			string %= qi::lexeme['^' >> *(qi::char_ - '^') >> '^'];
			VRLogic %= qi::char_("&|X") >> iexp;
			VRarithmetic %= qi::char_("+*:/%-") >> iexp;
			semiCompare %= *qi::char_("<=>") >> iexp;
			curStr %= iexp >> string;
			varConcatString %= varExp >> qi::lit("+") >> string;
			//alternatives are tried from left to right; the final qi::eps lets an option item be empty
			bodyOptionItem %= varConcatString | curStr | string | semiCompare | ERMmacroUsage | ERMmacroDef | varp | iexp | qi::eps;
			//option items are separated by '/'
			exactBodyOptionList %= (bodyOptionItem % qi::lit("/"));
			//option code (an uppercase letter or '+') followed by its item list
			normalBodyOption = qi::char_("A-Z+") > exactBodyOptionList;
			bodyOption %= VRLogic | VRarithmetic | normalBodyOption;
			//body: ':' followed by at least one option and a mandatory closing ';'
			body %= qi::lit(":") >> +(bodyOption) > qi::lit(";");
			instruction %= cmdName >> -identifier >> -condition >> body;
			receiver %= cmdName >> -identifier >> -condition >> -body; //receiver without body exists... change needed
			postTrigger %= cmdName >> -identifier >> -condition > qi::lit(";");

			//command: '!' followed by a kind marker ('?' trigger, '!' receiver, '#' instruction,
			//'$' post trigger); whatever follows the command is stored as its comment
			command %= (qi::lit("!") >>
					(
						(qi::lit("?") >> trigger) |
						(qi::lit("!") >> receiver) |
						(qi::lit("#") >> instruction) |
						(qi::lit("$") >> postTrigger)
					) >> comment
				);

			//ERM line: a command, a comment line or nothing at all
			rline %=
				(
					command | commentLine | spirit::eps
				);

			//VERM modifiers; the two-character forms are listed before their one-character prefixes
			vmod %= qi::string("`") | qi::string(",!") | qi::string(",") | qi::string("#'") | qi::string("'");
			vsym %= *vmod >> qi::lexeme[+qi::char_("+*/$%&_=<>~a-zA-Z0-9-")];

			//strict policy: per the Boost.Spirit documentation a number without a decimal point is
			//not accepted as a real, which leaves plain integers to the qi::int_ alternative
			qi::real_parser<double, qi::strict_real_policies<double> > strict_double;
			vopt %= qi::lexeme[(qi::lit("!") >> qi::char_ >> qi::lit("!"))] | qi::lexeme[strict_double] | qi::lexeme[qi::int_] | command | vexp | string | vsym;
			//V expression: optional modifiers and a bracketed list of options
			vexp %= *vmod >> qi::lit("[") >> *(vopt) >> qi::lit("]");
			//a line is either '!' followed by a V expression or an ERM line; the whole input must be consumed
			vline %= (( qi::lit("!") >>vexp) | rline ) > spirit::eoi;

			//error handling
			//names given here are the ones inserted into the error message below
			string.name("string constant");
			ERMmacroUsage.name("macro usage");
			qERMMacroUsage.name("macro usage with ?");
			ERMmacroDef.name("macro definition");
			varExpNotMacro.name("variable expression (not macro)");
			varExp.name("variable expression");
			iexp.name("i-expression");
			comment.name("comment");
			commentLine.name("comment line");
			cmdName.name("name of a command");
			identifier.name("identifier");
			condition.name("condition");
			trigger.name("trigger");
			body.name("body");
			instruction.name("instruction");
			receiver.name("receiver");
			postTrigger.name("post trigger");
			command.name("command");
			rline.name("ERM script line");
			vsym.name("V symbol");
			vopt.name("V option");
			vexp.name("V expression");
			vline.name("VERM line");

			//when an expectation inside vline fails, print what was expected and the unparsed rest
			//of the input to std::cout, then let the parse fail
			qi::on_error<qi::fail>
				(
				vline
				, std::cout //or phoenix::ref(std::cout), is there any difference?
				<< phoenix::val("Error! Expecting ")
				<< qi::_4 // what failed?
				<< phoenix::val(" here: \"")
				<< phoenix::construct<std::string>(qi::_3, qi::_2) // iterators to error-pos, end
				<< phoenix::val("\"")
				<< std::endl
				);
		}

		//one rule per syntactic element; the second template argument is the attribute the rule produces
		qi::rule<Iterator, TStringConstant(), ascii::space_type> string;
		qi::rule<Iterator, TMacroUsage(), ascii::space_type> ERMmacroUsage;
		qi::rule<Iterator, TQMacroUsage(), ascii::space_type> qERMMacroUsage;
		qi::rule<Iterator, TMacroDef(), ascii::space_type> ERMmacroDef;
		qi::rule<Iterator, TVarExpNotMacro(), ascii::space_type> varExpNotMacro;
		qi::rule<Iterator, TVarExp(), ascii::space_type> varExp;
		qi::rule<Iterator, TIexp(), ascii::space_type> iexp;
		qi::rule<Iterator, TVarpExp(), ascii::space_type> varp;
		qi::rule<Iterator, TArithmeticOp(), ascii::space_type> arithmeticOp;
		qi::rule<Iterator, std::string(), ascii::space_type> comment;
		qi::rule<Iterator, std::string(), ascii::space_type> commentLine;
		qi::rule<Iterator, TCmdName(), ascii::space_type> cmdName;
		qi::rule<Iterator, Tidentifier(), ascii::space_type> identifier;
		qi::rule<Iterator, TComparison(), ascii::space_type> comparison;
		qi::rule<Iterator, Tcondition(), ascii::space_type> condition;
		qi::rule<Iterator, TVRLogic(), ascii::space_type> VRLogic;
		qi::rule<Iterator, TVRArithmetic(), ascii::space_type> VRarithmetic;
		qi::rule<Iterator, TSemiCompare(), ascii::space_type> semiCompare;
		qi::rule<Iterator, TCurriedString(), ascii::space_type> curStr;
		qi::rule<Iterator, TVarConcatString(), ascii::space_type> varConcatString;
		qi::rule<Iterator, TBodyOptionItem(), ascii::space_type> bodyOptionItem;
		qi::rule<Iterator, TNormalBodyOptionList(), ascii::space_type> exactBodyOptionList;
		qi::rule<Iterator, TNormalBodyOption(), ascii::space_type> normalBodyOption;
		qi::rule<Iterator, TBodyOption(), ascii::space_type> bodyOption;
		qi::rule<Iterator, Ttrigger(), ascii::space_type> trigger;
		qi::rule<Iterator, Tbody(), ascii::space_type> body;
		qi::rule<Iterator, Tinstruction(), ascii::space_type> instruction;
		qi::rule<Iterator, Treceiver(), ascii::space_type> receiver;
		qi::rule<Iterator, TPostTrigger(), ascii::space_type> postTrigger;
		qi::rule<Iterator, Tcommand(), ascii::space_type> command;
		qi::rule<Iterator, TERMline(), ascii::space_type> rline;
		qi::rule<Iterator, TSymbol(), ascii::space_type> vsym;
		qi::rule<Iterator, TVModifier(), ascii::space_type> vmod;
		qi::rule<Iterator, TVOption(), ascii::space_type> vopt;
		qi::rule<Iterator, TVExp(), ascii::space_type> vexp;
		qi::rule<Iterator, TLine(), ascii::space_type> vline;
	};
};
  414. ERM::TLine ERMParser::parseLine( const std::string & line )
  415. {
  416. std::string::const_iterator beg = line.begin(),
  417. end = line.end();
  418. ERM::ERM_grammar<std::string::const_iterator> ERMgrammar;
  419. ERM::TLine AST;
  420. bool r = qi::phrase_parse(beg, end, ERMgrammar, ascii::space, AST);
  421. if(!r || beg != end)
  422. {
  423. tlog1 << "Parse error in file " << srcFile << " (line " << parsedLine << ") :\n" << line << std::endl;
  424. tlog1 << "\tCannot parse: " << std::string(beg, end) << std::endl;
  425. throw ParseErrorException();
  426. }
  427. return AST;
  428. }
  429. ERMParser::ELineType ERMParser::classifyLine( const std::string & line, bool inString ) const
  430. {
  431. ERMParser::ELineType ret;
  432. if(line[0] == '!')
  433. {
  434. if(countHatsBeforeSemicolon(line) % 2 == 1)
  435. ret = ERMParser::UNFINISHED;
  436. else
  437. ret = ERMParser::COMMAND_FULL;
  438. }
  439. else
  440. {
  441. if(inString)
  442. {
  443. if(countHatsBeforeSemicolon(line) % 2 == 1)
  444. ret = ERMParser::END_OF;
  445. else
  446. ret = ERMParser::UNFINISHED;
  447. }
  448. else
  449. {
  450. ret = ERMParser::COMMENT;
  451. }
  452. }
  453. return ret;
  454. }
  455. int ERMParser::countHatsBeforeSemicolon( const std::string & line ) const
  456. {
  457. //CHECK: omit macros? or anything else?
  458. int numOfHats = 0; //num of '^' before ';'
  459. //check for unmatched ^
  460. BOOST_FOREACH(char c, line)
  461. {
  462. if(c == ';')
  463. break;
  464. if(c == '^')
  465. ++numOfHats;
  466. }
  467. return numOfHats;
  468. }
  469. void ERMParser::repairEncoding( std::string & str ) const
  470. {
  471. for(int g=0; g<str.size(); ++g)
  472. if(str[g] & 0x80)
  473. str[g] = '|';
  474. }
  475. void ERMParser::repairEncoding( char * str, int len ) const
  476. {
  477. for(int g=0; g<len; ++g)
  478. if(str[g] & 0x80)
  479. str[g] = '|';
  480. }
  481. #else
  482. ERMParser::ERMParser(std::string file){}
  483. std::vector<ERM::TLine> ERMParser::parseFile() {}
  484. #endif