ERMParser.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518
  1. #include "StdInc.h"
  2. #include "ERMParser.h"
  3. /*
  4. * ERMParser.cpp, part of VCMI engine
  5. *
  6. * Authors: listed in file AUTHORS in main folder
  7. *
  8. * License: GNU General Public License v2.0 or later
  9. * Full text of license available in license.txt file, in main folder
  10. *
  11. */
  12. //Greenspun's Tenth Rule of Programming:
  13. //Any sufficiently complicated C or Fortran program contains an ad hoc, informally-specified,
  14. //bug-ridden, slow implementation of half of Common Lisp.
  15. //actually these macros help in dealing with boost::variant
  16. CERMPreprocessor::CERMPreprocessor(const std::string &Fname) : fname(Fname), file(Fname.c_str()), lineNo(0), version(INVALID)
  17. {
  18. if(!file.is_open())
  19. {
  20. logGlobal->errorStream() << "File " << Fname << " not found or unable to open";
  21. return;
  22. }
  23. //check header
  24. std::string header;
  25. getline(header);
  26. if(header == "ZVSE")
  27. version = ERM;
  28. else if(header == "VERM")
  29. version = VERM;
  30. else
  31. {
  32. logGlobal->errorStream() << "File " << fname << " has wrong header";
  33. return;
  34. }
  35. }
  /// Thrown when a script line cannot be parsed.
  class ParseErrorException : public std::exception
  {
  public:
      /// Describes the failure, so that handlers catching std::exception get
      /// a meaningful text instead of the implementation-defined default.
      const char * what() const noexcept override
      {
          return "ERM parse error";
      }
  };
  39. std::string CERMPreprocessor::retreiveCommandLine()
  40. {
  41. std::string wholeCommand;
  42. //parse file
  43. bool verm = false;
  44. bool openedString = false;
  45. int openedBraces = 0;
  46. while(file.good())
  47. {
  48. std::string line ;
  49. getline(line); //reading line
  50. size_t dash = line.find_first_of('^');
  51. bool inTheMiddle = openedBraces || openedString;
  52. if(!inTheMiddle)
  53. {
  54. if(line.size() < 2)
  55. continue;
  56. if(line[0] != '!' ) //command lines must begin with ! -> otherwise treat as comment
  57. continue;
  58. verm = line[1] == '[';
  59. }
  60. if(openedString)
  61. {
  62. wholeCommand += "\\n";
  63. if(dash != std::string::npos)
  64. {
  65. wholeCommand += line.substr(0, dash);
  66. line.erase(0,dash);
  67. }
  68. else //no closing marker -> the whole line is further part of string
  69. {
  70. wholeCommand += line;
  71. continue;
  72. }
  73. }
  74. int i = 0;
  75. for(; i < line.length(); i++)
  76. {
  77. char c = line[i];
  78. if(!openedString)
  79. {
  80. if(c == '[')
  81. openedBraces++;
  82. else if(c == ']')
  83. {
  84. openedBraces--;
  85. if(!openedBraces) //the last brace has been matched -> stop "parsing", everything else in the line is comment
  86. {
  87. i++;
  88. break;
  89. }
  90. }
  91. else if(c == '^')
  92. openedString = true;
  93. else if(c == ';') // a ';' that is in command line (and not in string) ends the command -> throw away rest
  94. {
  95. line.erase(i+!verm, line.length() - i - !verm); //leave ';' at the end only at ERM commands
  96. break;
  97. }
  98. }
  99. else if(c == '^')
  100. openedString = false;
  101. }
  102. if(verm && !openedBraces && i < line.length())
  103. {
  104. line.erase(i, line.length() - i);
  105. }
  106. if(wholeCommand.size()) //separate lines with a space
  107. wholeCommand += " ";
  108. wholeCommand += line;
  109. if(!openedBraces && !openedString)
  110. return wholeCommand;
  111. //loop end
  112. }
  113. if(openedBraces || openedString)
  114. logGlobal->errorStream() << "Ill-formed file: " << fname;
  115. return "";
  116. }
  117. void CERMPreprocessor::getline(std::string &ret)
  118. {
  119. lineNo++;
  120. std::getline(file, ret);
  121. boost::trim(ret); //get rid of wspace
  122. }
  123. ERMParser::ERMParser(std::string file)
  124. :srcFile(file)
  125. {}
  126. std::vector<LineInfo> ERMParser::parseFile()
  127. {
  128. CERMPreprocessor preproc(srcFile);
  129. std::vector<LineInfo> ret;
  130. try
  131. {
  132. while(1)
  133. {
  134. std::string command = preproc.retreiveCommandLine();
  135. if(command.length() == 0)
  136. break;
  137. repairEncoding(command);
  138. LineInfo li;
  139. li.realLineNum = preproc.getCurLineNo();
  140. li.tl = parseLine(command, li.realLineNum);
  141. ret.push_back(li);
  142. }
  143. }
  144. catch (ParseErrorException & e)
  145. {
  146. logGlobal->errorStream() << "stopped parsing file";
  147. }
  148. return ret;
  149. }
  150. BOOST_FUSION_ADAPT_STRUCT(
  151. ERM::TStringConstant,
  152. (std::string, str)
  153. )
  154. BOOST_FUSION_ADAPT_STRUCT(
  155. ERM::TMacroUsage,
  156. (std::string, macro)
  157. )
  158. // BOOST_FUSION_ADAPT_STRUCT(
  159. // ERM::TQMacroUsage,
  160. // (std::string, qmacro)
  161. // )
  162. BOOST_FUSION_ADAPT_STRUCT(
  163. ERM::TMacroDef,
  164. (std::string, macro)
  165. )
  166. BOOST_FUSION_ADAPT_STRUCT(
  167. ERM::TVarExpNotMacro,
  168. (boost::optional<char>, questionMark)
  169. (std::string, varsym)
  170. (ERM::TVarExpNotMacro::Tval, val)
  171. )
  172. BOOST_FUSION_ADAPT_STRUCT(
  173. ERM::TArithmeticOp,
  174. (ERM::TIexp, lhs)
  175. (char, opcode)
  176. (ERM::TIexp, rhs)
  177. )
  178. BOOST_FUSION_ADAPT_STRUCT(
  179. ERM::TVarpExp,
  180. (ERM::TVarExp, var)
  181. )
  182. BOOST_FUSION_ADAPT_STRUCT(
  183. ERM::TVRLogic,
  184. (char, opcode)
  185. (ERM::TIexp, var)
  186. )
  187. BOOST_FUSION_ADAPT_STRUCT(
  188. ERM::TVRArithmetic,
  189. (char, opcode)
  190. (ERM::TIexp, rhs)
  191. )
  192. BOOST_FUSION_ADAPT_STRUCT(
  193. ERM::TNormalBodyOption,
  194. (char, optionCode)
  195. (ERM::TNormalBodyOptionList, params)
  196. )
  197. BOOST_FUSION_ADAPT_STRUCT(
  198. ERM::Ttrigger,
  199. (ERM::TCmdName, name)
  200. (boost::optional<ERM::Tidentifier>, identifier)
  201. (boost::optional<ERM::Tcondition>, condition)
  202. )
  203. BOOST_FUSION_ADAPT_STRUCT(
  204. ERM::TComparison,
  205. (ERM::TIexp, lhs)
  206. (std::string, compSign)
  207. (ERM::TIexp, rhs)
  208. )
  209. BOOST_FUSION_ADAPT_STRUCT(
  210. ERM::TSemiCompare,
  211. (std::string, compSign)
  212. (ERM::TIexp, rhs)
  213. )
  214. BOOST_FUSION_ADAPT_STRUCT(
  215. ERM::TCurriedString,
  216. (ERM::TIexp, iexp)
  217. (ERM::TStringConstant, string)
  218. )
  219. BOOST_FUSION_ADAPT_STRUCT(
  220. ERM::TVarConcatString,
  221. (ERM::TVarExp, var)
  222. (ERM::TStringConstant, string)
  223. )
  224. BOOST_FUSION_ADAPT_STRUCT(
  225. ERM::Tcondition,
  226. (char, ctype)
  227. (ERM::Tcondition::Tcond, cond)
  228. (ERM::TconditionNode, rhs)
  229. )
  230. BOOST_FUSION_ADAPT_STRUCT(
  231. ERM::Tinstruction,
  232. (ERM::TCmdName, name)
  233. (boost::optional<ERM::Tidentifier>, identifier)
  234. (boost::optional<ERM::Tcondition>, condition)
  235. (ERM::Tbody, body)
  236. )
  237. BOOST_FUSION_ADAPT_STRUCT(
  238. ERM::Treceiver,
  239. (ERM::TCmdName, name)
  240. (boost::optional<ERM::Tidentifier>, identifier)
  241. (boost::optional<ERM::Tcondition>, condition)
  242. (boost::optional<ERM::Tbody>, body)
  243. )
  244. BOOST_FUSION_ADAPT_STRUCT(
  245. ERM::TPostTrigger,
  246. (ERM::TCmdName, name)
  247. (boost::optional<ERM::Tidentifier>, identifier)
  248. (boost::optional<ERM::Tcondition>, condition)
  249. )
  250. BOOST_FUSION_ADAPT_STRUCT(
  251. ERM::Tcommand,
  252. (ERM::Tcommand::Tcmd, cmd)
  253. (std::string, comment)
  254. )
  255. BOOST_FUSION_ADAPT_STRUCT(
  256. ERM::TVExp,
  257. (std::vector<ERM::TVModifier>, modifier)
  258. (std::vector<ERM::TVOption>, children)
  259. )
  260. BOOST_FUSION_ADAPT_STRUCT(
  261. ERM::TSymbol,
  262. (std::vector<ERM::TVModifier>, symModifier)
  263. (std::string, sym)
  264. )
  265. namespace ERM
  266. {
  267. template<typename Iterator>
  268. struct ERM_grammar : qi::grammar<Iterator, TLine(), ascii::space_type>
  269. {
  270. ERM_grammar() : ERM_grammar::base_type(vline, "VERM script line")
  271. {
  272. //do not build too complicated expressions, e.g. (a >> b) | c, qi has problems with them
  273. ERMmacroUsage %= qi::lexeme[qi::lit('$') >> *(qi::char_ - '$') >> qi::lit('$')];
  274. ERMmacroDef %= qi::lexeme[qi::lit('@') >> *(qi::char_ - '@') >> qi::lit('@')];
  275. varExpNotMacro %= -qi::char_("?") >> (+(qi::char_("a-z") - 'u')) >> -qi::int_;
  276. //TODO: mixed var/macro expressions like in !!HE-1&407:Id$cost$; [script 13]
  277. /*qERMMacroUsage %= qi::lexeme[qi::lit("?$") >> *(qi::char_ - '$') >> qi::lit('$')];*/
  278. varExp %= varExpNotMacro | ERMmacroUsage;
  279. iexp %= varExp | qi::int_;
  280. varp %= qi::lit("?") >> varExp;
  281. comment %= *qi::char_;
  282. commentLine %= (~qi::char_("!") >> comment | (qi::char_('!') >> (~qi::char_("?!$#[")) >> comment ));
  283. cmdName %= qi::lexeme[qi::repeat(2)[qi::char_]];
  284. arithmeticOp %= iexp >> qi::char_ >> iexp;
  285. //identifier is usually a vector of i-expressions but VR receiver performs arithmetic operations on it
  286. identifier %= (iexp | arithmeticOp) % qi::lit('/');
  287. comparison %= iexp >> (*qi::char_("<=>")) >> iexp;
  288. condition %= qi::char_("&|X/") >> (comparison | qi::int_) >> -condition;
  289. trigger %= cmdName >> -identifier >> -condition > qi::lit(";"); /////
  290. string %= qi::lexeme['^' >> *(qi::char_ - '^') >> '^'];
  291. VRLogic %= qi::char_("&|X") >> iexp;
  292. VRarithmetic %= qi::char_("+*:/%-") >> iexp;
  293. semiCompare %= +qi::char_("<=>") >> iexp;
  294. curStr %= iexp >> string;
  295. varConcatString %= varExp >> qi::lit("+") >> string;
  296. bodyOptionItem %= varConcatString | curStr | string | semiCompare | ERMmacroDef | varp | iexp | qi::eps;
  297. exactBodyOptionList %= (bodyOptionItem % qi::lit("/"));
  298. normalBodyOption = qi::char_("A-Z+") > exactBodyOptionList;
  299. bodyOption %= VRLogic | VRarithmetic | normalBodyOption;
  300. body %= qi::lit(":") >> +(bodyOption) > qi::lit(";");
  301. instruction %= cmdName >> -identifier >> -condition >> body;
  302. receiver %= cmdName >> -identifier >> -condition >> -body; //receiver without body exists... change needed
  303. postTrigger %= cmdName >> -identifier >> -condition > qi::lit(";");
  304. command %= (qi::lit("!") >>
  305. (
  306. (qi::lit("?") >> trigger) |
  307. (qi::lit("!") >> receiver) |
  308. (qi::lit("#") >> instruction) |
  309. (qi::lit("$") >> postTrigger)
  310. ) >> comment
  311. );
  312. rline %=
  313. (
  314. command | commentLine | spirit::eps
  315. );
  316. vmod %= qi::string("`") | qi::string(",!") | qi::string(",") | qi::string("#'") | qi::string("'");
  317. vsym %= *vmod >> qi::lexeme[+qi::char_("+*/$%&_=<>~a-zA-Z0-9-")];
  318. qi::real_parser<double, qi::strict_real_policies<double> > strict_double;
  319. vopt %= qi::lexeme[(qi::lit("!") >> qi::char_ >> qi::lit("!"))] | qi::lexeme[strict_double] | qi::lexeme[qi::int_] | command | vexp | string | vsym;
  320. vexp %= *vmod >> qi::lit("[") >> *(vopt) >> qi::lit("]");
  321. vline %= (( qi::lit("!") >>vexp) | rline ) > spirit::eoi;
  322. //error handling
  323. string.name("string constant");
  324. ERMmacroUsage.name("macro usage");
  325. /*qERMMacroUsage.name("macro usage with ?");*/
  326. ERMmacroDef.name("macro definition");
  327. varExpNotMacro.name("variable expression (not macro)");
  328. varExp.name("variable expression");
  329. iexp.name("i-expression");
  330. comment.name("comment");
  331. commentLine.name("comment line");
  332. cmdName.name("name of a command");
  333. identifier.name("identifier");
  334. condition.name("condition");
  335. trigger.name("trigger");
  336. body.name("body");
  337. instruction.name("instruction");
  338. receiver.name("receiver");
  339. postTrigger.name("post trigger");
  340. command.name("command");
  341. rline.name("ERM script line");
  342. vsym.name("V symbol");
  343. vopt.name("V option");
  344. vexp.name("V expression");
  345. vline.name("VERM line");
  346. qi::on_error<qi::fail>
  347. (
  348. vline
  349. , std::cout //or phoenix::ref(std::count), is there any difference?
  350. << phoenix::val("Error! Expecting ")
  351. << qi::_4 // what failed?
  352. << phoenix::val(" here: \"")
  353. << phoenix::construct<std::string>(qi::_3, qi::_2) // iterators to error-pos, end
  354. << phoenix::val("\"")
  355. );
  356. }
  357. qi::rule<Iterator, TStringConstant(), ascii::space_type> string;
  358. qi::rule<Iterator, TMacroUsage(), ascii::space_type> ERMmacroUsage;
  359. /*qi::rule<Iterator, TQMacroUsage(), ascii::space_type> qERMMacroUsage;*/
  360. qi::rule<Iterator, TMacroDef(), ascii::space_type> ERMmacroDef;
  361. qi::rule<Iterator, TVarExpNotMacro(), ascii::space_type> varExpNotMacro;
  362. qi::rule<Iterator, TVarExp(), ascii::space_type> varExp;
  363. qi::rule<Iterator, TIexp(), ascii::space_type> iexp;
  364. qi::rule<Iterator, TVarpExp(), ascii::space_type> varp;
  365. qi::rule<Iterator, TArithmeticOp(), ascii::space_type> arithmeticOp;
  366. qi::rule<Iterator, std::string(), ascii::space_type> comment;
  367. qi::rule<Iterator, std::string(), ascii::space_type> commentLine;
  368. qi::rule<Iterator, TCmdName(), ascii::space_type> cmdName;
  369. qi::rule<Iterator, Tidentifier(), ascii::space_type> identifier;
  370. qi::rule<Iterator, TComparison(), ascii::space_type> comparison;
  371. qi::rule<Iterator, Tcondition(), ascii::space_type> condition;
  372. qi::rule<Iterator, TVRLogic(), ascii::space_type> VRLogic;
  373. qi::rule<Iterator, TVRArithmetic(), ascii::space_type> VRarithmetic;
  374. qi::rule<Iterator, TSemiCompare(), ascii::space_type> semiCompare;
  375. qi::rule<Iterator, TCurriedString(), ascii::space_type> curStr;
  376. qi::rule<Iterator, TVarConcatString(), ascii::space_type> varConcatString;
  377. qi::rule<Iterator, TBodyOptionItem(), ascii::space_type> bodyOptionItem;
  378. qi::rule<Iterator, TNormalBodyOptionList(), ascii::space_type> exactBodyOptionList;
  379. qi::rule<Iterator, TNormalBodyOption(), ascii::space_type> normalBodyOption;
  380. qi::rule<Iterator, TBodyOption(), ascii::space_type> bodyOption;
  381. qi::rule<Iterator, Ttrigger(), ascii::space_type> trigger;
  382. qi::rule<Iterator, Tbody(), ascii::space_type> body;
  383. qi::rule<Iterator, Tinstruction(), ascii::space_type> instruction;
  384. qi::rule<Iterator, Treceiver(), ascii::space_type> receiver;
  385. qi::rule<Iterator, TPostTrigger(), ascii::space_type> postTrigger;
  386. qi::rule<Iterator, Tcommand(), ascii::space_type> command;
  387. qi::rule<Iterator, TERMline(), ascii::space_type> rline;
  388. qi::rule<Iterator, TSymbol(), ascii::space_type> vsym;
  389. qi::rule<Iterator, TVModifier(), ascii::space_type> vmod;
  390. qi::rule<Iterator, TVOption(), ascii::space_type> vopt;
  391. qi::rule<Iterator, TVExp(), ascii::space_type> vexp;
  392. qi::rule<Iterator, TLine(), ascii::space_type> vline;
  393. };
  394. };
  395. ERM::TLine ERMParser::parseLine( const std::string & line, int realLineNo )
  396. {
  397. try
  398. {
  399. return parseLine(line);
  400. }
  401. catch(...)
  402. {
  403. logGlobal->errorStream() << "Parse error occurred in file " << srcFile << " (line " << realLineNo << ") :" << line;
  404. throw;
  405. }
  406. }
  407. ERM::TLine ERMParser::parseLine(const std::string & line)
  408. {
  409. std::string::const_iterator beg = line.begin(),
  410. end = line.end();
  411. ERM::ERM_grammar<std::string::const_iterator> ERMgrammar;
  412. ERM::TLine AST;
  413. bool r = qi::phrase_parse(beg, end, ERMgrammar, ascii::space, AST);
  414. if(!r || beg != end)
  415. {
  416. logGlobal->errorStream() << "Parse error: cannot parse: " << std::string(beg, end);
  417. throw ParseErrorException();
  418. }
  419. return AST;
  420. }
  421. int ERMParser::countHatsBeforeSemicolon( const std::string & line ) const
  422. {
  423. //CHECK: omit macros? or anything else?
  424. int numOfHats = 0; //num of '^' before ';'
  425. //check for unmatched ^
  426. for (char c : line)
  427. {
  428. if(c == ';')
  429. break;
  430. if(c == '^')
  431. ++numOfHats;
  432. }
  433. return numOfHats;
  434. }
  435. void ERMParser::repairEncoding( std::string & str ) const
  436. {
  437. for(int g=0; g<str.size(); ++g)
  438. if(str[g] & 0x80)
  439. str[g] = '|';
  440. }
  441. void ERMParser::repairEncoding( char * str, int len ) const
  442. {
  443. for(int g=0; g<len; ++g)
  444. if(str[g] & 0x80)
  445. str[g] = '|';
  446. }