CGeneralTextHandler.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521
  1. /*
  2. * CGeneralTextHandler.cpp, part of VCMI engine
  3. *
  4. * Authors: listed in file AUTHORS in main folder
  5. *
  6. * License: GNU General Public License v2.0 or later
  7. * Full text of license available in license.txt file, in main folder
  8. *
  9. */
  10. #include "StdInc.h"
  11. #include "CGeneralTextHandler.h"
  12. #include <boost/locale.hpp>
  13. #include "filesystem/Filesystem.h"
  14. #include "CConfigHandler.h"
  15. #include "CModHandler.h"
  16. #include "GameConstants.h"
  17. #include "VCMI_Lib.h"
  18. #include "Terrain.h"
  19. size_t Unicode::getCharacterSize(char firstByte)
  20. {
  21. // length of utf-8 character can be determined from 1st byte by counting number of highest bits set to 1:
  22. // 0xxxxxxx -> 1 - ASCII chars
  23. // 110xxxxx -> 2
  24. // 11110xxx -> 4 - last allowed in current standard
  25. // 1111110x -> 6 - last allowed in original standard
  26. if ((ui8)firstByte < 0x80)
  27. return 1; // ASCII
  28. size_t ret = 0;
  29. for (size_t i=0; i<8; i++)
  30. {
  31. if (((ui8)firstByte & (0x80 >> i)) != 0)
  32. ret++;
  33. else
  34. break;
  35. }
  36. return ret;
  37. }
  38. bool Unicode::isValidCharacter(const char * character, size_t maxSize)
  39. {
  40. // can't be first byte in UTF8
  41. if ((ui8)character[0] >= 0x80 && (ui8)character[0] < 0xC0)
  42. return false;
  43. // first character must follow rules checked in getCharacterSize
  44. size_t size = getCharacterSize((ui8)character[0]);
  45. if ((ui8)character[0] > 0xF4)
  46. return false; // above maximum allowed in standard (UTF codepoints are capped at 0x0010FFFF)
  47. if (size > maxSize)
  48. return false;
  49. // remaining characters must have highest bit set to 1
  50. for (size_t i = 1; i < size; i++)
  51. {
  52. if (((ui8)character[i] & 0x80) == 0)
  53. return false;
  54. }
  55. return true;
  56. }
  57. bool Unicode::isValidASCII(const std::string & text)
  58. {
  59. for (const char & ch : text)
  60. if (ui8(ch) >= 0x80 )
  61. return false;
  62. return true;
  63. }
  64. bool Unicode::isValidASCII(const char * data, size_t size)
  65. {
  66. for (size_t i=0; i<size; i++)
  67. if (ui8(data[i]) >= 0x80 )
  68. return false;
  69. return true;
  70. }
  71. bool Unicode::isValidString(const std::string & text)
  72. {
  73. for (size_t i=0; i<text.size(); i += getCharacterSize(text[i]))
  74. {
  75. if (!isValidCharacter(text.data() + i, text.size() - i))
  76. return false;
  77. }
  78. return true;
  79. }
  80. bool Unicode::isValidString(const char * data, size_t size)
  81. {
  82. for (size_t i=0; i<size; i += getCharacterSize(data[i]))
  83. {
  84. if (!isValidCharacter(data + i, size - i))
  85. return false;
  86. }
  87. return true;
  88. }
  89. static std::string getSelectedEncoding()
  90. {
  91. return settings["general"]["encoding"].String();
  92. }
  93. std::string Unicode::toUnicode(const std::string &text)
  94. {
  95. return toUnicode(text, getSelectedEncoding());
  96. }
  97. std::string Unicode::toUnicode(const std::string &text, const std::string &encoding)
  98. {
  99. return boost::locale::conv::to_utf<char>(text, encoding);
  100. }
  101. std::string Unicode::fromUnicode(const std::string & text)
  102. {
  103. return fromUnicode(text, getSelectedEncoding());
  104. }
  105. std::string Unicode::fromUnicode(const std::string &text, const std::string &encoding)
  106. {
  107. return boost::locale::conv::from_utf<char>(text, encoding);
  108. }
  109. void Unicode::trimRight(std::string & text, const size_t amount)
  110. {
  111. if(text.empty())
  112. return;
  113. //todo: more efficient algorithm
  114. for(int i = 0; i< amount; i++){
  115. auto b = text.begin();
  116. auto e = text.end();
  117. size_t lastLen = 0;
  118. size_t len = 0;
  119. while (b != e) {
  120. lastLen = len;
  121. size_t n = getCharacterSize(*b);
  122. if(!isValidCharacter(&(*b),e-b))
  123. {
  124. logGlobal->error("Invalid UTF8 sequence");
  125. break;//invalid sequence will be trimmed
  126. }
  127. len += n;
  128. b += n;
  129. }
  130. text.resize(lastLen);
  131. }
  132. }
  /// Number punctuation facet that reports ',' as the decimal point.
  /// Used for string -> float conversion of values written with a decimal comma.
  class LocaleWithComma: public std::numpunct<char>
  {
  protected:
      char_type do_decimal_point() const override
      {
          const char_type decimalSeparator = ',';
          return decimalSeparator;
      }
  };
  142. CLegacyConfigParser::CLegacyConfigParser(std::string URI)
  143. {
  144. init(CResourceHandler::get()->load(ResourceID(URI, EResType::TEXT)));
  145. }
  146. CLegacyConfigParser::CLegacyConfigParser(const std::unique_ptr<CInputStream> & input)
  147. {
  148. init(input);
  149. }
  150. void CLegacyConfigParser::init(const std::unique_ptr<CInputStream> & input)
  151. {
  152. data.reset(new char[input->getSize()]);
  153. input->read((ui8*)data.get(), input->getSize());
  154. curr = data.get();
  155. end = curr + input->getSize();
  156. }
  157. std::string CLegacyConfigParser::extractQuotedPart()
  158. {
  159. assert(*curr == '\"');
  160. curr++; // skip quote
  161. char * begin = curr;
  162. while (curr != end && *curr != '\"' && *curr != '\t')
  163. curr++;
  164. return std::string(begin, curr++); //increment curr to close quote
  165. }
  166. std::string CLegacyConfigParser::extractQuotedString()
  167. {
  168. assert(*curr == '\"');
  169. std::string ret;
  170. while (true)
  171. {
  172. ret += extractQuotedPart();
  173. // double quote - add it to string and continue quoted part
  174. if (curr < end && *curr == '\"')
  175. {
  176. ret += '\"';
  177. }
  178. //extract normal part
  179. else if(curr < end && *curr != '\t' && *curr != '\r')
  180. {
  181. char * begin = curr;
  182. while (curr < end && *curr != '\t' && *curr != '\r' && *curr != '\"')//find end of string or next quoted part start
  183. curr++;
  184. ret += std::string(begin, curr);
  185. if(curr>=end || *curr != '\"')
  186. return ret;
  187. }
  188. else // end of string
  189. return ret;
  190. }
  191. }
  192. std::string CLegacyConfigParser::extractNormalString()
  193. {
  194. char * begin = curr;
  195. while (curr < end && *curr != '\t' && *curr != '\r')//find end of string
  196. curr++;
  197. return std::string(begin, curr);
  198. }
  199. std::string CLegacyConfigParser::readRawString()
  200. {
  201. if (curr >= end || *curr == '\n')
  202. return "";
  203. std::string ret;
  204. if (*curr == '\"')
  205. ret = extractQuotedString();// quoted text - find closing quote
  206. else
  207. ret = extractNormalString();//string without quotes - copy till \t or \r
  208. curr++;
  209. return ret;
  210. }
  211. std::string CLegacyConfigParser::readString()
  212. {
  213. // do not convert strings that are already in ASCII - this will only slow down loading process
  214. std::string str = readRawString();
  215. if (Unicode::isValidASCII(str))
  216. return str;
  217. return Unicode::toUnicode(str);
  218. }
  219. float CLegacyConfigParser::readNumber()
  220. {
  221. std::string input = readRawString();
  222. std::istringstream stream(input);
  223. if(input.find(',') != std::string::npos) // code to handle conversion with comma as decimal separator
  224. stream.imbue(std::locale(std::locale(), new LocaleWithComma()));
  225. float result;
  226. if ( !(stream >> result) )
  227. return 0;
  228. return result;
  229. }
  230. bool CLegacyConfigParser::isNextEntryEmpty() const
  231. {
  232. char * nextSymbol = curr;
  233. while (nextSymbol < end && *nextSymbol == ' ')
  234. nextSymbol++; //find next meaningfull symbol
  235. return nextSymbol >= end || *nextSymbol == '\n' || *nextSymbol == '\r' || *nextSymbol == '\t';
  236. }
  237. bool CLegacyConfigParser::endLine()
  238. {
  239. while (curr < end && *curr != '\n')
  240. readString();
  241. curr++;
  242. return curr < end;
  243. }
  244. void CGeneralTextHandler::readToVector(std::string sourceName, std::vector<std::string> &dest)
  245. {
  246. CLegacyConfigParser parser(sourceName);
  247. do
  248. {
  249. dest.push_back(parser.readString());
  250. }
  251. while (parser.endLine());
  252. }
  253. CGeneralTextHandler::CGeneralTextHandler()
  254. {
  255. std::vector<std::string> h3mTerrainNames;
  256. readToVector("DATA/VCDESC.TXT", victoryConditions);
  257. readToVector("DATA/LCDESC.TXT", lossCondtions);
  258. readToVector("DATA/TCOMMAND.TXT", tcommands);
  259. readToVector("DATA/HALLINFO.TXT", hcommands);
  260. readToVector("DATA/CASTINFO.TXT", fcommands);
  261. readToVector("DATA/ADVEVENT.TXT", advobtxt);
  262. readToVector("DATA/XTRAINFO.TXT", xtrainfo);
  263. readToVector("DATA/RESTYPES.TXT", restypes);
  264. readToVector("DATA/TERRNAME.TXT", h3mTerrainNames);
  265. readToVector("DATA/RANDSIGN.TXT", randsign);
  266. readToVector("DATA/CRGEN1.TXT", creGens);
  267. readToVector("DATA/CRGEN4.TXT", creGens4);
  268. readToVector("DATA/OVERVIEW.TXT", overview);
  269. readToVector("DATA/ARRAYTXT.TXT", arraytxt);
  270. readToVector("DATA/PRISKILL.TXT", primarySkillNames);
  271. readToVector("DATA/JKTEXT.TXT", jktexts);
  272. readToVector("DATA/TVRNINFO.TXT", tavernInfo);
  273. readToVector("DATA/RANDTVRN.TXT", tavernRumors);
  274. readToVector("DATA/TURNDUR.TXT", turnDurations);
  275. readToVector("DATA/HEROSCRN.TXT", heroscrn);
  276. readToVector("DATA/TENTCOLR.TXT", tentColors);
  277. readToVector("DATA/SKILLLEV.TXT", levels);
  278. for(int i = 0; i < h3mTerrainNames.size(); ++i)
  279. {
  280. terrainNames[Terrain::createTerrainTypeH3M(i)] = h3mTerrainNames[i];
  281. }
  282. for(auto & terrain : Terrain::Manager::terrains())
  283. {
  284. if(!Terrain::Manager::getInfo(terrain).terrainText.empty())
  285. terrainNames[terrain] = Terrain::Manager::getInfo(terrain).terrainText;
  286. }
  287. static const char * QE_MOD_COMMANDS = "DATA/QECOMMANDS.TXT";
  288. if (CResourceHandler::get()->existsResource(ResourceID(QE_MOD_COMMANDS, EResType::TEXT)))
  289. readToVector(QE_MOD_COMMANDS, qeModCommands);
  290. localizedTexts = JsonNode(ResourceID("config/translate.json", EResType::TEXT));
  291. {
  292. CLegacyConfigParser parser("DATA/GENRLTXT.TXT");
  293. parser.endLine();
  294. do
  295. {
  296. allTexts.push_back(parser.readString());
  297. }
  298. while (parser.endLine());
  299. }
  300. {
  301. CLegacyConfigParser parser("DATA/HELP.TXT");
  302. do
  303. {
  304. std::string first = parser.readString();
  305. std::string second = parser.readString();
  306. zelp.push_back(std::make_pair(first, second));
  307. }
  308. while (parser.endLine());
  309. }
  310. {
  311. CLegacyConfigParser nameParser("DATA/MINENAME.TXT");
  312. CLegacyConfigParser eventParser("DATA/MINEEVNT.TXT");
  313. do
  314. {
  315. std::string name = nameParser.readString();
  316. std::string event = eventParser.readString();
  317. mines.push_back(std::make_pair(name, event));
  318. }
  319. while (nameParser.endLine() && eventParser.endLine());
  320. }
  321. {
  322. CLegacyConfigParser parser("DATA/PLCOLORS.TXT");
  323. do
  324. {
  325. std::string color = parser.readString();
  326. colors.push_back(color);
  327. color[0] = toupper(color[0]);
  328. capColors.push_back(color);
  329. }
  330. while (parser.endLine());
  331. }
  332. {
  333. CLegacyConfigParser parser("DATA/SEERHUT.TXT");
  334. //skip header
  335. parser.endLine();
  336. for (int i = 0; i < 6; ++i)
  337. seerEmpty.push_back(parser.readString());
  338. parser.endLine();
  339. quests.resize(10);
  340. for (int i = 0; i < 9; ++i) //9 types of quests
  341. {
  342. quests[i].resize(5);
  343. for (int j = 0; j < 5; ++j)
  344. {
  345. parser.readString(); //front description
  346. for (int k = 0; k < 6; ++k)
  347. quests[i][j].push_back(parser.readString());
  348. parser.endLine();
  349. }
  350. }
  351. quests[9].resize(1);
  352. for (int k = 0; k < 6; ++k) //Time limit
  353. {
  354. quests[9][0].push_back(parser.readString());
  355. }
  356. parser.endLine();
  357. parser.endLine(); // empty line
  358. parser.endLine(); // header
  359. for (int i = 0; i < 48; ++i)
  360. {
  361. seerNames.push_back(parser.readString());
  362. parser.endLine();
  363. }
  364. }
  365. {
  366. CLegacyConfigParser parser("DATA/CAMPTEXT.TXT");
  367. //skip header
  368. parser.endLine();
  369. std::string text;
  370. do
  371. {
  372. text = parser.readString();
  373. if (!text.empty())
  374. campaignMapNames.push_back(text);
  375. }
  376. while (parser.endLine() && !text.empty());
  377. for (size_t i=0; i<campaignMapNames.size(); i++)
  378. {
  379. do // skip empty space and header
  380. {
  381. text = parser.readString();
  382. }
  383. while (parser.endLine() && text.empty());
  384. campaignRegionNames.push_back(std::vector<std::string>());
  385. do
  386. {
  387. text = parser.readString();
  388. if (!text.empty())
  389. campaignRegionNames.back().push_back(text);
  390. }
  391. while (parser.endLine() && !text.empty());
  392. }
  393. }
  394. if (VLC->modh->modules.STACK_EXP)
  395. {
  396. CLegacyConfigParser parser("DATA/ZCREXP.TXT");
  397. parser.endLine();//header
  398. for (size_t iter=0; iter<325; iter++)
  399. {
  400. parser.readString(); //ignore 1st column with description
  401. zcrexp.push_back(parser.readString());
  402. parser.endLine();
  403. }
  404. // line 325 - some weird formatting here
  405. zcrexp.push_back(parser.readString());
  406. parser.readString();
  407. parser.endLine();
  408. do // rest of file can be read normally
  409. {
  410. parser.readString(); //ignore 1st column with description
  411. zcrexp.push_back(parser.readString());
  412. }
  413. while (parser.endLine());
  414. }
  415. if (VLC->modh->modules.COMMANDERS)
  416. {
  417. try
  418. {
  419. CLegacyConfigParser parser("DATA/ZNPC00.TXT");
  420. parser.endLine();//header
  421. do
  422. {
  423. znpc00.push_back(parser.readString());
  424. } while (parser.endLine());
  425. }
  426. catch (const std::runtime_error &)
  427. {
  428. logGlobal->warn("WoG file ZNPC00.TXT containing commander texts was not found");
  429. }
  430. }
  431. }
  432. int32_t CGeneralTextHandler::pluralText(const int32_t textIndex, const int32_t count) const
  433. {
  434. if(textIndex == 0)
  435. return 0;
  436. else if(textIndex < 0)
  437. return -textIndex;
  438. else if(count == 1)
  439. return textIndex;
  440. else
  441. return textIndex + 1;
  442. }