CGeneralTextHandler.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504
  1. /*
  2. * CGeneralTextHandler.cpp, part of VCMI engine
  3. *
  4. * Authors: listed in file AUTHORS in main folder
  5. *
  6. * License: GNU General Public License v2.0 or later
  7. * Full text of license available in license.txt file, in main folder
  8. *
  9. */
  10. #include "StdInc.h"
  11. #include "CGeneralTextHandler.h"
  12. #include <boost/locale.hpp>
  13. #include "filesystem/Filesystem.h"
  14. #include "CConfigHandler.h"
  15. #include "CModHandler.h"
  16. #include "GameConstants.h"
  17. #include "VCMI_Lib.h"
  18. size_t Unicode::getCharacterSize(char firstByte)
  19. {
  20. // length of utf-8 character can be determined from 1st byte by counting number of highest bits set to 1:
  21. // 0xxxxxxx -> 1 - ASCII chars
  22. // 110xxxxx -> 2
  23. // 11110xxx -> 4 - last allowed in current standard
  24. // 1111110x -> 6 - last allowed in original standard
  25. if ((ui8)firstByte < 0x80)
  26. return 1; // ASCII
  27. size_t ret = 0;
  28. for (size_t i=0; i<8; i++)
  29. {
  30. if (((ui8)firstByte & (0x80 >> i)) != 0)
  31. ret++;
  32. else
  33. break;
  34. }
  35. return ret;
  36. }
  37. bool Unicode::isValidCharacter(const char * character, size_t maxSize)
  38. {
  39. // can't be first byte in UTF8
  40. if ((ui8)character[0] >= 0x80 && (ui8)character[0] < 0xC0)
  41. return false;
  42. // first character must follow rules checked in getCharacterSize
  43. size_t size = getCharacterSize((ui8)character[0]);
  44. if ((ui8)character[0] > 0xF4)
  45. return false; // above maximum allowed in standard (UTF codepoints are capped at 0x0010FFFF)
  46. if (size > maxSize)
  47. return false;
  48. // remaining characters must have highest bit set to 1
  49. for (size_t i = 1; i < size; i++)
  50. {
  51. if (((ui8)character[i] & 0x80) == 0)
  52. return false;
  53. }
  54. return true;
  55. }
  56. bool Unicode::isValidASCII(const std::string & text)
  57. {
  58. for (const char & ch : text)
  59. if (ui8(ch) >= 0x80 )
  60. return false;
  61. return true;
  62. }
  63. bool Unicode::isValidASCII(const char * data, size_t size)
  64. {
  65. for (size_t i=0; i<size; i++)
  66. if (ui8(data[i]) >= 0x80 )
  67. return false;
  68. return true;
  69. }
  70. bool Unicode::isValidString(const std::string & text)
  71. {
  72. for (size_t i=0; i<text.size(); i += getCharacterSize(text[i]))
  73. {
  74. if (!isValidCharacter(text.data() + i, text.size() - i))
  75. return false;
  76. }
  77. return true;
  78. }
  79. bool Unicode::isValidString(const char * data, size_t size)
  80. {
  81. for (size_t i=0; i<size; i += getCharacterSize(data[i]))
  82. {
  83. if (!isValidCharacter(data + i, size - i))
  84. return false;
  85. }
  86. return true;
  87. }
  88. static std::string getSelectedEncoding()
  89. {
  90. return settings["general"]["encoding"].String();
  91. }
  92. std::string Unicode::toUnicode(const std::string &text)
  93. {
  94. return toUnicode(text, getSelectedEncoding());
  95. }
  96. std::string Unicode::toUnicode(const std::string &text, const std::string &encoding)
  97. {
  98. return boost::locale::conv::to_utf<char>(text, encoding);
  99. }
  100. std::string Unicode::fromUnicode(const std::string & text)
  101. {
  102. return fromUnicode(text, getSelectedEncoding());
  103. }
  104. std::string Unicode::fromUnicode(const std::string &text, const std::string &encoding)
  105. {
  106. return boost::locale::conv::from_utf<char>(text, encoding);
  107. }
  108. void Unicode::trimRight(std::string & text, const size_t amount)
  109. {
  110. if(text.empty())
  111. return;
  112. //todo: more efficient algorithm
  113. for(int i = 0; i< amount; i++){
  114. auto b = text.begin();
  115. auto e = text.end();
  116. size_t lastLen = 0;
  117. size_t len = 0;
  118. while (b != e) {
  119. lastLen = len;
  120. size_t n = getCharacterSize(*b);
  121. if(!isValidCharacter(&(*b),e-b))
  122. {
  123. logGlobal->error("Invalid UTF8 sequence");
  124. break;//invalid sequence will be trimmed
  125. }
  126. len += n;
  127. b += n;
  128. }
  129. text.resize(lastLen);
  130. }
  131. }
  132. //Helper for string -> float conversion
  133. class LocaleWithComma: public std::numpunct<char>
  134. {
  135. protected:
  136. char do_decimal_point() const override
  137. {
  138. return ',';
  139. }
  140. };
  141. CLegacyConfigParser::CLegacyConfigParser(std::string URI)
  142. {
  143. init(CResourceHandler::get()->load(ResourceID(URI, EResType::TEXT)));
  144. }
  145. CLegacyConfigParser::CLegacyConfigParser(const std::unique_ptr<CInputStream> & input)
  146. {
  147. init(input);
  148. }
  149. void CLegacyConfigParser::init(const std::unique_ptr<CInputStream> & input)
  150. {
  151. data.reset(new char[input->getSize()]);
  152. input->read((ui8*)data.get(), input->getSize());
  153. curr = data.get();
  154. end = curr + input->getSize();
  155. }
  156. std::string CLegacyConfigParser::extractQuotedPart()
  157. {
  158. assert(*curr == '\"');
  159. curr++; // skip quote
  160. char * begin = curr;
  161. while (curr != end && *curr != '\"' && *curr != '\t')
  162. curr++;
  163. return std::string(begin, curr++); //increment curr to close quote
  164. }
  165. std::string CLegacyConfigParser::extractQuotedString()
  166. {
  167. assert(*curr == '\"');
  168. std::string ret;
  169. while (true)
  170. {
  171. ret += extractQuotedPart();
  172. // double quote - add it to string and continue quoted part
  173. if (curr < end && *curr == '\"')
  174. {
  175. ret += '\"';
  176. }
  177. //extract normal part
  178. else if(curr < end && *curr != '\t' && *curr != '\r')
  179. {
  180. char * begin = curr;
  181. while (curr < end && *curr != '\t' && *curr != '\r' && *curr != '\"')//find end of string or next quoted part start
  182. curr++;
  183. ret += std::string(begin, curr);
  184. if(curr>=end || *curr != '\"')
  185. return ret;
  186. }
  187. else // end of string
  188. return ret;
  189. }
  190. }
  191. std::string CLegacyConfigParser::extractNormalString()
  192. {
  193. char * begin = curr;
  194. while (curr < end && *curr != '\t' && *curr != '\r')//find end of string
  195. curr++;
  196. return std::string(begin, curr);
  197. }
  198. std::string CLegacyConfigParser::readRawString()
  199. {
  200. if (curr >= end || *curr == '\n')
  201. return "";
  202. std::string ret;
  203. if (*curr == '\"')
  204. ret = extractQuotedString();// quoted text - find closing quote
  205. else
  206. ret = extractNormalString();//string without quotes - copy till \t or \r
  207. curr++;
  208. return ret;
  209. }
  210. std::string CLegacyConfigParser::readString()
  211. {
  212. // do not convert strings that are already in ASCII - this will only slow down loading process
  213. std::string str = readRawString();
  214. if (Unicode::isValidASCII(str))
  215. return str;
  216. return Unicode::toUnicode(str);
  217. }
  218. float CLegacyConfigParser::readNumber()
  219. {
  220. std::string input = readRawString();
  221. std::istringstream stream(input);
  222. if(input.find(',') != std::string::npos) // code to handle conversion with comma as decimal separator
  223. stream.imbue(std::locale(std::locale(), new LocaleWithComma()));
  224. float result;
  225. if ( !(stream >> result) )
  226. return 0;
  227. return result;
  228. }
  229. bool CLegacyConfigParser::isNextEntryEmpty() const
  230. {
  231. char * nextSymbol = curr;
  232. while (nextSymbol < end && *nextSymbol == ' ')
  233. nextSymbol++; //find next meaningfull symbol
  234. return nextSymbol >= end || *nextSymbol == '\n' || *nextSymbol == '\r' || *nextSymbol == '\t';
  235. }
  236. bool CLegacyConfigParser::endLine()
  237. {
  238. while (curr < end && *curr != '\n')
  239. readString();
  240. curr++;
  241. return curr < end;
  242. }
  243. void CGeneralTextHandler::readToVector(std::string sourceName, std::vector<std::string> &dest)
  244. {
  245. CLegacyConfigParser parser(sourceName);
  246. do
  247. {
  248. dest.push_back(parser.readString());
  249. }
  250. while (parser.endLine());
  251. }
  252. CGeneralTextHandler::CGeneralTextHandler()
  253. {
  254. readToVector("DATA/VCDESC.TXT", victoryConditions);
  255. readToVector("DATA/LCDESC.TXT", lossCondtions);
  256. readToVector("DATA/TCOMMAND.TXT", tcommands);
  257. readToVector("DATA/HALLINFO.TXT", hcommands);
  258. readToVector("DATA/CASTINFO.TXT", fcommands);
  259. readToVector("DATA/ADVEVENT.TXT", advobtxt);
  260. readToVector("DATA/XTRAINFO.TXT", xtrainfo);
  261. readToVector("DATA/RESTYPES.TXT", restypes);
  262. readToVector("DATA/TERRNAME.TXT", terrainNames);
  263. readToVector("DATA/RANDSIGN.TXT", randsign);
  264. readToVector("DATA/CRGEN1.TXT", creGens);
  265. readToVector("DATA/CRGEN4.TXT", creGens4);
  266. readToVector("DATA/OVERVIEW.TXT", overview);
  267. readToVector("DATA/ARRAYTXT.TXT", arraytxt);
  268. readToVector("DATA/PRISKILL.TXT", primarySkillNames);
  269. readToVector("DATA/JKTEXT.TXT", jktexts);
  270. readToVector("DATA/TVRNINFO.TXT", tavernInfo);
  271. readToVector("DATA/RANDTVRN.TXT", tavernRumors);
  272. readToVector("DATA/TURNDUR.TXT", turnDurations);
  273. readToVector("DATA/HEROSCRN.TXT", heroscrn);
  274. readToVector("DATA/TENTCOLR.TXT", tentColors);
  275. readToVector("DATA/SKILLLEV.TXT", levels);
  276. localizedTexts = JsonNode(ResourceID("config/translate.json", EResType::TEXT));
  277. {
  278. CLegacyConfigParser parser("DATA/GENRLTXT.TXT");
  279. parser.endLine();
  280. do
  281. {
  282. allTexts.push_back(parser.readString());
  283. }
  284. while (parser.endLine());
  285. }
  286. {
  287. CLegacyConfigParser parser("DATA/HELP.TXT");
  288. do
  289. {
  290. std::string first = parser.readString();
  291. std::string second = parser.readString();
  292. zelp.push_back(std::make_pair(first, second));
  293. }
  294. while (parser.endLine());
  295. }
  296. {
  297. CLegacyConfigParser nameParser("DATA/MINENAME.TXT");
  298. CLegacyConfigParser eventParser("DATA/MINEEVNT.TXT");
  299. do
  300. {
  301. std::string name = nameParser.readString();
  302. std::string event = eventParser.readString();
  303. mines.push_back(std::make_pair(name, event));
  304. }
  305. while (nameParser.endLine() && eventParser.endLine());
  306. }
  307. {
  308. CLegacyConfigParser parser("DATA/PLCOLORS.TXT");
  309. do
  310. {
  311. std::string color = parser.readString();
  312. colors.push_back(color);
  313. color[0] = toupper(color[0]);
  314. capColors.push_back(color);
  315. }
  316. while (parser.endLine());
  317. }
  318. {
  319. CLegacyConfigParser parser("DATA/SEERHUT.TXT");
  320. //skip header
  321. parser.endLine();
  322. for (int i = 0; i < 6; ++i)
  323. seerEmpty.push_back(parser.readString());
  324. parser.endLine();
  325. quests.resize(10);
  326. for (int i = 0; i < 9; ++i) //9 types of quests
  327. {
  328. quests[i].resize(5);
  329. for (int j = 0; j < 5; ++j)
  330. {
  331. parser.readString(); //front description
  332. for (int k = 0; k < 6; ++k)
  333. quests[i][j].push_back(parser.readString());
  334. parser.endLine();
  335. }
  336. }
  337. quests[9].resize(1);
  338. for (int k = 0; k < 6; ++k) //Time limit
  339. {
  340. quests[9][0].push_back(parser.readString());
  341. }
  342. parser.endLine();
  343. parser.endLine(); // empty line
  344. parser.endLine(); // header
  345. for (int i = 0; i < 48; ++i)
  346. {
  347. seerNames.push_back(parser.readString());
  348. parser.endLine();
  349. }
  350. }
  351. {
  352. CLegacyConfigParser parser("DATA/CAMPTEXT.TXT");
  353. //skip header
  354. parser.endLine();
  355. std::string text;
  356. do
  357. {
  358. text = parser.readString();
  359. if (!text.empty())
  360. campaignMapNames.push_back(text);
  361. }
  362. while (parser.endLine() && !text.empty());
  363. for (size_t i=0; i<campaignMapNames.size(); i++)
  364. {
  365. do // skip empty space and header
  366. {
  367. text = parser.readString();
  368. }
  369. while (parser.endLine() && text.empty());
  370. campaignRegionNames.push_back(std::vector<std::string>());
  371. do
  372. {
  373. text = parser.readString();
  374. if (!text.empty())
  375. campaignRegionNames.back().push_back(text);
  376. }
  377. while (parser.endLine() && !text.empty());
  378. }
  379. }
  380. if (VLC->modh->modules.STACK_EXP)
  381. {
  382. CLegacyConfigParser parser("DATA/ZCREXP.TXT");
  383. parser.endLine();//header
  384. for (size_t iter=0; iter<325; iter++)
  385. {
  386. parser.readString(); //ignore 1st column with description
  387. zcrexp.push_back(parser.readString());
  388. parser.endLine();
  389. }
  390. // line 325 - some weird formatting here
  391. zcrexp.push_back(parser.readString());
  392. parser.readString();
  393. parser.endLine();
  394. do // rest of file can be read normally
  395. {
  396. parser.readString(); //ignore 1st column with description
  397. zcrexp.push_back(parser.readString());
  398. }
  399. while (parser.endLine());
  400. }
  401. if (VLC->modh->modules.COMMANDERS)
  402. {
  403. try
  404. {
  405. CLegacyConfigParser parser("DATA/ZNPC00.TXT");
  406. parser.endLine();//header
  407. do
  408. {
  409. znpc00.push_back(parser.readString());
  410. } while (parser.endLine());
  411. }
  412. catch (std::runtime_error)
  413. {
  414. logGlobal->warn("WoG file ZNPC00.TXT containing commander texts was not found");
  415. }
  416. }
  417. }
  418. int32_t CGeneralTextHandler::pluralText(const int32_t textIndex, const int32_t count) const
  419. {
  420. if(textIndex == 0)
  421. return 0;
  422. else if(textIndex < 0)
  423. return -textIndex;
  424. else if(count == 1)
  425. return textIndex;
  426. else
  427. return textIndex + 1;
  428. }