123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493 |
- #include "StdInc.h"
- #include "CGeneralTextHandler.h"
- #include <boost/locale.hpp>
- #include "filesystem/Filesystem.h"
- #include "CConfigHandler.h"
- #include "CModHandler.h"
- #include "GameConstants.h"
- #include "VCMI_Lib.h"
- /*
- * CGeneralTextHandler.cpp, part of VCMI engine
- *
- * Authors: listed in file AUTHORS in main folder
- *
- * License: GNU General Public License v2.0 or later
- * Full text of license available in license.txt file, in main folder
- *
- */
- size_t Unicode::getCharacterSize(char firstByte)
- {
- // length of utf-8 character can be determined from 1st byte by counting number of highest bits set to 1:
- // 0xxxxxxx -> 1 - ASCII chars
- // 110xxxxx -> 2
- // 11110xxx -> 4 - last allowed in current standard
- // 1111110x -> 6 - last allowed in original standard
- if ((ui8)firstByte < 0x80)
- return 1; // ASCII
- size_t ret = 0;
- for (size_t i=0; i<8; i++)
- {
- if (((ui8)firstByte & (0x80 >> i)) != 0)
- ret++;
- else
- break;
- }
- return ret;
- }
- bool Unicode::isValidCharacter(const char * character, size_t maxSize)
- {
- // can't be first byte in UTF8
- if ((ui8)character[0] >= 0x80 && (ui8)character[0] < 0xC0)
- return false;
- // first character must follow rules checked in getCharacterSize
- size_t size = getCharacterSize((ui8)character[0]);
- if ((ui8)character[0] > 0xF4)
- return false; // above maximum allowed in standard (UTF codepoints are capped at 0x0010FFFF)
- if (size > maxSize)
- return false;
- // remaining characters must have highest bit set to 1
- for (size_t i = 1; i < size; i++)
- {
- if (((ui8)character[i] & 0x80) == 0)
- return false;
- }
- return true;
- }
- bool Unicode::isValidASCII(const std::string & text)
- {
- for (const char & ch : text)
- if (ui8(ch) >= 0x80 )
- return false;
- return true;
- }
- bool Unicode::isValidASCII(const char * data, size_t size)
- {
- for (size_t i=0; i<size; i++)
- if (ui8(data[i]) >= 0x80 )
- return false;
- return true;
- }
- bool Unicode::isValidString(const std::string & text)
- {
- for (size_t i=0; i<text.size(); i += getCharacterSize(text[i]))
- {
- if (!isValidCharacter(text.data() + i, text.size() - i))
- return false;
- }
- return true;
- }
- bool Unicode::isValidString(const char * data, size_t size)
- {
- for (size_t i=0; i<size; i += getCharacterSize(data[i]))
- {
- if (!isValidCharacter(data + i, size - i))
- return false;
- }
- return true;
- }
- static std::string getSelectedEncoding()
- {
- return settings["general"]["encoding"].String();
- }
- std::string Unicode::toUnicode(const std::string &text)
- {
- return toUnicode(text, getSelectedEncoding());
- }
- std::string Unicode::toUnicode(const std::string &text, const std::string &encoding)
- {
- return boost::locale::conv::to_utf<char>(text, encoding);
- }
- std::string Unicode::fromUnicode(const std::string & text)
- {
- return fromUnicode(text, getSelectedEncoding());
- }
- std::string Unicode::fromUnicode(const std::string &text, const std::string &encoding)
- {
- return boost::locale::conv::from_utf<char>(text, encoding);
- }
- void Unicode::trimRight(std::string & text, const size_t amount/* =1 */)
- {
- if(text.empty())
- return;
- //todo: more efficient algorithm
- for(int i = 0; i< amount; i++){
- auto b = text.begin();
- auto e = text.end();
- size_t lastLen = 0;
- size_t len = 0;
- while (b != e) {
- lastLen = len;
- size_t n = getCharacterSize(*b);
- if(!isValidCharacter(&(*b),e-b))
- {
- logGlobal->error("Invalid UTF8 sequence");
- break;//invalid sequence will be trimmed
- }
- len += n;
- b += n;
- }
- text.resize(lastLen);
- }
- }
/// Number punctuation facet that reports ',' as the decimal separator.
/// Used as a helper for string -> float conversion of comma-formatted numbers.
class LocaleWithComma: public std::numpunct<char>
{
protected:
	char_type do_decimal_point() const override
	{
		const char_type separator = ',';
		return separator;
	}
};
- CLegacyConfigParser::CLegacyConfigParser(std::string URI)
- {
- init(CResourceHandler::get()->load(ResourceID(URI, EResType::TEXT)));
- }
- CLegacyConfigParser::CLegacyConfigParser(const std::unique_ptr<CInputStream> & input)
- {
- init(input);
- }
- void CLegacyConfigParser::init(const std::unique_ptr<CInputStream> & input)
- {
- data.reset(new char[input->getSize()]);
- input->read((ui8*)data.get(), input->getSize());
- curr = data.get();
- end = curr + input->getSize();
- }
- std::string CLegacyConfigParser::extractQuotedPart()
- {
- assert(*curr == '\"');
- curr++; // skip quote
- char * begin = curr;
- while (curr != end && *curr != '\"' && *curr != '\t')
- curr++;
- return std::string(begin, curr++); //increment curr to close quote
- }
/// Reads a field that starts with a quote and may consist of several quoted and
/// unquoted pieces; a doubled quote inside the field yields a single quote.
/// Precondition: the cursor points at an opening quote.
std::string CLegacyConfigParser::extractQuotedString()
{
	assert(*curr == '\"');

	std::string ret;
	while (true)
	{
		// text up to the next quote or tab; the cursor ends up behind that terminator
		ret += extractQuotedPart();

		// double quote - add it to string and continue quoted part
		// (the quote under the cursor serves as opening quote for the next iteration)
		if (curr < end && *curr == '\"')
		{
			ret += '\"';
		}
		//extract normal part
		else if(curr < end && *curr != '\t' && *curr != '\r')
		{
			char * begin = curr;

			while (curr < end && *curr != '\t' && *curr != '\r' && *curr != '\"')//find end of string or next quoted part start
				curr++;

			ret += std::string(begin, curr);

			// stopped on tab, CR or end of data - the field is complete;
			// stopped on a quote - loop again to read the next quoted part
			if(curr>=end || *curr != '\"')
				return ret;
		}
		else // end of string
			return ret;
	}
}
- std::string CLegacyConfigParser::extractNormalString()
- {
- char * begin = curr;
- while (curr < end && *curr != '\t' && *curr != '\r')//find end of string
- curr++;
- return std::string(begin, curr);
- }
- std::string CLegacyConfigParser::readRawString()
- {
- if (curr >= end || *curr == '\n')
- return "";
- std::string ret;
- if (*curr == '\"')
- ret = extractQuotedString();// quoted text - find closing quote
- else
- ret = extractNormalString();//string without quotes - copy till \t or \r
- curr++;
- return ret;
- }
- std::string CLegacyConfigParser::readString()
- {
- // do not convert strings that are already in ASCII - this will only slow down loading process
- std::string str = readRawString();
- if (Unicode::isValidASCII(str))
- return str;
- return Unicode::toUnicode(str);
- }
- float CLegacyConfigParser::readNumber()
- {
- std::string input = readRawString();
- std::istringstream stream(input);
- if (input.find(',') != std::string::npos) // code to handle conversion with comma as decimal separator
- stream.imbue(std::locale(std::locale(), new LocaleWithComma));
- float result;
- if ( !(stream >> result) )
- return 0;
- return result;
- }
- bool CLegacyConfigParser::isNextEntryEmpty() const
- {
- char * nextSymbol = curr;
- while (nextSymbol < end && *nextSymbol == ' ')
- nextSymbol++; //find next meaningfull symbol
- return nextSymbol >= end || *nextSymbol == '\n' || *nextSymbol == '\r' || *nextSymbol == '\t';
- }
- bool CLegacyConfigParser::endLine()
- {
- while (curr < end && *curr != '\n')
- readString();
- curr++;
- return curr < end;
- }
- void CGeneralTextHandler::readToVector(std::string sourceName, std::vector<std::string> & dest)
- {
- CLegacyConfigParser parser(sourceName);
- do
- {
- dest.push_back(parser.readString());
- }
- while (parser.endLine());
- }
- CGeneralTextHandler::CGeneralTextHandler()
- {
- readToVector("DATA/VCDESC.TXT", victoryConditions);
- readToVector("DATA/LCDESC.TXT", lossCondtions);
- readToVector("DATA/TCOMMAND.TXT", tcommands);
- readToVector("DATA/HALLINFO.TXT", hcommands);
- readToVector("DATA/CASTINFO.TXT", fcommands);
- readToVector("DATA/ADVEVENT.TXT", advobtxt);
- readToVector("DATA/XTRAINFO.TXT", xtrainfo);
- readToVector("DATA/RESTYPES.TXT", restypes);
- readToVector("DATA/TERRNAME.TXT", terrainNames);
- readToVector("DATA/RANDSIGN.TXT", randsign);
- readToVector("DATA/CRGEN1.TXT", creGens);
- readToVector("DATA/CRGEN4.TXT", creGens4);
- readToVector("DATA/OVERVIEW.TXT", overview);
- readToVector("DATA/ARRAYTXT.TXT", arraytxt);
- readToVector("DATA/PRISKILL.TXT", primarySkillNames);
- readToVector("DATA/JKTEXT.TXT", jktexts);
- readToVector("DATA/TVRNINFO.TXT", tavernInfo);
- readToVector("DATA/RANDTVRN.TXT", tavernRumors);
- readToVector("DATA/TURNDUR.TXT", turnDurations);
- readToVector("DATA/HEROSCRN.TXT", heroscrn);
- readToVector("DATA/TENTCOLR.TXT", tentColors);
- readToVector("DATA/SKILLLEV.TXT", levels);
- localizedTexts = JsonNode(ResourceID("config/translate.json", EResType::TEXT));
- {
- CLegacyConfigParser parser("DATA/GENRLTXT.TXT");
- parser.endLine();
- do
- {
- allTexts.push_back(parser.readString());
- }
- while (parser.endLine());
- }
- {
- CLegacyConfigParser parser("DATA/HELP.TXT");
- do
- {
- std::string first = parser.readString();
- std::string second = parser.readString();
- zelp.push_back(std::make_pair(first, second));
- }
- while (parser.endLine());
- }
- {
- CLegacyConfigParser nameParser("DATA/MINENAME.TXT");
- CLegacyConfigParser eventParser("DATA/MINEEVNT.TXT");
- do
- {
- std::string name = nameParser.readString();
- std::string event = eventParser.readString();
- mines.push_back(std::make_pair(name, event));
- }
- while (nameParser.endLine() && eventParser.endLine());
- }
- {
- CLegacyConfigParser parser("DATA/PLCOLORS.TXT");
- do
- {
- std::string color = parser.readString();
- colors.push_back(color);
- color[0] = toupper(color[0]);
- capColors.push_back(color);
- }
- while (parser.endLine());
- }
- {
- CLegacyConfigParser parser("DATA/SSTRAITS.TXT");
- //skip header
- parser.endLine();
- parser.endLine();
- do
- {
- skillName.push_back(parser.readString());
- skillInfoTexts.push_back(std::vector<std::string>());
- for(int j = 0; j < 3; j++)
- skillInfoTexts.back().push_back(parser.readString());
- }
- while (parser.endLine());
- }
- {
- CLegacyConfigParser parser("DATA/SEERHUT.TXT");
- //skip header
- parser.endLine();
- for (int i = 0; i < 6; ++i)
- seerEmpty.push_back(parser.readString());
- parser.endLine();
- quests.resize(10);
- for (int i = 0; i < 9; ++i) //9 types of quests
- {
- quests[i].resize(5);
- for (int j = 0; j < 5; ++j)
- {
- parser.readString(); //front description
- for (int k = 0; k < 6; ++k)
- quests[i][j].push_back(parser.readString());
- parser.endLine();
- }
- }
- quests[9].resize(1);
- for (int k = 0; k < 6; ++k) //Time limit
- {
- quests[9][0].push_back(parser.readString());
- }
- parser.endLine();
- parser.endLine(); // empty line
- parser.endLine(); // header
- for (int i = 0; i < 48; ++i)
- {
- seerNames.push_back(parser.readString());
- parser.endLine();
- }
- }
- {
- CLegacyConfigParser parser("DATA/CAMPTEXT.TXT");
- //skip header
- parser.endLine();
- std::string text;
- do
- {
- text = parser.readString();
- if (!text.empty())
- campaignMapNames.push_back(text);
- }
- while (parser.endLine() && !text.empty());
- for (size_t i=0; i<campaignMapNames.size(); i++)
- {
- do // skip empty space and header
- {
- text = parser.readString();
- }
- while (parser.endLine() && text.empty());
- campaignRegionNames.push_back(std::vector<std::string>());
- do
- {
- text = parser.readString();
- if (!text.empty())
- campaignRegionNames.back().push_back(text);
- }
- while (parser.endLine() && !text.empty());
- }
- }
- if (VLC->modh->modules.STACK_EXP)
- {
- CLegacyConfigParser parser("DATA/ZCREXP.TXT");
- parser.endLine();//header
- for (size_t iter=0; iter<325; iter++)
- {
- parser.readString(); //ignore 1st column with description
- zcrexp.push_back(parser.readString());
- parser.endLine();
- }
- // line 325 - some weird formatting here
- zcrexp.push_back(parser.readString());
- parser.readString();
- parser.endLine();
- do // rest of file can be read normally
- {
- parser.readString(); //ignore 1st column with description
- zcrexp.push_back(parser.readString());
- }
- while (parser.endLine());
- }
- }
|