пре 2 година · 65c020ef34
--- a/client/adventureMap/CInGameConsole.cpp
+++ b/client/adventureMap/CInGameConsole.cpp
@@ -20,7 +20,7 @@
 
				 
			
 
				 #include "../../CCallback.h"
			
 
				 #include "../../lib/CConfigHandler.h"
			
 
				-#include "../../lib/CGeneralTextHandler.h"
			
 
				+#include "../../lib/TextOperations.h"
			
 
				 #include "../../lib/mapObjects/CArmedInstance.h"
			
 
				 
			
 
				 #include <SDL_timer.h>
			
--- a/client/render/IFont.cpp
+++ b/client/render/IFont.cpp
@@ -12,7 +12,7 @@
 
				 #include "IFont.h"
			
 
				 
			
 
				 #include "../../lib/Point.h"
			
 
				-#include "../../lib/CGeneralTextHandler.h"
			
 
				+#include "../../lib/TextOperations.h"
			
 
				 //
			
 
				 
			
 
				 size_t IFont::getStringWidth(const std::string & data) const
			
--- a/client/renderSDL/CBitmapFont.cpp
+++ b/client/renderSDL/CBitmapFont.cpp
@@ -14,7 +14,7 @@
 
				 
			
 
				 #include "../../lib/vcmi_endian.h"
			
 
				 #include "../../lib/filesystem/Filesystem.h"
			
 
				-#include "../../lib/CGeneralTextHandler.h"
			
 
				+#include "../../lib/TextOperations.h"
			
 
				 #include "../../lib/Rect.h"
			
 
				 
			
 
				 #include <SDL_surface.h>
			
--- a/client/renderSDL/CBitmapHanFont.cpp
+++ b/client/renderSDL/CBitmapHanFont.cpp
@@ -15,7 +15,7 @@
 
				 
			
 
				 #include "../../lib/JsonNode.h"
			
 
				 #include "../../lib/filesystem/Filesystem.h"
			
 
				-#include "../../lib/CGeneralTextHandler.h"
			
 
				+#include "../../lib/TextOperations.h"
			
 
				 #include "../../lib/Rect.h"
			
 
				 
			
 
				 #include <SDL_surface.h>
			
--- a/client/renderSDL/CTrueTypeFont.cpp
+++ b/client/renderSDL/CTrueTypeFont.cpp
@@ -14,7 +14,7 @@
 
				 #include "../renderSDL/SDL_Extensions.h"
			
 
				 
			
 
				 #include "../../lib/JsonNode.h"
			
 
				-#include "../../lib/CGeneralTextHandler.h"
			
 
				+#include "../../lib/TextOperations.h"
			
 
				 #include "../../lib/filesystem/Filesystem.h"
			
 
				 
			
 
				 #include <SDL_ttf.h>
			
--- a/client/widgets/TextControls.cpp
+++ b/client/widgets/TextControls.cpp
@@ -19,7 +19,7 @@
 
				 #include "../adventureMap/CInGameConsole.h"
			
 
				 #include "../renderSDL/SDL_Extensions.h"
			
 
				 
			
 
				-#include "../../lib/CGeneralTextHandler.h"
			
 
				+#include "../../lib/TextOperations.h"
			
 
				 
			
 
				 #ifdef VCMI_ANDROID
			
 
				 #include "lib/CAndroidVMHelper.h"
			
--- a/client/windows/CMessage.cpp
+++ b/client/windows/CMessage.cpp
@@ -13,6 +13,7 @@
 
				 

			
 
				 #include "../CGameInfo.h"

			
 
				 #include "../../lib/CGeneralTextHandler.h"

			
 
				+#include "../../lib/TextOperations.h"

			
 
				 

			
 
				 #include "../windows/InfoWindows.h"

			
 
				 #include "../widgets/Buttons.h"

			
--- a/cmake_modules/VCMI_lib.cmake
+++ b/cmake_modules/VCMI_lib.cmake
@@ -197,6 +197,7 @@ macro(add_main_lib TARGET_NAME LIBRARY_TYPE)
 
				 		${MAIN_LIB_DIR}/RoadHandler.cpp
			
 
				 		${MAIN_LIB_DIR}/ScriptHandler.cpp
			
 
				 		${MAIN_LIB_DIR}/TerrainHandler.cpp
			
 
				+		${MAIN_LIB_DIR}/TextOperations.cpp
			
 
				 		${MAIN_LIB_DIR}/VCMIDirs.cpp
			
 
				 		${MAIN_LIB_DIR}/VCMI_Lib.cpp
			
 
				 	)
			
@@ -454,6 +455,7 @@ macro(add_main_lib TARGET_NAME LIBRARY_TYPE)
 
				 		${MAIN_LIB_DIR}/StartInfo.h
			
 
				 		${MAIN_LIB_DIR}/StringConstants.h
			
 
				 		${MAIN_LIB_DIR}/TerrainHandler.h
			
 
				+		${MAIN_LIB_DIR}/TextOperations.h
			
 
				 		${MAIN_LIB_DIR}/UnlockGuard.h
			
 
				 		${MAIN_LIB_DIR}/VCMIDirs.h
			
 
				 		${MAIN_LIB_DIR}/vcmi_endian.h
			
--- a/lib/CGeneralTextHandler.cpp
+++ b/lib/CGeneralTextHandler.cpp
@@ -10,100 +10,16 @@
 
				 #include "StdInc.h"

			
 
				 #include "CGeneralTextHandler.h"

			
 
				 

			
 
				-#include <boost/locale.hpp>

			
 
				-

			
 
				 #include "filesystem/Filesystem.h"

			
 
				 #include "CConfigHandler.h"

			
 
				 #include "CModHandler.h"

			
 
				-#include "GameConstants.h"

			
 
				 #include "mapObjects/CQuest.h"

			
 
				 #include "VCMI_Lib.h"

			
 
				 #include "Languages.h"

			
 
				+#include "TextOperations.h"

			
 
				 

			
 
				 VCMI_LIB_NAMESPACE_BEGIN

			
 
				 

			
 
				-size_t Unicode::getCharacterSize(char firstByte)

			
 
				-{

			
 
				-	// length of utf-8 character can be determined from 1st byte by counting number of highest bits set to 1:

			
 
				-	// 0xxxxxxx -> 1 -  ASCII chars

			
 
				-	// 110xxxxx -> 2

			
 
				-	// 11110xxx -> 4 - last allowed in current standard

			
 
				-	// 1111110x -> 6 - last allowed in original standard

			
 
				-

			
 
				-	if ((ui8)firstByte < 0x80)

			
 
				-		return 1; // ASCII

			
 
				-

			
 
				-	size_t ret = 0;

			
 
				-

			
 
				-	for (size_t i=0; i<8; i++)

			
 
				-	{

			
 
				-		if (((ui8)firstByte & (0x80 >> i)) != 0)

			
 
				-			ret++;

			
 
				-		else

			
 
				-			break;

			
 
				-	}

			
 
				-	return ret;

			
 
				-}

			
 
				-

			
 
				-bool Unicode::isValidCharacter(const char * character, size_t maxSize)

			
 
				-{

			
 
				-	// can't be first byte in UTF8

			
 
				-	if ((ui8)character[0] >= 0x80 && (ui8)character[0] < 0xC0)

			
 
				-		return false;

			
 
				-	// first character must follow rules checked in getCharacterSize

			
 
				-	size_t size = getCharacterSize((ui8)character[0]);

			
 
				-

			
 
				-	if ((ui8)character[0] > 0xF4)

			
 
				-		return false; // above maximum allowed in standard (UTF codepoints are capped at 0x0010FFFF)

			
 
				-

			
 
				-	if (size > maxSize)

			
 
				-		return false;

			
 
				-

			
 
				-	// remaining characters must have highest bit set to 1

			
 
				-	for (size_t i = 1; i < size; i++)

			
 
				-	{

			
 
				-		if (((ui8)character[i] & 0x80) == 0)

			
 
				-			return false;

			
 
				-	}

			
 
				-	return true;

			
 
				-}

			
 
				-

			
 
				-bool Unicode::isValidASCII(const std::string & text)

			
 
				-{

			
 
				-	for (const char & ch : text)

			
 
				-		if (ui8(ch) >= 0x80 )

			
 
				-			return false;

			
 
				-	return true;

			
 
				-}

			
 
				-

			
 
				-bool Unicode::isValidASCII(const char * data, size_t size)

			
 
				-{

			
 
				-	for (size_t i=0; i<size; i++)

			
 
				-		if (ui8(data[i]) >= 0x80 )

			
 
				-			return false;

			
 
				-	return true;

			
 
				-}

			
 
				-

			
 
				-bool Unicode::isValidString(const std::string & text)

			
 
				-{

			
 
				-	for (size_t i=0; i<text.size(); i += getCharacterSize(text[i]))

			
 
				-	{

			
 
				-		if (!isValidCharacter(text.data() + i, text.size() - i))

			
 
				-			return false;

			
 
				-	}

			
 
				-	return true;

			
 
				-}

			
 
				-

			
 
				-bool Unicode::isValidString(const char * data, size_t size)

			
 
				-{

			
 
				-	for (size_t i=0; i<size; i += getCharacterSize(data[i]))

			
 
				-	{

			
 
				-		if (!isValidCharacter(data + i, size - i))

			
 
				-			return false;

			
 
				-	}

			
 
				-	return true;

			
 
				-}

			
 
				-

			
 
				 /// Detects language and encoding of H3 text files based on matching against pregenerated footprints of H3 file

			
 
				 void CGeneralTextHandler::detectInstallParameters()

			
 
				 {

			
@@ -173,55 +89,6 @@ void CGeneralTextHandler::detectInstallParameters()
 
				 	encoding->String() =  Languages::getLanguageOptions(knownLanguages[bestIndex]).encoding;

			
 
				 }

			
 
				 

			
 
				-std::string Unicode::toUnicode(const std::string &text)

			
 
				-{

			
 
				-	return toUnicode(text, CGeneralTextHandler::getInstalledEncoding());

			
 
				-}

			
 
				-

			
 
				-std::string Unicode::toUnicode(const std::string &text, const std::string &encoding)

			
 
				-{

			
 
				-	return boost::locale::conv::to_utf<char>(text, encoding);

			
 
				-}

			
 
				-

			
 
				-std::string Unicode::fromUnicode(const std::string & text)

			
 
				-{

			
 
				-	return fromUnicode(text, CGeneralTextHandler::getInstalledEncoding());

			
 
				-}

			
 
				-

			
 
				-std::string Unicode::fromUnicode(const std::string &text, const std::string &encoding)

			
 
				-{

			
 
				-	return boost::locale::conv::from_utf<char>(text, encoding);

			
 
				-}

			
 
				-

			
 
				-void Unicode::trimRight(std::string & text, const size_t amount)

			
 
				-{

			
 
				-	if(text.empty())

			
 
				-		return;

			
 
				-	//todo: more efficient algorithm

			
 
				-	for(int i = 0; i< amount; i++){

			
 
				-		auto b = text.begin();

			
 
				-		auto e = text.end();

			
 
				-		size_t lastLen = 0;

			
 
				-		size_t len = 0;

			
 
				-		while (b != e) {

			
 
				-			lastLen = len;

			
 
				-			size_t n = getCharacterSize(*b);

			
 
				-

			
 
				-			if(!isValidCharacter(&(*b),e-b))

			
 
				-			{

			
 
				-				logGlobal->error("Invalid UTF8 sequence");

			
 
				-				break;//invalid sequence will be trimmed

			
 
				-			}

			
 
				-

			
 
				-			len += n;

			
 
				-			b += n;

			
 
				-		}

			
 
				-

			
 
				-		text.resize(lastLen);

			
 
				-	}

			
 
				-}

			
 
				-

			
 
				-

			
 
				 //Helper for string -> float conversion

			
 
				 class LocaleWithComma: public std::numpunct<char>

			
 
				 {

			
@@ -710,9 +577,9 @@ void CGeneralTextHandler::dumpAllTexts()
 
				 	for ( auto const & entry : stringsLocalizations)

			
 
				 	{

			
 
				 		if (!entry.second.overrideValue.empty())

			
 
				-			logGlobal->info(R"("%s" : "%s", // %s / %s)", entry.first, escapeString(entry.second.overrideValue), entry.second.modContext, entry.second.overrideLanguage);

			
 
				+			logGlobal->info(R"("%s" : "%s",)", entry.first, escapeString(entry.second.overrideValue));

			
 
				 		else

			
 
				-			logGlobal->info(R"("%s" : "%s", // %s / %s)", entry.first, escapeString(entry.second.baseValue), entry.second.modContext, entry.second.baseLanguage);

			
 
				+			logGlobal->info(R"("%s" : "%s",)", entry.first, escapeString(entry.second.baseValue));

			
 
				 	}

			
 
				 

			
 
				 	logGlobal->info("END TEXT EXPORT");

			
--- a/lib/CGeneralTextHandler.h
+++ b/lib/CGeneralTextHandler.h
@@ -9,42 +9,10 @@
 
				  */

			
 
				 #pragma once

			
 
				 

			
 
				-#include "JsonNode.h"

			
 
				-

			
 
				 VCMI_LIB_NAMESPACE_BEGIN

			
 
				 

			
 
				-/// Namespace that provides utilites for unicode support (UTF-8)

			
 
				-namespace Unicode

			
 
				-{

			
 
				-	/// evaluates size of UTF-8 character

			
 
				-	size_t DLL_LINKAGE getCharacterSize(char firstByte);

			
 
				-

			
 
				-	/// test if character is a valid UTF-8 symbol

			
 
				-	/// maxSize - maximum number of bytes this symbol may consist from ( = remainer of string)

			
 
				-	bool DLL_LINKAGE isValidCharacter(const char * character, size_t maxSize);

			
 
				-

			
 
				-	/// test if text contains ASCII-string (no need for unicode conversion)

			
 
				-	bool DLL_LINKAGE isValidASCII(const std::string & text);

			
 
				-	bool DLL_LINKAGE isValidASCII(const char * data, size_t size);

			
 
				-

			
 
				-	/// test if text contains valid UTF-8 sequence

			
 
				-	bool DLL_LINKAGE isValidString(const std::string & text);

			
 
				-	bool DLL_LINKAGE isValidString(const char * data, size_t size);

			
 
				-

			
 
				-	/// converts text to unicode from specified encoding or from one specified in settings

			
 
				-	std::string DLL_LINKAGE toUnicode(const std::string & text);

			
 
				-	std::string DLL_LINKAGE toUnicode(const std::string & text, const std::string & encoding);

			
 
				-

			
 
				-	/// converts text from unicode to specified encoding or to one specified in settings

			
 
				-	/// NOTE: usage of these functions should be avoided if possible

			
 
				-	std::string DLL_LINKAGE fromUnicode(const std::string & text);

			
 
				-	std::string DLL_LINKAGE fromUnicode(const std::string & text, const std::string & encoding);

			
 
				-

			
 
				-	///delete (amount) UTF characters from right

			
 
				-	DLL_LINKAGE void trimRight(std::string & text, size_t amount = 1);

			
 
				-};

			
 
				-

			
 
				 class CInputStream;

			
 
				+class JsonNode;

			
 
				 

			
 
				 /// Parser for any text files from H3

			
 
				 class DLL_LINKAGE CLegacyConfigParser

			
--- a/lib/JsonDetail.cpp
+++ b/lib/JsonDetail.cpp
@@ -12,7 +12,7 @@
 
				 #include "JsonDetail.h"

			
 
				 

			
 
				 #include "VCMI_Lib.h"

			
 
				-#include "CGeneralTextHandler.h"

			
 
				+#include "TextOperations.h"

			
 
				 #include "CModHandler.h"

			
 
				 

			
 
				 #include "filesystem/Filesystem.h"

			
--- a/lib/TextOperations.cpp
+++ b/lib/TextOperations.cpp
@@ -0,0 +1,149 @@
 
				+/*

			
 
				+ * TextOperations.cpp, part of VCMI engine

			
 
				+ *

			
 
				+ * Authors: listed in file AUTHORS in main folder

			
 
				+ *

			
 
				+ * License: GNU General Public License v2.0 or later

			
 
				+ * Full text of license available in license.txt file, in main folder

			
 
				+ *

			
 
				+ */

			
 
				+#include "StdInc.h"

			
 
				+#include "TextOperations.h"

			
 
				+

			
 
				+#include "CGeneralTextHandler.h"

			
 
				+

			
 
				+#include <boost/locale.hpp>

			
 
				+

			
 
				+VCMI_LIB_NAMESPACE_BEGIN

			
 
				+

			
 
				+size_t Unicode::getCharacterSize(char firstByte)

			
 
				+{

			
 
				+	// length of utf-8 character can be determined from 1st byte by counting number of highest bits set to 1:

			
 
				+	// 0xxxxxxx -> 1 -  ASCII chars

			
 
				+	// 110xxxxx -> 2

			
 
				+	// 11110xxx -> 4 - last allowed in current standard

			
 
				+	// 1111110x -> 6 - last allowed in original standard

			
 
				+

			
 
				+	if ((ui8)firstByte < 0x80)

			
 
				+		return 1; // ASCII

			
 
				+

			
 
				+	size_t ret = 0;

			
 
				+

			
 
				+	for (size_t i=0; i<8; i++)

			
 
				+	{

			
 
				+		if (((ui8)firstByte & (0x80 >> i)) != 0)

			
 
				+			ret++;

			
 
				+		else

			
 
				+			break;

			
 
				+	}

			
 
				+	return ret;

			
 
				+}

			
 
				+

			
 
				+bool Unicode::isValidCharacter(const char * character, size_t maxSize)

			
 
				+{

			
 
				+	// can't be first byte in UTF8

			
 
				+	if ((ui8)character[0] >= 0x80 && (ui8)character[0] < 0xC0)

			
 
				+		return false;

			
 
				+	// first character must follow rules checked in getCharacterSize

			
 
				+	size_t size = getCharacterSize((ui8)character[0]);

			
 
				+

			
 
				+	if ((ui8)character[0] > 0xF4)

			
 
				+		return false; // above maximum allowed in standard (UTF codepoints are capped at 0x0010FFFF)

			
 
				+

			
 
				+	if (size > maxSize)

			
 
				+		return false;

			
 
				+

			
 
				+	// remaining characters must have highest bit set to 1

			
 
				+	for (size_t i = 1; i < size; i++)

			
 
				+	{

			
 
				+		if (((ui8)character[i] & 0x80) == 0)

			
 
				+			return false;

			
 
				+	}

			
 
				+	return true;

			
 
				+}

			
 
				+

			
 
				+bool Unicode::isValidASCII(const std::string & text)

			
 
				+{

			
 
				+	for (const char & ch : text)

			
 
				+		if (ui8(ch) >= 0x80 )

			
 
				+			return false;

			
 
				+	return true;

			
 
				+}

			
 
				+

			
 
				+bool Unicode::isValidASCII(const char * data, size_t size)

			
 
				+{

			
 
				+	for (size_t i=0; i<size; i++)

			
 
				+		if (ui8(data[i]) >= 0x80 )

			
 
				+			return false;

			
 
				+	return true;

			
 
				+}

			
 
				+

			
 
				+bool Unicode::isValidString(const std::string & text)

			
 
				+{

			
 
				+	for (size_t i=0; i<text.size(); i += getCharacterSize(text[i]))

			
 
				+	{

			
 
				+		if (!isValidCharacter(text.data() + i, text.size() - i))

			
 
				+			return false;

			
 
				+	}

			
 
				+	return true;

			
 
				+}

			
 
				+

			
 
				+bool Unicode::isValidString(const char * data, size_t size)

			
 
				+{

			
 
				+	for (size_t i=0; i<size; i += getCharacterSize(data[i]))

			
 
				+	{

			
 
				+		if (!isValidCharacter(data + i, size - i))

			
 
				+			return false;

			
 
				+	}

			
 
				+	return true;

			
 
				+}

			
 
				+

			
 
				+std::string Unicode::toUnicode(const std::string &text)

			
 
				+{

			
 
				+	return toUnicode(text, CGeneralTextHandler::getInstalledEncoding());

			
 
				+}

			
 
				+

			
 
				+std::string Unicode::toUnicode(const std::string &text, const std::string &encoding)

			
 
				+{

			
 
				+	return boost::locale::conv::to_utf<char>(text, encoding);

			
 
				+}

			
 
				+

			
 
				+std::string Unicode::fromUnicode(const std::string & text)

			
 
				+{

			
 
				+	return fromUnicode(text, CGeneralTextHandler::getInstalledEncoding());

			
 
				+}

			
 
				+

			
 
				+std::string Unicode::fromUnicode(const std::string &text, const std::string &encoding)

			
 
				+{

			
 
				+	return boost::locale::conv::from_utf<char>(text, encoding);

			
 
				+}

			
 
				+

			
 
				+void Unicode::trimRight(std::string & text, const size_t amount)

			
 
				+{

			
 
				+	if(text.empty())

			
 
				+		return;

			
 
				+	//todo: more efficient algorithm

			
 
				+	for(int i = 0; i< amount; i++){

			
 
				+		auto b = text.begin();

			
 
				+		auto e = text.end();

			
 
				+		size_t lastLen = 0;

			
 
				+		size_t len = 0;

			
 
				+		while (b != e) {

			
 
				+			lastLen = len;

			
 
				+			size_t n = getCharacterSize(*b);

			
 
				+

			
 
				+			if(!isValidCharacter(&(*b),e-b))

			
 
				+			{

			
 
				+				logGlobal->error("Invalid UTF8 sequence");

			
 
				+				break;//invalid sequence will be trimmed

			
 
				+			}

			
 
				+

			
 
				+			len += n;

			
 
				+			b += n;

			
 
				+		}

			
 
				+

			
 
				+		text.resize(lastLen);

			
 
				+	}

			
 
				+}

			
 
				+

			
 
				+VCMI_LIB_NAMESPACE_END

			
--- a/lib/TextOperations.h
+++ b/lib/TextOperations.h
@@ -0,0 +1,45 @@
 
				+/*

			
 
				+ * TextOperations.h, part of VCMI engine

			
 
				+ *

			
 
				+ * Authors: listed in file AUTHORS in main folder

			
 
				+ *

			
 
				+ * License: GNU General Public License v2.0 or later

			
 
				+ * Full text of license available in license.txt file, in main folder

			
 
				+ *

			
 
				+ */

			
 
				+#pragma once

			
 
				+

			
 
				+VCMI_LIB_NAMESPACE_BEGIN

			
 
				+

			
 
				+/// Namespace that provides utilities for unicode support (UTF-8)

			
 
				+namespace Unicode

			
 
				+{

			
 
				+	/// evaluates size of UTF-8 character

			
 
				+	size_t DLL_LINKAGE getCharacterSize(char firstByte);

			
 
				+

			
 
				+	/// test if character is a valid UTF-8 symbol

			
 
				+	/// maxSize - maximum number of bytes this symbol may consist of ( = remainder of string)

			
 
				+	bool DLL_LINKAGE isValidCharacter(const char * character, size_t maxSize);

			
 
				+

			
 
				+	/// test if text contains ASCII-string (no need for unicode conversion)

			
 
				+	bool DLL_LINKAGE isValidASCII(const std::string & text);

			
 
				+	bool DLL_LINKAGE isValidASCII(const char * data, size_t size);

			
 
				+

			
 
				+	/// test if text contains valid UTF-8 sequence

			
 
				+	bool DLL_LINKAGE isValidString(const std::string & text);

			
 
				+	bool DLL_LINKAGE isValidString(const char * data, size_t size);

			
 
				+

			
 
				+	/// converts text to unicode from specified encoding or from one specified in settings

			
 
				+	std::string DLL_LINKAGE toUnicode(const std::string & text);

			
 
				+	std::string DLL_LINKAGE toUnicode(const std::string & text, const std::string & encoding);

			
 
				+

			
 
				+	/// converts text from unicode to specified encoding or to one specified in settings

			
 
				+	/// NOTE: usage of these functions should be avoided if possible

			
 
				+	std::string DLL_LINKAGE fromUnicode(const std::string & text);

			
 
				+	std::string DLL_LINKAGE fromUnicode(const std::string & text, const std::string & encoding);

			
 
				+

			
 
				+	///delete (amount) UTF characters from right

			
 
				+	DLL_LINKAGE void trimRight(std::string & text, size_t amount = 1);

			
 
				+};

			
 
				+

			
 
				+VCMI_LIB_NAMESPACE_END

			
--- a/lib/filesystem/CBinaryReader.cpp
+++ b/lib/filesystem/CBinaryReader.cpp
@@ -11,7 +11,7 @@
 
				 #include "CBinaryReader.h"
			
 
				 
			
 
				 #include "CInputStream.h"
			
 
				-#include "../CGeneralTextHandler.h"
			
 
				+#include "../TextOperations.h"
			
 
				 
			
 
				 VCMI_LIB_NAMESPACE_BEGIN