| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240 | 
							- /*
 
-  * TextOperations.cpp, part of VCMI engine
 
-  *
 
-  * Authors: listed in file AUTHORS in main folder
 
-  *
 
-  * License: GNU General Public License v2.0 or later
 
-  * Full text of license available in license.txt file, in main folder
 
-  *
 
-  */
 
- #include "StdInc.h"
 
- #include "TextOperations.h"
 
- #include "CGeneralTextHandler.h"
 
- #include "Languages.h"
 
- #include "CConfigHandler.h"
 
- #include <vstd/DateUtils.h>
 
- #include <boost/locale.hpp>
 
- VCMI_LIB_NAMESPACE_BEGIN
 
- size_t TextOperations::getUnicodeCharacterSize(char firstByte)
 
- {
 
- 	// length of utf-8 character can be determined from 1st byte by counting number of highest bits set to 1:
 
- 	// 0xxxxxxx -> 1 -  ASCII chars
 
- 	// 110xxxxx -> 2
 
- 	// 1110xxxx -> 3
 
- 	// 11110xxx -> 4 - last allowed in current standard
 
- 	auto value = static_cast<uint8_t>(firstByte);
 
- 	if ((value & 0b10000000) == 0)
 
- 		return 1; // ASCII
 
- 	if ((value & 0b11100000) == 0b11000000)
 
- 		return 2;
 
- 	if ((value & 0b11110000) == 0b11100000)
 
- 		return 3;
 
- 	if ((value & 0b11111000) == 0b11110000)
 
- 		return 4;
 
- 	assert(0);// invalid unicode sequence
 
- 	return 4;
 
- }
 
- bool TextOperations::isValidUnicodeCharacter(const char * character, size_t maxSize)
 
- {
 
- 	assert(maxSize > 0);
 
- 	auto value = static_cast<uint8_t>(character[0]);
 
- 	// ASCII
 
- 	if ( value < 0b10000000)
 
- 		return maxSize > 0;
 
- 	// can't be first byte in UTF8
 
- 	if (value < 0b11000000)
 
- 		return false;
 
- 	// above maximum allowed in standard (UTF codepoints are capped at 0x0010FFFF)
 
- 	if (value > 0b11110000)
 
- 		return false;
 
- 	// first character must follow rules checked in getUnicodeCharacterSize
 
- 	size_t size = getUnicodeCharacterSize(character[0]);
 
- 	if (size > maxSize)
 
- 		return false;
 
- 	// remaining characters must have highest bit set to 1
 
- 	for (size_t i = 1; i < size; i++)
 
- 	{
 
- 		auto characterValue = static_cast<uint8_t>(character[i]);
 
- 		if (characterValue < 0b10000000)
 
- 			return false;
 
- 	}
 
- 	return true;
 
- }
 
- bool TextOperations::isValidASCII(const std::string & text)
 
- {
 
- 	for (const char & ch : text)
 
- 		if (static_cast<uint8_t>(ch) >= 0x80 )
 
- 			return false;
 
- 	return true;
 
- }
 
- bool TextOperations::isValidASCII(const char * data, size_t size)
 
- {
 
- 	for (size_t i=0; i<size; i++)
 
- 		if (static_cast<uint8_t>(data[i]) >= 0x80 )
 
- 			return false;
 
- 	return true;
 
- }
 
- bool TextOperations::isValidUnicodeString(const std::string & text)
 
- {
 
- 	for (size_t i=0; i<text.size(); i += getUnicodeCharacterSize(text[i]))
 
- 	{
 
- 		if (!isValidUnicodeCharacter(text.data() + i, text.size() - i))
 
- 			return false;
 
- 	}
 
- 	return true;
 
- }
 
- bool TextOperations::isValidUnicodeString(const char * data, size_t size)
 
- {
 
- 	for (size_t i=0; i<size; i += getUnicodeCharacterSize(data[i]))
 
- 	{
 
- 		if (!isValidUnicodeCharacter(data + i, size - i))
 
- 			return false;
 
- 	}
 
- 	return true;
 
- }
 
- uint32_t TextOperations::getUnicodeCodepoint(const char * data, size_t maxSize)
 
- {
 
- 	assert(isValidUnicodeCharacter(data, maxSize));
 
- 	if (!isValidUnicodeCharacter(data, maxSize))
 
- 		return 0;
 
- 	// https://en.wikipedia.org/wiki/UTF-8#Encoding
 
- 	switch (getUnicodeCharacterSize(data[0]))
 
- 	{
 
- 		case 1:
 
- 			return static_cast<uint8_t>(data[0]) & 0b1111111;
 
- 		case 2:
 
- 			return
 
- 				((static_cast<uint8_t>(data[0]) & 0b11111 ) << 6) +
 
- 				((static_cast<uint8_t>(data[1]) & 0b111111) << 0) ;
 
- 		case 3:
 
- 			return
 
- 				((static_cast<uint8_t>(data[0]) & 0b1111 )  << 12) +
 
- 				((static_cast<uint8_t>(data[1]) & 0b111111) << 6) +
 
- 				((static_cast<uint8_t>(data[2]) & 0b111111) << 0) ;
 
- 		case 4:
 
- 			return
 
- 				((static_cast<uint8_t>(data[0]) & 0b111 )   << 18) +
 
- 				((static_cast<uint8_t>(data[1]) & 0b111111) << 12) +
 
- 				((static_cast<uint8_t>(data[2]) & 0b111111) << 6) +
 
- 				((static_cast<uint8_t>(data[3]) & 0b111111) << 0) ;
 
- 	}
 
- 	assert(0);
 
- 	return 0;
 
- }
 
- uint32_t TextOperations::getUnicodeCodepoint(char data, const std::string & encoding )
 
- {
 
- 	std::string stringNative(1, data);
 
- 	std::string stringUnicode = toUnicode(stringNative, encoding);
 
- 	if (stringUnicode.empty())
 
- 		return 0;
 
- 	return getUnicodeCodepoint(stringUnicode.data(), stringUnicode.size());
 
- }
 
- std::string TextOperations::toUnicode(const std::string &text, const std::string &encoding)
 
- {
 
- 	return boost::locale::conv::to_utf<char>(text, encoding);
 
- }
 
- std::string TextOperations::fromUnicode(const std::string &text, const std::string &encoding)
 
- {
 
- 	return boost::locale::conv::from_utf<char>(text, encoding);
 
- }
 
- void TextOperations::trimRightUnicode(std::string & text, const size_t amount)
 
- {
 
- 	if(text.empty())
 
- 		return;
 
- 	//todo: more efficient algorithm
 
- 	for(int i = 0; i< amount; i++){
 
- 		auto b = text.begin();
 
- 		auto e = text.end();
 
- 		size_t lastLen = 0;
 
- 		size_t len = 0;
 
- 		while (b != e) {
 
- 			lastLen = len;
 
- 			size_t n = getUnicodeCharacterSize(*b);
 
- 			if(!isValidUnicodeCharacter(&(*b),e-b))
 
- 			{
 
- 				logGlobal->error("Invalid UTF8 sequence");
 
- 				break;//invalid sequence will be trimmed
 
- 			}
 
- 			len += n;
 
- 			b += n;
 
- 		}
 
- 		text.resize(lastLen);
 
- 	}
 
- }
 
- size_t TextOperations::getUnicodeCharactersCount(const std::string & text)
 
- {
 
- 	std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> conv;
 
- 	return conv.from_bytes(text).size(); 
 
- }
 
- std::string TextOperations::escapeString(std::string input)
 
- {
 
- 	boost::replace_all(input, "\\", "\\\\");
 
- 	boost::replace_all(input, "\n", "\\n");
 
- 	boost::replace_all(input, "\r", "\\r");
 
- 	boost::replace_all(input, "\t", "\\t");
 
- 	boost::replace_all(input, "\"", "\\\"");
 
- 	return input;
 
- }
 
- std::string TextOperations::getFormattedDateTimeLocal(std::time_t dt)
 
- {
 
- 	return vstd::getFormattedDateTime(dt, Languages::getLanguageOptions(settings["general"]["language"].String()).dateTimeFormat);
 
- }
 
- std::string TextOperations::getFormattedTimeLocal(std::time_t dt)
 
- {
 
- 	return vstd::getFormattedDateTime(dt, "%H:%M");
 
- }
 
- std::string TextOperations::getCurrentFormattedTimeLocal(std::chrono::seconds timeOffset)
 
- {
 
- 	auto timepoint = std::chrono::system_clock::now() + timeOffset;
 
- 	return TextOperations::getFormattedTimeLocal(std::chrono::system_clock::to_time_t(timepoint));
 
- }
 
- std::string TextOperations::getCurrentFormattedDateTimeLocal(std::chrono::seconds timeOffset)
 
- {
 
- 	auto timepoint = std::chrono::system_clock::now() + timeOffset;
 
- 	return TextOperations::getFormattedDateTimeLocal(std::chrono::system_clock::to_time_t(timepoint));
 
- }
 
- VCMI_LIB_NAMESPACE_END
 
 
  |