|
@@ -0,0 +1,215 @@
|
|
|
|
|
+/* Distributed under the OSI-approved BSD 3-Clause License. See accompanying
|
|
|
|
|
+ file Copyright.txt or https://cmake.org/licensing for details. */
|
|
|
|
|
+#include "cm_codecvt.hxx"
|
|
|
|
|
+#include <limits>
|
|
|
|
|
+
|
|
|
|
|
+#if defined(_WIN32)
|
|
|
|
|
+#include <windows.h>
|
|
|
|
|
+#undef max
|
|
|
|
|
+#include <cmsys/Encoding.hxx>
|
|
|
|
|
+#endif
|
|
|
|
|
+
|
|
|
|
|
+codecvt::codecvt(Encoding e)
|
|
|
|
|
+ : m_lastState(0)
|
|
|
|
|
+#if defined(_WIN32)
|
|
|
|
|
+ , m_codepage(0)
|
|
|
|
|
+#endif
|
|
|
|
|
+{
|
|
|
|
|
+ switch (e) {
|
|
|
|
|
+ case codecvt::ANSI:
|
|
|
|
|
+#if defined(_WIN32)
|
|
|
|
|
+ m_noconv = false;
|
|
|
|
|
+ m_codepage = CP_ACP;
|
|
|
|
|
+ break;
|
|
|
|
|
+#endif
|
|
|
|
|
+ // We don't know which ANSI encoding to use for other platforms than
|
|
|
|
|
+ // Windows so we don't do any conversion there
|
|
|
|
|
+ case codecvt::UTF8:
|
|
|
|
|
+ // Assume internal encoding is UTF-8
|
|
|
|
|
+ case codecvt::None:
|
|
|
|
|
+ // No encoding
|
|
|
|
|
+ default:
|
|
|
|
|
+ m_noconv = true;
|
|
|
|
|
+ }
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+codecvt::~codecvt(){};
|
|
|
|
|
+
|
|
|
|
|
+bool codecvt::do_always_noconv() const throw()
|
|
|
|
|
+{
|
|
|
|
|
+ return m_noconv;
|
|
|
|
|
+};
|
|
|
|
|
+
|
|
|
|
|
+std::codecvt_base::result codecvt::do_out(mbstate_t& state, const char* from,
|
|
|
|
|
+ const char* from_end,
|
|
|
|
|
+ const char*& from_next, char* to,
|
|
|
|
|
+ char* to_end, char*& to_next) const
|
|
|
|
|
+{
|
|
|
|
|
+ if (m_noconv) {
|
|
|
|
|
+ return noconv;
|
|
|
|
|
+ }
|
|
|
|
|
+ std::codecvt_base::result res = error;
|
|
|
|
|
+#if defined(_WIN32)
|
|
|
|
|
+ from_next = from;
|
|
|
|
|
+ to_next = to;
|
|
|
|
|
+ bool convert = true;
|
|
|
|
|
+ size_t count = from_end - from;
|
|
|
|
|
+ const char* data = from;
|
|
|
|
|
+ unsigned int& stateId = reinterpret_cast<unsigned int&>(state);
|
|
|
|
|
+ if (count == 0) {
|
|
|
|
|
+ return codecvt::ok;
|
|
|
|
|
+ } else if (count == 1) {
|
|
|
|
|
+ if (stateId == 0) {
|
|
|
|
|
+ // decode first byte for UTF-8
|
|
|
|
|
+ if ((*from & 0xF8) == 0xF0 || // 1111 0xxx; 4 bytes for codepoint
|
|
|
|
|
+ (*from & 0xF0) == 0xE0 || // 1110 xxxx; 3 bytes for codepoint
|
|
|
|
|
+ (*from & 0xE0) == 0xC0) // 110x xxxx; 2 bytes for codepoint
|
|
|
|
|
+ {
|
|
|
|
|
+ stateId = findStateId();
|
|
|
|
|
+ codecvt::State& s = m_states.at(stateId - 1);
|
|
|
|
|
+ s.bytes[0] = *from;
|
|
|
|
|
+ convert = false;
|
|
|
|
|
+ if ((*from & 0xF8) == 0xF0) {
|
|
|
|
|
+ s.totalBytes = 4;
|
|
|
|
|
+ } else if ((*from & 0xF0) == 0xE0) {
|
|
|
|
|
+ s.totalBytes = 3;
|
|
|
|
|
+ } else if ((*from & 0xE0) == 0xC0) {
|
|
|
|
|
+ s.totalBytes = 2;
|
|
|
|
|
+ }
|
|
|
|
|
+ s.bytesLeft = s.totalBytes - 1;
|
|
|
|
|
+ };
|
|
|
|
|
+ // else 1 byte for codepoint
|
|
|
|
|
+ } else {
|
|
|
|
|
+ codecvt::State& s = m_states.at(stateId - 1);
|
|
|
|
|
+ s.bytes[s.totalBytes - s.bytesLeft] = *from;
|
|
|
|
|
+ s.bytesLeft--;
|
|
|
|
|
+ data = s.bytes;
|
|
|
|
|
+ count = s.totalBytes - s.bytesLeft;
|
|
|
|
|
+ if ((*from & 0xC0) == 0x80) { // 10xx xxxx
|
|
|
|
|
+ convert = s.bytesLeft == 0;
|
|
|
|
|
+ } else {
|
|
|
|
|
+ // invalid multi-byte
|
|
|
|
|
+ convert = true;
|
|
|
|
|
+ }
|
|
|
|
|
+ if (convert) {
|
|
|
|
|
+ s.used = false;
|
|
|
|
|
+ if (stateId == m_lastState) {
|
|
|
|
|
+ m_lastState--;
|
|
|
|
|
+ }
|
|
|
|
|
+ stateId = 0;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ if (convert) {
|
|
|
|
|
+ std::wstring wide = cmsys::Encoding::ToWide(std::string(data, count));
|
|
|
|
|
+ int r = WideCharToMultiByte(m_codepage, 0, wide.c_str(),
|
|
|
|
|
+ static_cast<int>(wide.size()), to,
|
|
|
|
|
+ to_end - to, NULL, NULL);
|
|
|
|
|
+ if (r > 0) {
|
|
|
|
|
+ from_next = from_end;
|
|
|
|
|
+ to_next = to + r;
|
|
|
|
|
+ res = ok;
|
|
|
|
|
+ }
|
|
|
|
|
+ } else {
|
|
|
|
|
+ res = partial;
|
|
|
|
|
+ from_next = from_end;
|
|
|
|
|
+ to_next = to;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+#else
|
|
|
|
|
+ static_cast<void>(state);
|
|
|
|
|
+ static_cast<void>(from);
|
|
|
|
|
+ static_cast<void>(from_end);
|
|
|
|
|
+ static_cast<void>(from_next);
|
|
|
|
|
+ static_cast<void>(to);
|
|
|
|
|
+ static_cast<void>(to_end);
|
|
|
|
|
+ static_cast<void>(to_next);
|
|
|
|
|
+ res = codecvt::noconv;
|
|
|
|
|
+#endif
|
|
|
|
|
+ return res;
|
|
|
|
|
+};
|
|
|
|
|
+
|
|
|
|
|
+std::codecvt_base::result codecvt::do_unshift(mbstate_t& state, char* to,
|
|
|
|
|
+ char* to_end,
|
|
|
|
|
+ char*& to_next) const
|
|
|
|
|
+{
|
|
|
|
|
+ std::codecvt_base::result res = error;
|
|
|
|
|
+ to_next = to;
|
|
|
|
|
+#if defined(_WIN32)
|
|
|
|
|
+ unsigned int& stateId = reinterpret_cast<unsigned int&>(state);
|
|
|
|
|
+ if (stateId > 0) {
|
|
|
|
|
+ codecvt::State& s = m_states.at(stateId - 1);
|
|
|
|
|
+ s.used = false;
|
|
|
|
|
+ if (stateId == m_lastState) {
|
|
|
|
|
+ m_lastState--;
|
|
|
|
|
+ }
|
|
|
|
|
+ stateId = 0;
|
|
|
|
|
+ std::wstring wide = cmsys::Encoding::ToWide(
|
|
|
|
|
+ std::string(s.bytes, s.totalBytes - s.bytesLeft));
|
|
|
|
|
+ int r = WideCharToMultiByte(m_codepage, 0, wide.c_str(),
|
|
|
|
|
+ static_cast<int>(wide.size()), to, to_end - to,
|
|
|
|
|
+ NULL, NULL);
|
|
|
|
|
+ if (r > 0) {
|
|
|
|
|
+ to_next = to + r;
|
|
|
|
|
+ res = ok;
|
|
|
|
|
+ }
|
|
|
|
|
+ } else {
|
|
|
|
|
+ res = ok;
|
|
|
|
|
+ }
|
|
|
|
|
+#else
|
|
|
|
|
+ static_cast<void>(state);
|
|
|
|
|
+ static_cast<void>(to_end);
|
|
|
|
|
+ res = ok;
|
|
|
|
|
+#endif
|
|
|
|
|
+ return res;
|
|
|
|
|
+};
|
|
|
|
|
+
|
|
|
|
|
+int codecvt::do_max_length() const throw()
|
|
|
|
|
+{
|
|
|
|
|
+ return 4;
|
|
|
|
|
+};
|
|
|
|
|
+
|
|
|
|
|
+int codecvt::do_encoding() const throw()
|
|
|
|
|
+{
|
|
|
|
|
+ return 0;
|
|
|
|
|
+};
|
|
|
|
|
+
|
|
|
|
|
+unsigned int codecvt::findStateId() const
|
|
|
|
|
+{
|
|
|
|
|
+ unsigned int stateId = 0;
|
|
|
|
|
+ bool add = false;
|
|
|
|
|
+ const unsigned int maxSize = std::numeric_limits<unsigned int>::max();
|
|
|
|
|
+ if (m_lastState >= maxSize) {
|
|
|
|
|
+ m_lastState = 0;
|
|
|
|
|
+ }
|
|
|
|
|
+ if (m_states.size() <= m_lastState) {
|
|
|
|
|
+ add = true;
|
|
|
|
|
+ } else {
|
|
|
|
|
+ unsigned int i = m_lastState;
|
|
|
|
|
+ while (i < maxSize) {
|
|
|
|
|
+ codecvt::State& s = m_states.at(i);
|
|
|
|
|
+ i++;
|
|
|
|
|
+ if (!s.used) {
|
|
|
|
|
+ m_lastState = i;
|
|
|
|
|
+ stateId = m_lastState;
|
|
|
|
|
+ s.used = true;
|
|
|
|
|
+ s.totalBytes = 0;
|
|
|
|
|
+ s.bytesLeft = 0;
|
|
|
|
|
+ break;
|
|
|
|
|
+ }
|
|
|
|
|
+ if (i >= m_states.size()) {
|
|
|
|
|
+ i = 0;
|
|
|
|
|
+ }
|
|
|
|
|
+ if (i == m_lastState) {
|
|
|
|
|
+ add = true;
|
|
|
|
|
+ break;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ };
|
|
|
|
|
+ if (add) {
|
|
|
|
|
+ codecvt::State s = { true, 0, 0, { 0, 0, 0, 0 } };
|
|
|
|
|
+ m_states.push_back(s);
|
|
|
|
|
+ m_lastState = (unsigned int)m_states.size();
|
|
|
|
|
+ stateId = m_lastState;
|
|
|
|
|
+ }
|
|
|
|
|
+ return stateId;
|
|
|
|
|
+};
|