浏览代码

Merge topic 'nmake-encoding'

c666f8cb NMake: Use ANSI encoding for NMake Makefiles
f00214aa cmGeneratedFileStream: Add optional encoding support
bb1d3370 codecvt: Add class for encoding conversion
Brad King 9 年之前
父节点
当前提交
b4fab15fc7

+ 2 - 0
Source/CMakeLists.txt

@@ -384,6 +384,8 @@ set(SRCS
   cm_sha2.c
   cm_utf8.h
   cm_utf8.c
+  cm_codecvt.hxx
+  cm_codecvt.cxx
   )
 
 set(COMMAND_INCLUDES "#include \"cmTargetPropCommandBase.cxx\"\n")

+ 17 - 2
Source/cmGeneratedFileStream.cxx

@@ -10,13 +10,21 @@
 #include <cm_zlib.h>
 #endif
 
-cmGeneratedFileStream::cmGeneratedFileStream()
+cmGeneratedFileStream::cmGeneratedFileStream(Encoding encoding)
   : cmGeneratedFileStreamBase()
   , Stream()
 {
+#ifdef CMAKE_BUILD_WITH_CMAKE
+  if (encoding != codecvt::None) {
+    imbue(std::locale(getloc(), new codecvt(encoding)));
+  }
+#else
+  static_cast<void>(encoding);
+#endif
 }
 
-cmGeneratedFileStream::cmGeneratedFileStream(const char* name, bool quiet)
+cmGeneratedFileStream::cmGeneratedFileStream(const char* name, bool quiet,
+                                             Encoding encoding)
   : cmGeneratedFileStreamBase(name)
   , Stream(TempName.c_str())
 {
@@ -26,6 +34,13 @@ cmGeneratedFileStream::cmGeneratedFileStream(const char* name, bool quiet)
                          this->TempName.c_str());
     cmSystemTools::ReportLastSystemError("");
   }
+#ifdef CMAKE_BUILD_WITH_CMAKE
+  if (encoding != codecvt::None) {
+    imbue(std::locale(getloc(), new codecvt(encoding)));
+  }
+#else
+  static_cast<void>(encoding);
+#endif
 }
 
 cmGeneratedFileStream::~cmGeneratedFileStream()

+ 5 - 2
Source/cmGeneratedFileStream.h

@@ -5,6 +5,7 @@
 
 #include <cmConfigure.h>
 
+#include <cm_codecvt.hxx>
 #include <cmsys/FStream.hxx>
 #include <string>
 
@@ -71,12 +72,13 @@ class cmGeneratedFileStream : private cmGeneratedFileStreamBase,
 {
 public:
   typedef cmsys::ofstream Stream;
+  typedef codecvt::Encoding Encoding;
 
   /**
    * This constructor prepares a default stream.  The open method must
    * be used before writing to the stream.
    */
-  cmGeneratedFileStream();
+  cmGeneratedFileStream(Encoding encoding = codecvt::None);
 
   /**
    * This constructor takes the name of the file to be generated.  It
@@ -84,7 +86,8 @@ public:
    * file cannot be opened an error message is produced unless the
    * second argument is set to true.
    */
-  cmGeneratedFileStream(const char* name, bool quiet = false);
+  cmGeneratedFileStream(const char* name, bool quiet = false,
+                        Encoding encoding = codecvt::None);
 
   /**
    * The destructor checks the stream status to be sure the temporary

+ 7 - 0
Source/cmGlobalGenerator.h

@@ -10,6 +10,7 @@
 #include "cmSystemTools.h"
 #include "cmTarget.h"
 #include "cmTargetDepend.h"
+#include "cm_codecvt.hxx"
 
 #include <iosfwd>
 #include <map>
@@ -60,6 +61,12 @@ public:
     return this->GetName() == name;
   }
 
+  /** Get encoding used by generator for makefile files */
+  virtual codecvt::Encoding GetMakefileEncoding() const
+  {
+    return codecvt::None;
+  }
+
   /** Tell the generator about the target system.  */
   virtual bool SetSystemName(std::string const&, cmMakefile*) { return true; }
 

+ 6 - 0
Source/cmGlobalNMakeMakefileGenerator.h

@@ -26,6 +26,12 @@ public:
   }
   static std::string GetActualName() { return "NMake Makefiles"; }
 
+  /** Get encoding used by generator for makefile files */
+  codecvt::Encoding GetMakefileEncoding() const CM_OVERRIDE
+  {
+    return codecvt::ANSI;
+  }
+
   /** Get the documentation entry for this generator.  */
   static void GetDocumentation(cmDocumentationEntry& entry);
 

+ 2 - 1
Source/cmGlobalUnixMakefileGenerator3.cxx

@@ -201,7 +201,8 @@ void cmGlobalUnixMakefileGenerator3::WriteMainMakefile2()
     this->GetCMakeInstance()->GetHomeOutputDirectory();
   makefileName += cmake::GetCMakeFilesDirectory();
   makefileName += "/Makefile2";
-  cmGeneratedFileStream makefileStream(makefileName.c_str());
+  cmGeneratedFileStream makefileStream(makefileName.c_str(), false,
+                                       this->GetMakefileEncoding());
   if (!makefileStream) {
     return;
   }

+ 8 - 3
Source/cmLocalUnixMakefileGenerator3.cxx

@@ -230,7 +230,9 @@ void cmLocalUnixMakefileGenerator3::WriteLocalMakefile()
   // Open the rule file.  This should be copy-if-different because the
   // rules may depend on this file itself.
   std::string ruleFileNameFull = this->ConvertToFullPath(ruleFileName);
-  cmGeneratedFileStream ruleFileStream(ruleFileNameFull.c_str());
+  cmGeneratedFileStream ruleFileStream(
+    ruleFileNameFull.c_str(), false,
+    this->GlobalGenerator->GetMakefileEncoding());
   if (!ruleFileStream) {
     return;
   }
@@ -1417,7 +1419,9 @@ bool cmLocalUnixMakefileGenerator3::ScanDependencies(
   // because the make tool may try to reload it needlessly otherwise.
   std::string ruleFileNameFull = dir;
   ruleFileNameFull += "/depend.make";
-  cmGeneratedFileStream ruleFileStream(ruleFileNameFull.c_str());
+  cmGeneratedFileStream ruleFileStream(
+    ruleFileNameFull.c_str(), false,
+    this->GlobalGenerator->GetMakefileEncoding());
   ruleFileStream.SetCopyIfDifferent(true);
   if (!ruleFileStream) {
     return false;
@@ -1429,7 +1433,8 @@ bool cmLocalUnixMakefileGenerator3::ScanDependencies(
   std::string internalRuleFileNameFull = dir;
   internalRuleFileNameFull += "/depend.internal";
   cmGeneratedFileStream internalRuleFileStream(
-    internalRuleFileNameFull.c_str());
+    internalRuleFileNameFull.c_str(), false,
+    this->GlobalGenerator->GetMakefileEncoding());
   if (!internalRuleFileStream) {
     return false;
   }

+ 7 - 3
Source/cmMakefileTargetGenerator.cxx

@@ -110,7 +110,8 @@ void cmMakefileTargetGenerator::CreateRuleFile()
   // Open the rule file.  This should be copy-if-different because the
   // rules may depend on this file itself.
   this->BuildFileStream =
-    new cmGeneratedFileStream(this->BuildFileNameFull.c_str());
+    new cmGeneratedFileStream(this->BuildFileNameFull.c_str(), false,
+                              this->GlobalGenerator->GetMakefileEncoding());
   this->BuildFileStream->SetCopyIfDifferent(true);
   if (!this->BuildFileStream) {
     return;
@@ -231,7 +232,9 @@ void cmMakefileTargetGenerator::WriteCommonCodeRules()
   // make sure the depend file exists
   if (!cmSystemTools::FileExists(dependFileNameFull.c_str())) {
     // Write an empty dependency file.
-    cmGeneratedFileStream depFileStream(dependFileNameFull.c_str());
+    cmGeneratedFileStream depFileStream(
+      dependFileNameFull.c_str(), false,
+      this->GlobalGenerator->GetMakefileEncoding());
     depFileStream << "# Empty dependencies file for "
                   << this->GeneratorTarget->GetName() << ".\n"
                   << "# This may be replaced when dependencies are built."
@@ -243,7 +246,8 @@ void cmMakefileTargetGenerator::WriteCommonCodeRules()
   this->FlagFileNameFull = this->TargetBuildDirectoryFull;
   this->FlagFileNameFull += "/flags.make";
   this->FlagFileStream =
-    new cmGeneratedFileStream(this->FlagFileNameFull.c_str());
+    new cmGeneratedFileStream(this->FlagFileNameFull.c_str(), false,
+                              this->GlobalGenerator->GetMakefileEncoding());
   this->FlagFileStream->SetCopyIfDifferent(true);
   if (!this->FlagFileStream) {
     return;

+ 215 - 0
Source/cm_codecvt.cxx

@@ -0,0 +1,215 @@
+/* Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
+   file Copyright.txt or https://cmake.org/licensing for details.  */
+#include "cm_codecvt.hxx"
+#include <limits>
+
+#if defined(_WIN32)
+#include <windows.h>
+#undef max
+#include <cmsys/Encoding.hxx>
+#endif
+
+codecvt::codecvt(Encoding e)
+  : m_lastState(0)
+#if defined(_WIN32)
+  , m_codepage(0)
+#endif
+{
+  switch (e) {
+    case codecvt::ANSI:
+#if defined(_WIN32)
+      m_noconv = false;
+      m_codepage = CP_ACP;
+      break;
+#endif
+    // We don't know which ANSI encoding to use for other platforms than
+    // Windows so we don't do any conversion there
+    case codecvt::UTF8:
+    // Assume internal encoding is UTF-8
+    case codecvt::None:
+    // No encoding
+    default:
+      m_noconv = true;
+  }
+}
+
+codecvt::~codecvt(){};
+
+bool codecvt::do_always_noconv() const throw()
+{
+  return m_noconv;
+};
+
+std::codecvt_base::result codecvt::do_out(mbstate_t& state, const char* from,
+                                          const char* from_end,
+                                          const char*& from_next, char* to,
+                                          char* to_end, char*& to_next) const
+{
+  if (m_noconv) {
+    return noconv;
+  }
+  std::codecvt_base::result res = error;
+#if defined(_WIN32)
+  from_next = from;
+  to_next = to;
+  bool convert = true;
+  size_t count = from_end - from;
+  const char* data = from;
+  unsigned int& stateId = reinterpret_cast<unsigned int&>(state);
+  if (count == 0) {
+    return codecvt::ok;
+  } else if (count == 1) {
+    if (stateId == 0) {
+      // decode first byte for UTF-8
+      if ((*from & 0xF8) == 0xF0 || // 1111 0xxx; 4 bytes for codepoint
+          (*from & 0xF0) == 0xE0 || // 1110 xxxx; 3 bytes for codepoint
+          (*from & 0xE0) == 0xC0)   // 110x xxxx; 2 bytes for codepoint
+      {
+        stateId = findStateId();
+        codecvt::State& s = m_states.at(stateId - 1);
+        s.bytes[0] = *from;
+        convert = false;
+        if ((*from & 0xF8) == 0xF0) {
+          s.totalBytes = 4;
+        } else if ((*from & 0xF0) == 0xE0) {
+          s.totalBytes = 3;
+        } else if ((*from & 0xE0) == 0xC0) {
+          s.totalBytes = 2;
+        }
+        s.bytesLeft = s.totalBytes - 1;
+      };
+      // else 1 byte for codepoint
+    } else {
+      codecvt::State& s = m_states.at(stateId - 1);
+      s.bytes[s.totalBytes - s.bytesLeft] = *from;
+      s.bytesLeft--;
+      data = s.bytes;
+      count = s.totalBytes - s.bytesLeft;
+      if ((*from & 0xC0) == 0x80) { // 10xx xxxx
+        convert = s.bytesLeft == 0;
+      } else {
+        // invalid multi-byte
+        convert = true;
+      }
+      if (convert) {
+        s.used = false;
+        if (stateId == m_lastState) {
+          m_lastState--;
+        }
+        stateId = 0;
+      }
+    }
+    if (convert) {
+      std::wstring wide = cmsys::Encoding::ToWide(std::string(data, count));
+      int r = WideCharToMultiByte(m_codepage, 0, wide.c_str(),
+                                  static_cast<int>(wide.size()), to,
+                                  to_end - to, NULL, NULL);
+      if (r > 0) {
+        from_next = from_end;
+        to_next = to + r;
+        res = ok;
+      }
+    } else {
+      res = partial;
+      from_next = from_end;
+      to_next = to;
+    }
+  }
+#else
+  static_cast<void>(state);
+  static_cast<void>(from);
+  static_cast<void>(from_end);
+  static_cast<void>(from_next);
+  static_cast<void>(to);
+  static_cast<void>(to_end);
+  static_cast<void>(to_next);
+  res = codecvt::noconv;
+#endif
+  return res;
+};
+
+std::codecvt_base::result codecvt::do_unshift(mbstate_t& state, char* to,
+                                              char* to_end,
+                                              char*& to_next) const
+{
+  std::codecvt_base::result res = error;
+  to_next = to;
+#if defined(_WIN32)
+  unsigned int& stateId = reinterpret_cast<unsigned int&>(state);
+  if (stateId > 0) {
+    codecvt::State& s = m_states.at(stateId - 1);
+    s.used = false;
+    if (stateId == m_lastState) {
+      m_lastState--;
+    }
+    stateId = 0;
+    std::wstring wide = cmsys::Encoding::ToWide(
+      std::string(s.bytes, s.totalBytes - s.bytesLeft));
+    int r = WideCharToMultiByte(m_codepage, 0, wide.c_str(),
+                                static_cast<int>(wide.size()), to, to_end - to,
+                                NULL, NULL);
+    if (r > 0) {
+      to_next = to + r;
+      res = ok;
+    }
+  } else {
+    res = ok;
+  }
+#else
+  static_cast<void>(state);
+  static_cast<void>(to_end);
+  res = ok;
+#endif
+  return res;
+};
+
+int codecvt::do_max_length() const throw()
+{
+  return 4;
+};
+
+int codecvt::do_encoding() const throw()
+{
+  return 0;
+};
+
+unsigned int codecvt::findStateId() const
+{
+  unsigned int stateId = 0;
+  bool add = false;
+  const unsigned int maxSize = std::numeric_limits<unsigned int>::max();
+  if (m_lastState >= maxSize) {
+    m_lastState = 0;
+  }
+  if (m_states.size() <= m_lastState) {
+    add = true;
+  } else {
+    unsigned int i = m_lastState;
+    while (i < maxSize) {
+      codecvt::State& s = m_states.at(i);
+      i++;
+      if (!s.used) {
+        m_lastState = i;
+        stateId = m_lastState;
+        s.used = true;
+        s.totalBytes = 0;
+        s.bytesLeft = 0;
+        break;
+      }
+      if (i >= m_states.size()) {
+        i = 0;
+      }
+      if (i == m_lastState) {
+        add = true;
+        break;
+      }
+    }
+  };
+  if (add) {
+    codecvt::State s = { true, 0, 0, { 0, 0, 0, 0 } };
+    m_states.push_back(s);
+    m_lastState = (unsigned int)m_states.size();
+    stateId = m_lastState;
+  }
+  return stateId;
+};

+ 57 - 0
Source/cm_codecvt.hxx

@@ -0,0 +1,57 @@
+/* Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
+   file Copyright.txt or https://cmake.org/licensing for details.  */
+#ifndef cm_codecvt_hxx
+#define cm_codecvt_hxx
+
+#include <cmConfigure.h> // IWYU pragma: keep
+
+#include <locale>
+#include <vector>
+
+class codecvt : public std::codecvt<char, char, mbstate_t>
+{
+public:
+  enum Encoding
+  {
+    None,
+    UTF8,
+    ANSI
+  };
+
+#ifdef CMAKE_BUILD_WITH_CMAKE
+
+  codecvt(Encoding e);
+
+protected:
+  ~codecvt() CM_OVERRIDE;
+  bool do_always_noconv() const throw() CM_OVERRIDE;
+  result do_out(mbstate_t& state, const char* from, const char* from_end,
+                const char*& from_next, char* to, char* to_end,
+                char*& to_next) const CM_OVERRIDE;
+  result do_unshift(mbstate_t& state, char* to, char*,
+                    char*& to_next) const CM_OVERRIDE;
+  int do_max_length() const throw() CM_OVERRIDE;
+  int do_encoding() const throw() CM_OVERRIDE;
+
+private:
+  typedef struct
+  {
+    bool used;
+    unsigned char totalBytes;
+    unsigned char bytesLeft;
+    char bytes[4];
+  } State;
+
+  unsigned int findStateId() const;
+
+  bool m_noconv;
+  mutable std::vector<State> m_states;
+  mutable unsigned int m_lastState;
+#if defined(_WIN32)
+  unsigned int m_codepage;
+#endif
+
+#endif
+};
+
+#endif