16 лет назад · e4beefeb6d
--- a/Source/CMakeLists.txt
+++ b/Source/CMakeLists.txt
@@ -246,6 +246,9 @@ SET(SRCS
 
				   cmake.h
			
 
				   cmakewizard.cxx
			
 
				   cmakewizard.h
			
 
				+
			
 
				+  cm_utf8.h
			
 
				+  cm_utf8.c
			
 
				   )
			
 
				 
			
 
				 # Kdevelop only works on UNIX and not windows
			
--- a/Source/cmXMLSafe.cxx
+++ b/Source/cmXMLSafe.cxx
@@ -11,6 +11,8 @@
 
				 ============================================================================*/
			
 
				 #include "cmXMLSafe.h"
			
 
				 
			
 
				+#include "cm_utf8.h"
			
 
				+
			
 
				 #include <cmsys/ios/iostream>
			
 
				 #include <cmsys/ios/sstream>
			
 
				 
			
@@ -53,44 +55,47 @@ cmsys_ios::ostream& operator<<(cmsys_ios::ostream& os, cmXMLSafe const& self)
 
				 {
			
 
				   char const* first = self.Data;
			
 
				   char const* last = self.Data + self.Size;
			
 
				-  for(char const* ci = first; ci != last; ++ci)
			
 
				+  while(first != last)
			
 
				     {
			
 
				-    unsigned char c = static_cast<unsigned char>(*ci);
			
 
				-    switch(c)
			
 
				+    unsigned int ch;
			
 
				+    if(const char* next = cm_utf8_decode_character(first, last, &ch))
			
 
				       {
			
 
				-      case '&': os << "&amp;"; break;
			
 
				-      case '<': os << "&lt;"; break;
			
 
				-      case '>': os << "&gt;"; break;
			
 
				-      case '"': os << (self.DoQuotes? "&quot;" : "\""); break;
			
 
				-      case '\'': os << (self.DoQuotes? "&apos;" : "'"); break;
			
 
				-      case '\t': os << "\t"; break;
			
 
				-      case '\n': os << "\n"; break;
			
 
				-      case '\r': break; // Ignore CR
			
 
				-      default:
			
 
				-        if(c >= 0x20 && c <= 0x7f)
			
 
				-          {
			
 
				-          os.put(static_cast<char>(c));
			
 
				-          }
			
 
				-        else
			
 
				+      // http://www.w3.org/TR/REC-xml/#NT-Char
			
 
				+      if((ch >= 0x20 && ch <= 0xD7FF) ||
			
 
				+         (ch >= 0xE000 && ch <= 0xFFFD) ||
			
 
				+         (ch >= 0x10000 && ch <= 0x10FFFF) ||
			
 
				+          ch == 0x9 || ch == 0xA || ch == 0xD)
			
 
				+        {
			
 
				+        switch(ch)
			
 
				           {
			
 
				-          // TODO: More complete treatment of program output character
			
 
				-          // encoding.  Instead of escaping these bytes, we should
			
 
				-          // handle the current locale and its encoding.
			
 
				-          char buf[16];
			
 
				-          // http://www.w3.org/TR/REC-xml/#NT-Char
			
 
				-          if(c >= 0x80)
			
 
				-            {
			
 
				-            sprintf(buf, "&#x%hx;", static_cast<unsigned short>(c));
			
 
				-            }
			
 
				-          else
			
 
				-            {
			
 
				-            // We cannot use "&#x%hx;" here because this value is not
			
 
				-            // valid in XML.  Instead use a human-readable hex value.
			
 
				-            sprintf(buf, "&lt;0x%hx&gt;", static_cast<unsigned short>(c));
			
 
				-            }
			
 
				-          os << buf;
			
 
				+          // Escape XML control characters.
			
 
				+          case '&': os << "&amp;"; break;
			
 
				+          case '<': os << "&lt;"; break;
			
 
				+          case '>': os << "&gt;"; break;
			
 
				+          case '"': os << (self.DoQuotes? "&quot;" : "\""); break;
			
 
				+          case '\'': os << (self.DoQuotes? "&apos;" : "'"); break;
			
 
				+          case '\r': break; // Ignore CR
			
 
				+          // Print the UTF-8 character.
			
 
				+          default: os.write(first, next-first); break;
			
 
				           }
			
 
				-        break;
			
 
				+        }
			
 
				+      else
			
 
				+        {
			
 
				+        // Use a human-readable hex value for this invalid character.
			
 
				+        char buf[16];
			
 
				+        sprintf(buf, "%X", ch);
			
 
				+        os << "[NON-XML-CHAR-0x" << buf << "]";
			
 
				+        }
			
 
				+
			
 
				+      first = next;
			
 
				+      }
			
 
				+    else
			
 
				+      {
			
 
				+      ch = static_cast<unsigned char>(*first++);
			
 
				+      // Use a human-readable hex value for this invalid byte.
			
 
				+      char buf[16];
			
 
				+      sprintf(buf, "%X", ch);
			
 
				+      os << "[NON-UTF-8-BYTE-0x" << buf << "]";
			
 
				       }
			
 
				     }
			
 
				   return os;
			
--- a/Source/cm_utf8.c
+++ b/Source/cm_utf8.c
@@ -0,0 +1,84 @@
 
				+/*============================================================================
			
 
				+  CMake - Cross Platform Makefile Generator
			
 
				+  Copyright 2000-2009 Kitware, Inc., Insight Software Consortium
			
 
				+
			
 
				+  Distributed under the OSI-approved BSD License (the "License");
			
 
				+  see accompanying file Copyright.txt for details.
			
 
				+
			
 
				+  This software is distributed WITHOUT ANY WARRANTY; without even the
			
 
				+  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+  See the License for more information.
			
 
				+============================================================================*/
			
 
				+#include "cm_utf8.h"
			
 
				+
			
 
				+/*
			
 
				+  RFC 3629
			
 
				+  07-bit: 0xxxxxxx
			
 
				+  11-bit: 110xxxxx 10xxxxxx
			
 
				+  16-bit: 1110xxxx 10xxxxxx 10xxxxxx
			
 
				+  21-bit: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
			
 
				+
			
 
				+  Pre-RFC Compatibility
			
 
				+  26-bit: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
			
 
				+  31-bit: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
			
 
				+*/
			
 
				+
			
 
				+/* Number of leading ones before a zero in the byte.  */
			
 
				+static unsigned char const cm_utf8_ones[256] = {
			
 
				+  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
			
 
				+  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
			
 
				+  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
			
 
				+  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
			
 
				+  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
			
 
				+  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
			
 
				+  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
			
 
				+  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,7,8
			
 
				+};
			
 
				+
			
 
				+/* Mask away control bits from bytes with n leading ones.  */
			
 
				+static unsigned char const cm_utf8_mask[7] = {
			
 
				+  0xEF, 0x3F, 0x1F, 0x0F, 0x07, 0x03, 0x01
			
 
				+};
			
 
				+
			
 
				+/* Minimum allowed value when first byte has n leading ones.  */
			
 
				+static unsigned int const cm_utf8_min[7] = {
			
 
				+  0, 0, 1u<<7, 1u<<11, 1u<<16, 1u<<21, 1u<<26 /*, 1u<<31 */
			
 
				+};
			
 
				+
			
 
				+/*--------------------------------------------------------------------------*/
			
 
				+const char* cm_utf8_decode_character(const char* first, const char* last,
			
 
				+                                     unsigned int* pc)
			
 
				+{
			
 
				+  /* Count leading ones in the first byte.  */
			
 
				+  unsigned char c = *first++;
			
 
				+  unsigned char const ones = cm_utf8_ones[c];
			
 
				+  switch(ones)
			
 
				+    {
			
 
				+    case 0: *pc = c; return first;    /* One-byte character.  */
			
 
				+    case 1: case 7: case 8: return 0; /* Invalid leading byte.  */
			
 
				+    default: break;
			
 
				+    }
			
 
				+
			
 
				+  /* Extract bits from this multi-byte character.  */
			
 
				+  {
			
 
				+  unsigned int uc = c & cm_utf8_mask[ones];
			
 
				+  unsigned char left;
			
 
				+  for(left = ones-1; left && first != last; --left)
			
 
				+    {
			
 
				+    c = *first++;
			
 
				+    if(cm_utf8_ones[c] != 1)
			
 
				+      {
			
 
				+      return 0;
			
 
				+      }
			
 
				+    uc = (uc << 6) | (c & cm_utf8_mask[1]);
			
 
				+    }
			
 
				+
			
 
				+  if(left > 0 || uc < cm_utf8_min[ones])
			
 
				+    {
			
 
				+    return 0;
			
 
				+    }
			
 
				+
			
 
				+  *pc = uc;
			
 
				+  return first;
			
 
				+  }
			
 
				+}
			
--- a/Source/cm_utf8.h
+++ b/Source/cm_utf8.h
@@ -0,0 +1,29 @@
 
				+/*============================================================================
			
 
				+  CMake - Cross Platform Makefile Generator
			
 
				+  Copyright 2000-2009 Kitware, Inc., Insight Software Consortium
			
 
				+
			
 
				+  Distributed under the OSI-approved BSD License (the "License");
			
 
				+  see accompanying file Copyright.txt for details.
			
 
				+
			
 
				+  This software is distributed WITHOUT ANY WARRANTY; without even the
			
 
				+  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+  See the License for more information.
			
 
				+============================================================================*/
			
 
				+#ifndef cm_utf8_h
			
 
				+#define cm_utf8_h
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+/** Decode one UTF-8 character from the input byte range.  On success,
			
 
				+    stores the unicode character number in *pc and returns the first
			
 
				+    position not extracted.  On failure, returns 0.  */
			
 
				+const char* cm_utf8_decode_character(const char* first, const char* last,
			
 
				+                                     unsigned int* pc);
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+} /* extern "C" */
			
 
				+#endif
			
 
				+
			
 
				+#endif