//
// TextEncoding.h
//
// $Id: //poco/Main/Foundation/include/Poco/TextEncoding.h#4 $
//
// Library: Foundation
// Package: Text
// Module:  TextEncoding
//
// Definition of the abstract TextEncoding class.
//
// Copyright (c) 2004-2007, Applied Informatics Software Engineering GmbH.
// and Contributors.
//
// Permission is hereby granted, free of charge, to any person or organization
// obtaining a copy of the software and accompanying documentation covered by
// this license (the "Software") to use, reproduce, display, distribute,
// execute, and transmit the Software, and to prepare derivative works of the
// Software, and to permit third-parties to whom the Software is furnished to
// do so, all subject to the following:
//
// The copyright notices in the Software and this entire statement, including
// the above license grant, this restriction and the following disclaimer,
// must be included in all copies of the Software, in whole or in part, and
// all derivative works of the Software, unless such copies or derivative
// works are solely in the form of machine-executable object code generated by
// a source language processor.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
//
  37. #ifndef Foundation_TextEncoding_INCLUDED
  38. #define Foundation_TextEncoding_INCLUDED
  39. #include "Poco/Foundation.h"
  40. #include "Poco/SharedPtr.h"
  41. namespace Poco {
  42. class TextEncodingManager;
  43. class Foundation_API TextEncoding
  44. /// An abstract base class for implementing text encodings
  45. /// like UTF-8 or ISO 8859-1.
  46. ///
  47. /// Subclasses must override the canonicalName(), isA(),
  48. /// characterMap() and convert() methods and need to be
  49. /// thread safe and stateless.
  50. ///
  51. /// TextEncoding also provides static member functions
  52. /// for managing mappings from encoding names to
  53. /// TextEncoding objects.
  54. {
  55. public:
  56. typedef SharedPtr<TextEncoding> Ptr;
  57. enum
  58. {
  59. MAX_SEQUENCE_LENGTH = 6 /// The maximum character byte sequence length supported.
  60. };
  61. typedef int CharacterMap[256];
  62. /// The map[b] member gives information about byte sequences
  63. /// whose first byte is b.
  64. /// If map[b] is c where c is >= 0, then b by itself encodes the Unicode scalar value c.
  65. /// If map[b] is -1, then the byte sequence is malformed.
  66. /// If map[b] is -n, where n >= 2, then b is the first byte of an n-byte
  67. /// sequence that encodes a single Unicode scalar value. Byte sequences up
  68. /// to 6 bytes in length are supported.
  69. virtual ~TextEncoding();
  70. /// Destroys the encoding.
  71. virtual const char* canonicalName() const = 0;
  72. /// Returns the canonical name of this encoding,
  73. /// e.g. "ISO-8859-1". Encoding name comparisons are case
  74. /// insensitive.
  75. virtual bool isA(const std::string& encodingName) const = 0;
  76. /// Returns true if the given name is one of the names of this encoding.
  77. /// For example, the "ISO-8859-1" encoding is also known as "Latin-1".
  78. ///
  79. /// Encoding name comparision are be case insensitive.
  80. virtual const CharacterMap& characterMap() const = 0;
  81. /// Returns the CharacterMap for the encoding.
  82. /// The CharacterMap should be kept in a static member. As
  83. /// characterMap() can be called frequently, it should be
  84. /// implemented in such a way that it just returns a static
  85. /// map. If the map is built at runtime, this should be
  86. /// done in the constructor.
  87. virtual int convert(const unsigned char* bytes) const;
  88. /// The convert function is used to convert multibyte sequences;
  89. /// bytes will point to a byte sequence of n bytes where
  90. /// getCharacterMap()[*bytes] == -n.
  91. ///
  92. /// The convert function must return the Unicode scalar value
  93. /// represented by this byte sequence or -1 if the byte sequence is malformed.
  94. /// The default implementation returns (int) bytes[0].
  95. virtual int convert(int ch, unsigned char* bytes, int length) const;
  96. /// Transform the Unicode character ch into the encoding's
  97. /// byte sequence. The method returns the number of bytes
  98. /// used. The method must not use more than length characters.
  99. /// Bytes and length can also be null - in this case only the number
  100. /// of bytes required to represent ch is returned.
  101. /// If the character cannot be converted, 0 is returned and
  102. /// the byte sequence remains unchanged.
  103. /// The default implementation simply returns 0.
  104. static TextEncoding& byName(const std::string& encodingName);
  105. /// Returns the TextEncoding object for the given encoding name.
  106. ///
  107. /// Throws a NotFoundException if the encoding with given name is not available.
  108. static TextEncoding::Ptr find(const std::string& encodingName);
  109. /// Returns a pointer to the TextEncoding object for the given encodingName,
  110. /// or NULL if no such TextEncoding object exists.
  111. static void add(TextEncoding::Ptr encoding);
  112. /// Adds the given TextEncoding to the table of text encodings,
  113. /// under the encoding's canonical name.
  114. ///
  115. /// If an encoding with the given name is already registered,
  116. /// it is replaced.
  117. static void add(TextEncoding::Ptr encoding, const std::string& name);
  118. /// Adds the given TextEncoding to the table of text encodings,
  119. /// under the given name.
  120. ///
  121. /// If an encoding with the given name is already registered,
  122. /// it is replaced.
  123. static void remove(const std::string& encodingName);
  124. /// Removes the encoding with the given name from the table
  125. /// of text encodings.
  126. static TextEncoding::Ptr global(TextEncoding::Ptr encoding);
  127. /// Sets global TextEncoding object.
  128. ///
  129. /// This function sets the global encoding to the argument and returns a
  130. /// reference of the previous global encoding.
  131. static TextEncoding& global();
  132. /// Return the current global TextEncoding object
  133. static const std::string GLOBAL;
  134. /// Name of the global TextEncoding, which is the empty string.
  135. protected:
  136. static TextEncodingManager& manager();
  137. /// Returns the TextEncodingManager.
  138. };
  139. } // namespace Poco
  140. #endif // Foundation_TextEncoding_INCLUDED