1
0

EncodingDetectionTests.cs 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239
  1. using System.Text;
  2. using Apq.Cfg.EncodingSupport;
  3. using Apq.Cfg.Sources.File;
  4. namespace Apq.Cfg.Tests;
  /// <summary>
  /// Tests for the encoding detection feature.
  /// </summary>
  /// <remarks>
  /// Each test instance works inside its own temporary directory. Several tests assign the static
  /// <c>FileCfgSourceBase.EncodingConfidenceThreshold</c>; the value observed at construction time
  /// is written back in <see cref="Dispose"/>.
  /// NOTE(review): because that threshold is static, test classes running in parallel that also
  /// touch it could interfere with each other — confirm the test collection setup.
  /// </remarks>
  public class EncodingDetectionTests : IDisposable
  {
      private readonly string _testDir;
      private readonly float _originalThreshold;

      /// <summary>
      /// Creates a uniquely named temporary directory and records the current confidence
      /// threshold so it can be restored when the test finishes.
      /// </summary>
      public EncodingDetectionTests()
      {
          _testDir = Path.Combine(Path.GetTempPath(), $"ApqCfgEncodingTests_{Guid.NewGuid():N}");
          Directory.CreateDirectory(_testDir);
          _originalThreshold = FileCfgSourceBase.EncodingConfidenceThreshold;
      }

      /// <summary>
      /// Restores the confidence threshold captured in the constructor and removes the temporary
      /// directory together with its contents.
      /// </summary>
      public void Dispose()
      {
          // Restore the original threshold.
          FileCfgSourceBase.EncodingConfidenceThreshold = _originalThreshold;

          if (!Directory.Exists(_testDir))
          {
              return;
          }

          try
          {
              Directory.Delete(_testDir, true);
          }
          catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
          {
              // Best-effort cleanup: a locked or read-only leftover in the temp folder must not
              // fail the test. Only the I/O failures Directory.Delete can raise are ignored.
          }
      }

      /// <summary>Builds the full path of a file inside this instance's temporary directory.</summary>
      private string TestPath(string fileName) => Path.Combine(_testDir, fileName);

      /// <summary>A UTF-8 file written without a BOM is reported as a UTF encoding.</summary>
      [Fact]
      public void DetectEncoding_Utf8File_ReturnsUtf8()
      {
          // Arrange
          var path = TestPath("utf8.json");
          File.WriteAllText(path, """{"Key": "Value", "中文": "测试"}""", new UTF8Encoding(false));

          // Act
          var result = FileCfgSourceBase.EncodingDetector.Detect(path);

          // Assert
          Assert.NotNull(result.Encoding);
          // The UTF-8 encoding name may be reported as "utf-8" or "UTF-8".
          Assert.Contains("utf", result.Encoding.WebName, StringComparison.OrdinalIgnoreCase);
      }

      /// <summary>A UTF-8 file written with a BOM is reported as a UTF encoding.</summary>
      [Fact]
      public void DetectEncoding_Utf8BomFile_ReturnsUtf8()
      {
          // Arrange
          var path = TestPath("utf8bom.json");
          File.WriteAllText(path, """{"Key": "Value", "中文": "测试"}""", new UTF8Encoding(true));

          // Act
          var result = FileCfgSourceBase.EncodingDetector.Detect(path);

          // Assert
          Assert.NotNull(result.Encoding);
          Assert.Contains("utf", result.Encoding.WebName, StringComparison.OrdinalIgnoreCase);
      }

      /// <summary>Detecting a path that does not exist yields <see cref="Encoding.UTF8"/>.</summary>
      [Fact]
      public void DetectEncoding_NonExistentFile_ReturnsUtf8()
      {
          // Arrange
          var path = TestPath("nonexistent.json");

          // Act
          var result = FileCfgSourceBase.EncodingDetector.Detect(path);

          // Assert
          Assert.Equal(Encoding.UTF8, result.Encoding);
      }

      /// <summary>Detecting an empty file still yields a non-null encoding.</summary>
      [Fact]
      public void DetectEncoding_EmptyFile_ReturnsEncoding()
      {
          // Arrange
          var path = TestPath("empty.json");
          File.WriteAllText(path, "");

          // Act
          var result = FileCfgSourceBase.EncodingDetector.Detect(path);

          // Assert
          Assert.NotNull(result.Encoding);
      }

      /// <summary>The default write encoding is UTF-8 and emits no byte order mark.</summary>
      [Fact]
      public void DefaultWriteEncoding_IsUtf8WithoutBom()
      {
          // Act & Assert
          var encoding = EncodingDetector.DefaultWriteEncoding;
          Assert.NotNull(encoding);
          Assert.Equal("utf-8", encoding.WebName);

          // Verify that it is UTF-8 without a BOM: the preamble must be empty.
          var preamble = encoding.GetPreamble();
          Assert.Empty(preamble);
      }

      /// <summary>Assigning 0.6 to the confidence threshold reads back as 0.6.</summary>
      [Fact]
      public void EncodingConfidenceThreshold_DefaultValue_Is0Point6()
      {
          // Reset to the default value.
          // NOTE(review): the value is assigned before it is asserted, so this verifies that 0.6
          // round-trips through the property rather than that 0.6 is the built-in default.
          FileCfgSourceBase.EncodingConfidenceThreshold = 0.6f;

          // Assert
          Assert.Equal(0.6f, FileCfgSourceBase.EncodingConfidenceThreshold);
      }

      /// <summary>A value inside the valid range is stored unchanged.</summary>
      [Fact]
      public void EncodingConfidenceThreshold_SetValue_Works()
      {
          // Act
          FileCfgSourceBase.EncodingConfidenceThreshold = 0.8f;

          // Assert
          Assert.Equal(0.8f, FileCfgSourceBase.EncodingConfidenceThreshold);
      }

      /// <summary>Values outside [0.0, 1.0] are expected to be clamped to the nearest bound.</summary>
      [Fact]
      public void EncodingConfidenceThreshold_ClampsToValidRange()
      {
          // Act - assign a value above 1.0.
          FileCfgSourceBase.EncodingConfidenceThreshold = 1.5f;
          Assert.Equal(1.0f, FileCfgSourceBase.EncodingConfidenceThreshold);

          // Act - assign a value below 0.0.
          FileCfgSourceBase.EncodingConfidenceThreshold = -0.5f;
          Assert.Equal(0.0f, FileCfgSourceBase.EncodingConfidenceThreshold);
      }

      /// <summary>Detection with a low per-call threshold yields a non-null encoding.</summary>
      [Fact]
      public void DetectEncoding_WithLowThreshold_DetectsMoreEncodings()
      {
          // Arrange
          var path = TestPath("ascii.txt");
          File.WriteAllText(path, "Simple ASCII text without special characters");

          // Act - use a low threshold.
          var options = new EncodingOptions { ConfidenceThreshold = 0.1f };
          var result = FileCfgSourceBase.EncodingDetector.Detect(path, options);

          // Assert - an encoding should be detected.
          Assert.NotNull(result.Encoding);
      }

      /// <summary>Detection with a very high per-call threshold yields a non-null encoding.</summary>
      [Fact]
      public void DetectEncoding_WithHighThreshold_FallsBackToUtf8()
      {
          // Arrange
          var path = TestPath("ambiguous.txt");
          // Write content for which encoding detection may be uncertain.
          File.WriteAllText(path, "abc");

          // Act - use a very high threshold.
          var options = new EncodingOptions { ConfidenceThreshold = 0.99f };
          var result = FileCfgSourceBase.EncodingDetector.Detect(path, options);

          // Assert - expected to fall back to UTF-8.
          // NOTE(review): only non-null is asserted; the UTF-8 fallback named in the test title
          // is not actually verified here.
          Assert.NotNull(result.Encoding);
      }

      /// <summary>Detection on a GBK-encoded Chinese text file yields a non-null encoding.</summary>
      [Fact]
      public void DetectEncoding_GbkFile_DetectsCorrectly()
      {
          // Arrange
          var path = TestPath("gbk.txt");
          // Register the code pages provider so the GBK encoding can be resolved by name.
          Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
          var gbkEncoding = Encoding.GetEncoding("GBK");
          File.WriteAllText(path, "这是一段中文测试文本,用于测试GBK编码检测功能。", gbkEncoding);

          // Act
          var options = new EncodingOptions { ConfidenceThreshold = 0.5f };
          var result = FileCfgSourceBase.EncodingDetector.Detect(path, options);

          // Assert - some encoding should be detected (possibly GBK or a compatible encoding).
          Assert.NotNull(result.Encoding);
      }

      /// <summary>
      /// A configuration built with an explicit encoding confidence threshold still reads a
      /// UTF-8 JSON file correctly.
      /// </summary>
      [Fact]
      public void CfgBuilder_WithEncodingThreshold_AffectsDetection()
      {
          // Arrange
          var jsonPath = TestPath("config.json");
          File.WriteAllText(jsonPath, """{"Key": "Value"}""", new UTF8Encoding(false));

          // Act - set the threshold through CfgBuilder.
          using var cfg = new CfgBuilder()
              .WithEncodingConfidenceThreshold(0.7f)
              .AddJson(jsonPath, level: 0, writeable: false)
              .Build();

          // Assert - the configuration should be readable as usual.
          Assert.Equal("Value", cfg.Get("Key"));
      }

      /// <summary>Detection on a JSON file with 1000 key/value pairs yields a non-null encoding.</summary>
      [Fact]
      public void DetectEncoding_LargeFile_Works()
      {
          // Arrange
          var path = TestPath("large.json");
          var sb = new StringBuilder();
          sb.Append('{');
          for (int i = 0; i < 1000; i++)
          {
              if (i > 0)
              {
                  sb.Append(',');
              }

              sb.Append($"\"Key{i}\": \"Value{i}\"");
          }

          sb.Append('}');
          File.WriteAllText(path, sb.ToString(), new UTF8Encoding(false));

          // Act
          var result = FileCfgSourceBase.EncodingDetector.Detect(path);

          // Assert - an encoding should be detected (possibly UTF-8 or ASCII, since the content
          // is pure ASCII).
          Assert.NotNull(result.Encoding);
      }

      /// <summary>Detection on a file of arbitrary binary bytes yields a non-null encoding.</summary>
      [Fact]
      public void DetectEncoding_BinaryFile_ReturnsEncoding()
      {
          // Arrange
          var path = TestPath("binary.bin");
          var bytes = new byte[] { 0x00, 0x01, 0x02, 0xFF, 0xFE, 0xFD };
          File.WriteAllBytes(path, bytes);

          // Act - run encoding detection against binary content.
          var result = FileCfgSourceBase.EncodingDetector.Detect(path);

          // Assert - some encoding should be returned (possibly UTF-8 as the fallback).
          Assert.NotNull(result.Encoding);
      }
  }