comment.py 2.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788
  1. # -*- coding:utf-8 -*-
  2. """
  3. Comment removal utility for JSON configuration files.
  4. Supports both # and // style single-line comments.
  5. @author: GitHub Copilot
  6. """
  7. def remove_comment(content):
  8. # type: (str) -> str
  9. """
  10. 移除字符串中的单行注释。
  11. 支持 # 和 // 两种注释风格。
  12. Args:
  13. content (str): 包含注释的字符串内容
  14. Returns:
  15. str: 移除注释后的字符串
  16. Examples:
  17. >>> remove_comment('{"key": "value"} // comment')
  18. '{"key": "value"} '
  19. >>> remove_comment('# This is a comment\\n{"key": "value"}')
  20. '\\n{"key": "value"}'
  21. """
  22. if not content:
  23. return content
  24. lines = content.splitlines()
  25. cleaned_lines = []
  26. for line in lines:
  27. # 移除行内注释,但要小心不要破坏字符串内的内容
  28. cleaned_line = _remove_line_comment(line)
  29. cleaned_lines.append(cleaned_line)
  30. return "\n".join(cleaned_lines)
  31. def _remove_line_comment(line):
  32. # type: (str) -> str
  33. """
  34. 移除单行中的注释部分。
  35. Args:
  36. line (str): 要处理的行
  37. Returns:
  38. str: 移除注释后的行
  39. """
  40. # 检查是否是整行注释
  41. stripped = line.lstrip()
  42. if stripped.startswith("#") or stripped.startswith("//"):
  43. return ""
  44. # 查找行内注释,需要考虑字符串内容
  45. in_string = False
  46. quote_char = None
  47. i = 0
  48. while i < len(line):
  49. char = line[i]
  50. # 处理字符串内的转义序列
  51. if in_string and char == "\\" and i + 1 < len(line):
  52. i += 2 # 跳过转义字符
  53. continue
  54. # 处理引号字符
  55. if char in ('"', "'"):
  56. if not in_string:
  57. in_string = True
  58. quote_char = char
  59. elif char == quote_char:
  60. in_string = False
  61. quote_char = None
  62. # 在字符串外检查注释标记
  63. elif not in_string:
  64. if char == "#":
  65. return line[:i].rstrip()
  66. elif char == "/" and i + 1 < len(line) and line[i + 1] == "/":
  67. return line[:i].rstrip()
  68. i += 1
  69. return line