tools.py 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. import json
  2. import os
  3. import zipfile
  4. def unzip_file(zip_path, extract_dir):
  5. """
  6. 解压zip文件到指定目录,并在指定目录下创建一个新的目录存放解压后的文件
  7. 参数:
  8. zip_path (str): zip压缩包的地址
  9. extract_dir (str): 指定解压的目录
  10. 返回:
  11. str: 解压后的路径
  12. """
  13. if not os.path.exists(extract_dir):
  14. os.makedirs(extract_dir)
  15. base_name = os.path.basename(zip_path)
  16. dir_name = os.path.splitext(base_name)[0]
  17. new_extract_dir = os.path.join(extract_dir, dir_name)
  18. if not os.path.exists(new_extract_dir):
  19. os.makedirs(new_extract_dir)
  20. with zipfile.ZipFile(zip_path, "r") as zip_ref:
  21. zip_ref.extractall(new_extract_dir)
  22. return new_extract_dir
  23. def get_project_files_with_content(project_dir):
  24. """
  25. 获取项目目录下所有文件的相对路径和内容
  26. 参数:
  27. project_dir (str): 项目目录地址
  28. 返回:
  29. list: 包含字典的列表,每个字典包含文件的相对路径和内容
  30. """
  31. files_list = []
  32. for root, dirs, files in os.walk(project_dir):
  33. for file in files:
  34. if filter_data(file):
  35. file_path = os.path.join(root, file)
  36. relative_path = os.path.relpath(file_path, project_dir)
  37. if "__MACOSX" in relative_path:
  38. continue
  39. with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
  40. content = f.read()
  41. files_list.append({"path": relative_path, "content": content})
  42. else:
  43. continue
  44. return files_list
  45. def filter_data(obj):
  46. LANGUAGE_TAG = {
  47. "c++": "// C++",
  48. "cpp": "// C++",
  49. "c": "// C",
  50. "c#": "// C#",
  51. "c-sharp": "// C#",
  52. "css": "/* CSS */",
  53. "cuda": "// Cuda",
  54. "fortran": "! Fortran",
  55. "go": "// Go",
  56. "html": "<!-- HTML -->",
  57. "java": "// Java",
  58. "js": "// JavaScript",
  59. "javascript": "// JavaScript",
  60. "kotlin": "// Kotlin",
  61. "lean": "-- Lean",
  62. "lua": "-- Lua",
  63. "objectivec": "// Objective-C",
  64. "objective-c": "// Objective-C",
  65. "objective-c++": "// Objective-C++",
  66. "pascal": "// Pascal",
  67. "php": "// PHP",
  68. "python": "# Python",
  69. "r": "# R",
  70. "rust": "// Rust",
  71. "ruby": "# Ruby",
  72. "scala": "// Scala",
  73. "shell": "# Shell",
  74. "sql": "-- SQL",
  75. "tex": f"% TeX",
  76. "typescript": "// TypeScript",
  77. "vue": "<!-- Vue -->",
  78. "assembly": "; Assembly",
  79. "dart": "// Dart",
  80. "perl": "# Perl",
  81. "prolog": f"% Prolog",
  82. "swift": "// swift",
  83. "lisp": "; Lisp",
  84. "vb": "' Visual Basic",
  85. "visual basic": "' Visual Basic",
  86. "matlab": f"% Matlab",
  87. "delphi": "{ Delphi }",
  88. "scheme": "; Scheme",
  89. "basic": "' Basic",
  90. "groovy": "// Groovy",
  91. "abap": "* Abap",
  92. "gdscript": "# GDScript",
  93. "haskell": "-- Haskell",
  94. "julia": "# Julia",
  95. "elixir": "# Elixir",
  96. "excel": "' Excel",
  97. "clojure": "; Clojure",
  98. "actionscript": "// ActionScript",
  99. "solidity": "// Solidity",
  100. "powershell": "# PowerShell",
  101. "erlang": f"% Erlang",
  102. "cobol": "// Cobol",
  103. "batchfile": ":: Batch file",
  104. "makefile": "# Makefile",
  105. "dockerfile": "# Dockerfile",
  106. "markdown": "<!-- Markdown -->",
  107. "cmake": "# CMake",
  108. }
  109. programming_languages_to_file_extensions = json.load(
  110. open("utils/programming-languages-to-file-extensions.json")
  111. )
  112. need2del = []
  113. for key in programming_languages_to_file_extensions.keys():
  114. if key.lower() not in LANGUAGE_TAG:
  115. need2del.append(key)
  116. for key in need2del:
  117. del programming_languages_to_file_extensions[key]
  118. ext_to_programming_languages = {}
  119. want_languages = []
  120. for key in programming_languages_to_file_extensions:
  121. for item in programming_languages_to_file_extensions[key]:
  122. ext_to_programming_languages[item] = key
  123. want_languages.append(item)
  124. ext = "." + obj.split(".")[-1]
  125. with open("utils/keep.txt", "r") as f:
  126. keep_files = f.readlines()
  127. keep_files = [l.strip() for l in keep_files]
  128. # print(ext)
  129. if ext not in want_languages:
  130. if obj in keep_files:
  131. return True
  132. return False
  133. else:
  134. return True