| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154 |
- import json
- import os
- import zipfile
- def unzip_file(zip_path, extract_dir):
- """
- 解压zip文件到指定目录,并在指定目录下创建一个新的目录存放解压后的文件
- 参数:
- zip_path (str): zip压缩包的地址
- extract_dir (str): 指定解压的目录
- 返回:
- str: 解压后的路径
- """
- if not os.path.exists(extract_dir):
- os.makedirs(extract_dir)
- base_name = os.path.basename(zip_path)
- dir_name = os.path.splitext(base_name)[0]
- new_extract_dir = os.path.join(extract_dir, dir_name)
- if not os.path.exists(new_extract_dir):
- os.makedirs(new_extract_dir)
- with zipfile.ZipFile(zip_path, "r") as zip_ref:
- zip_ref.extractall(new_extract_dir)
- return new_extract_dir
- def get_project_files_with_content(project_dir):
- """
- 获取项目目录下所有文件的相对路径和内容
- 参数:
- project_dir (str): 项目目录地址
- 返回:
- list: 包含字典的列表,每个字典包含文件的相对路径和内容
- """
- files_list = []
- for root, dirs, files in os.walk(project_dir):
- for file in files:
- if filter_data(file):
- file_path = os.path.join(root, file)
- relative_path = os.path.relpath(file_path, project_dir)
- if "__MACOSX" in relative_path:
- continue
- with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
- content = f.read()
- files_list.append({"path": relative_path, "content": content})
- else:
- continue
- return files_list
- def filter_data(obj):
- LANGUAGE_TAG = {
- "c++": "// C++",
- "cpp": "// C++",
- "c": "// C",
- "c#": "// C#",
- "c-sharp": "// C#",
- "css": "/* CSS */",
- "cuda": "// Cuda",
- "fortran": "! Fortran",
- "go": "// Go",
- "html": "<!-- HTML -->",
- "java": "// Java",
- "js": "// JavaScript",
- "javascript": "// JavaScript",
- "kotlin": "// Kotlin",
- "lean": "-- Lean",
- "lua": "-- Lua",
- "objectivec": "// Objective-C",
- "objective-c": "// Objective-C",
- "objective-c++": "// Objective-C++",
- "pascal": "// Pascal",
- "php": "// PHP",
- "python": "# Python",
- "r": "# R",
- "rust": "// Rust",
- "ruby": "# Ruby",
- "scala": "// Scala",
- "shell": "# Shell",
- "sql": "-- SQL",
- "tex": f"% TeX",
- "typescript": "// TypeScript",
- "vue": "<!-- Vue -->",
- "assembly": "; Assembly",
- "dart": "// Dart",
- "perl": "# Perl",
- "prolog": f"% Prolog",
- "swift": "// swift",
- "lisp": "; Lisp",
- "vb": "' Visual Basic",
- "visual basic": "' Visual Basic",
- "matlab": f"% Matlab",
- "delphi": "{ Delphi }",
- "scheme": "; Scheme",
- "basic": "' Basic",
- "groovy": "// Groovy",
- "abap": "* Abap",
- "gdscript": "# GDScript",
- "haskell": "-- Haskell",
- "julia": "# Julia",
- "elixir": "# Elixir",
- "excel": "' Excel",
- "clojure": "; Clojure",
- "actionscript": "// ActionScript",
- "solidity": "// Solidity",
- "powershell": "# PowerShell",
- "erlang": f"% Erlang",
- "cobol": "// Cobol",
- "batchfile": ":: Batch file",
- "makefile": "# Makefile",
- "dockerfile": "# Dockerfile",
- "markdown": "<!-- Markdown -->",
- "cmake": "# CMake",
- }
- programming_languages_to_file_extensions = json.load(
- open("utils/programming-languages-to-file-extensions.json")
- )
- need2del = []
- for key in programming_languages_to_file_extensions.keys():
- if key.lower() not in LANGUAGE_TAG:
- need2del.append(key)
- for key in need2del:
- del programming_languages_to_file_extensions[key]
- ext_to_programming_languages = {}
- want_languages = []
- for key in programming_languages_to_file_extensions:
- for item in programming_languages_to_file_extensions[key]:
- ext_to_programming_languages[item] = key
- want_languages.append(item)
- ext = "." + obj.split(".")[-1]
- with open("utils/keep.txt", "r") as f:
- keep_files = f.readlines()
- keep_files = [l.strip() for l in keep_files]
- # print(ext)
- if ext not in want_languages:
- if obj in keep_files:
- return True
- return False
- else:
- return True
|