main.py 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  1. # -*- coding: utf-8 -*-
  2. import re
  3. import requests
  4. from flask import Flask, Response, redirect, request
  5. from requests.exceptions import (
  6. ChunkedEncodingError,
  7. ContentDecodingError, ConnectionError, StreamConsumedError)
  8. from requests.utils import (
  9. stream_decode_response_unicode, iter_slices, CaseInsensitiveDict)
  10. from urllib3.exceptions import (
  11. DecodeError, ReadTimeoutError, ProtocolError)
  12. from urllib.parse import quote
  13. # config
  14. # 分支文件使用jsDelivr镜像的开关,0为关闭,默认关闭
  15. jsdelivr = 0
  16. size_limit = 1024 * 1024 * 1024 * 999 # 允许的文件大小,默认999GB,相当于无限制了 https://github.com/hunshcn/gh-proxy/issues/8
  17. """
  18. 先生效白名单再匹配黑名单,pass_list匹配到的会直接302到jsdelivr而忽略设置
  19. 生效顺序 白->黑->pass,可以前往https://github.com/hunshcn/gh-proxy/issues/41 查看示例
  20. 每个规则一行,可以封禁某个用户的所有仓库,也可以封禁某个用户的特定仓库,下方用黑名单示例,白名单同理
  21. user1 # 封禁user1的所有仓库
  22. user1/repo1 # 封禁user1的repo1
  23. */repo1 # 封禁所有叫做repo1的仓库
  24. """
  25. white_list = '''
  26. '''
  27. black_list = '''
  28. '''
  29. pass_list = '''
  30. '''
  31. HOST = '127.0.0.1' # 监听地址,建议监听本地然后由web服务器反代
  32. PORT = 80 # 监听端口
  33. ASSET_URL = 'https://hunshcn.github.io/gh-proxy' # 主页
  34. white_list = [tuple([x.replace(' ', '') for x in i.split('/')]) for i in white_list.split('\n') if i]
  35. black_list = [tuple([x.replace(' ', '') for x in i.split('/')]) for i in black_list.split('\n') if i]
  36. pass_list = [tuple([x.replace(' ', '') for x in i.split('/')]) for i in pass_list.split('\n') if i]
# The Flask application that the route decorators below register on.
app = Flask(__name__)

# Number of bytes requested per chunk when an upstream body is streamed back
# to the client (passed to iter_content() by proxy()).
CHUNK_SIZE = 1024 * 10

# Landing page and favicon are downloaded once, while the module is being
# imported, and then served from memory by index() and icon().
# NOTE(review): a network failure here raises during import, and a non-200
# answer is cached as-is because the status code is not checked.
index_html = requests.get(ASSET_URL, timeout=10).text
icon_r = requests.get(ASSET_URL + '/favicon.ico', timeout=10).content

# URL shapes accepted by check_url(). The scheme is optional in all of them;
# every pattern captures `author`, and all but exp5 also capture `repo`.
# exp1: github.com/<author>/<repo>/releases/... or .../archive/...
exp1 = re.compile(r'^(?:https?://)?github\.com/(?P<author>.+?)/(?P<repo>.+?)/(?:releases|archive)/.*$')
# exp2: github.com/<author>/<repo>/blob/... or .../raw/...
exp2 = re.compile(r'^(?:https?://)?github\.com/(?P<author>.+?)/(?P<repo>.+?)/(?:blob|raw)/.*$')
# exp3: github.com/<author>/<repo>/info... or .../git-...
exp3 = re.compile(r'^(?:https?://)?github\.com/(?P<author>.+?)/(?P<repo>.+?)/(?:info|git-).*$')
# exp4: raw.githubusercontent.com or raw.github.com /<author>/<repo>/<x>/<path>
exp4 = re.compile(r'^(?:https?://)?raw\.(?:githubusercontent|github)\.com/(?P<author>.+?)/(?P<repo>.+?)/.+?/.+$')
# exp5: gist.githubusercontent.com or gist.github.com /<author>/<x>/<path>
exp5 = re.compile(r'^(?:https?://)?gist\.(?:githubusercontent|github)\.com/(?P<author>.+?)/.+?/.+$')

# Replace requests' default-header factory with one that returns an empty
# header mapping. This runs after the two asset downloads above, so those
# still used the stock defaults.
# NOTE(review): presumably this keeps requests from adding its own headers
# (User-Agent, Accept-Encoding, ...) to proxied requests -- confirm.
requests.sessions.default_headers = lambda: CaseInsensitiveDict()
  47. @app.route('/')
  48. def index():
  49. if 'q' in request.args:
  50. return redirect('/' + request.args.get('q'))
  51. return index_html
  52. @app.route('/favicon.ico')
  53. def icon():
  54. return Response(icon_r, content_type='image/vnd.microsoft.icon')
  55. def iter_content(self, chunk_size=1, decode_unicode=False):
  56. """rewrite requests function, set decode_content with False"""
  57. def generate():
  58. # Special case for urllib3.
  59. if hasattr(self.raw, 'stream'):
  60. try:
  61. for chunk in self.raw.stream(chunk_size, decode_content=False):
  62. yield chunk
  63. except ProtocolError as e:
  64. raise ChunkedEncodingError(e)
  65. except DecodeError as e:
  66. raise ContentDecodingError(e)
  67. except ReadTimeoutError as e:
  68. raise ConnectionError(e)
  69. else:
  70. # Standard file-like object.
  71. while True:
  72. chunk = self.raw.read(chunk_size)
  73. if not chunk:
  74. break
  75. yield chunk
  76. self._content_consumed = True
  77. if self._content_consumed and isinstance(self._content, bool):
  78. raise StreamConsumedError()
  79. elif chunk_size is not None and not isinstance(chunk_size, int):
  80. raise TypeError("chunk_size must be an int, it is instead a %s." % type(chunk_size))
  81. # simulate reading small chunks of the content
  82. reused_chunks = iter_slices(self._content, chunk_size)
  83. stream_chunks = generate()
  84. chunks = reused_chunks if self._content_consumed else stream_chunks
  85. if decode_unicode:
  86. chunks = stream_decode_response_unicode(chunks, self)
  87. return chunks
  88. def check_url(u):
  89. for exp in (exp1, exp2, exp3, exp4, exp5):
  90. m = exp.match(u)
  91. if m:
  92. return m
  93. return False
  94. @app.route('/<path:u>', methods=['GET', 'POST'])
  95. def handler(u):
  96. u = u if u.startswith('http') else 'https://' + u
  97. if u.rfind('://', 3, 9) == -1:
  98. u = u.replace('s:/', 's://', 1) # uwsgi会将//传递为/
  99. pass_by = False
  100. m = check_url(u)
  101. if m:
  102. m = tuple(m.groups())
  103. if white_list:
  104. for i in white_list:
  105. if m[:len(i)] == i or i[0] == '*' and len(m) == 2 and m[1] == i[1]:
  106. break
  107. else:
  108. return Response('Forbidden by white list.', status=403)
  109. for i in black_list:
  110. if m[:len(i)] == i or i[0] == '*' and len(m) == 2 and m[1] == i[1]:
  111. return Response('Forbidden by black list.', status=403)
  112. for i in pass_list:
  113. if m[:len(i)] == i or i[0] == '*' and len(m) == 2 and m[1] == i[1]:
  114. pass_by = True
  115. break
  116. else:
  117. return Response('Invalid input.', status=403)
  118. if (jsdelivr or pass_by) and exp2.match(u):
  119. u = u.replace('/blob/', '@', 1).replace('github.com', 'cdn.jsdelivr.net/gh', 1)
  120. return redirect(u)
  121. elif (jsdelivr or pass_by) and exp4.match(u):
  122. u = re.sub(r'(\.com/.*?/.+?)/(.+?/)', r'\1@\2', u, 1)
  123. _u = u.replace('raw.githubusercontent.com', 'cdn.jsdelivr.net/gh', 1)
  124. u = u.replace('raw.github.com', 'cdn.jsdelivr.net/gh', 1) if _u == u else _u
  125. return redirect(u)
  126. else:
  127. if exp2.match(u):
  128. u = u.replace('/blob/', '/raw/', 1)
  129. if pass_by:
  130. url = u + request.url.replace(request.base_url, '', 1)
  131. if url.startswith('https:/') and not url.startswith('https://'):
  132. url = 'https://' + url[7:]
  133. return redirect(url)
  134. u = quote(u, safe='/:')
  135. return proxy(u)
  136. def proxy(u, allow_redirects=False):
  137. headers = {}
  138. r_headers = dict(request.headers)
  139. if 'Host' in r_headers:
  140. r_headers.pop('Host')
  141. try:
  142. url = u + request.url.replace(request.base_url, '', 1)
  143. if url.startswith('https:/') and not url.startswith('https://'):
  144. url = 'https://' + url[7:]
  145. r = requests.request(method=request.method, url=url, data=request.data, headers=r_headers, stream=True, allow_redirects=allow_redirects)
  146. headers = dict(r.headers)
  147. if 'Content-length' in r.headers and int(r.headers['Content-length']) > size_limit:
  148. return redirect(u + request.url.replace(request.base_url, '', 1))
  149. def generate():
  150. for chunk in iter_content(r, chunk_size=CHUNK_SIZE):
  151. yield chunk
  152. if 'Location' in r.headers:
  153. _location = r.headers.get('Location')
  154. if check_url(_location):
  155. headers['Location'] = '/' + _location
  156. else:
  157. return proxy(_location, True)
  158. return Response(generate(), headers=headers, status=r.status_code)
  159. except Exception as e:
  160. headers['content-type'] = 'text/html; charset=UTF-8'
  161. return Response('server error ' + str(e), status=500, headers=headers)
  162. app.debug = True
  163. if __name__ == '__main__':
  164. app.run(host=HOST, port=PORT)