http.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322
# coding=utf-8
"""
HTTP请求工具模块,SSL 代理,重试,basic-auth
HTTP utilities module: SSL verification, proxies, retries and basic auth.
@author: NewFuture
"""
  7. from logging import getLogger
  8. from re import compile
  9. import ssl
  10. import os
  11. import time
  12. import socket
  13. from .. import __version__
  14. try: # python 3
  15. from urllib.request import (
  16. BaseHandler,
  17. build_opener,
  18. HTTPBasicAuthHandler,
  19. HTTPDefaultErrorHandler,
  20. HTTPPasswordMgrWithDefaultRealm,
  21. HTTPSHandler,
  22. ProxyHandler,
  23. Request,
  24. )
  25. from urllib.parse import quote, urlencode, unquote
  26. from http.client import HTTPSConnection
  27. except ImportError: # python 2
  28. from urllib2 import ( # type: ignore[no-redef]
  29. BaseHandler,
  30. build_opener,
  31. HTTPBasicAuthHandler,
  32. HTTPDefaultErrorHandler,
  33. HTTPPasswordMgrWithDefaultRealm,
  34. HTTPSHandler,
  35. ProxyHandler,
  36. Request,
  37. )
  38. from urllib import urlencode, quote, unquote # type: ignore[no-redef]
  39. from httplib import HTTPSConnection # type: ignore[no-redef]
  40. __all__ = ["request", "HttpResponse", "quote", "urlencode", "USER_AGENT"]
  41. # Default user-agent for DDNS requests
  42. USER_AGENT = "DDNS/{} ([email protected])".format(__version__ if __version__ != "${BUILD_VERSION}" else "dev")
  43. logger = getLogger().getChild("http")
  44. _AUTH_URL_RE = compile(r"^(https?://)([^:/?#]+):([^@]+)@(.+)$")
  45. def _proxy_handler(proxy):
  46. # type: (str | None) -> ProxyHandler | None
  47. """标准化代理格式并返回ProxyHandler对象"""
  48. if not proxy or proxy.upper() in ("SYSTEM", "DEFAULT"):
  49. return ProxyHandler() # 系统代理
  50. elif proxy.upper() in ("DIRECT"):
  51. return ProxyHandler({}) # 不使用代理
  52. elif "://" not in proxy:
  53. # 检查是否是 host:port 格式
  54. logger.warning("Legacy proxy format '%s' detected, converting to 'http://%s'", proxy, proxy)
  55. proxy = "http://" + proxy
  56. return ProxyHandler({"http": proxy, "https": proxy})
  57. def request(method, url, data=None, headers=None, proxies=None, verify=True, auth=None, retries=1):
  58. # type: (str, str, str | bytes | None, dict[str, str] | None, list[str] | None, bool | str, BaseHandler | None, int) -> HttpResponse # noqa: E501
  59. """
  60. 发送HTTP/HTTPS请求,支持自动重试和类似requests.request的参数接口
  61. Args:
  62. method (str): HTTP方法,如GET、POST等
  63. url (str): 请求的URL,支持嵌入式认证格式 https://user:[email protected]
  64. data (str | bytes | None): 请求体数据
  65. headers (dict[str, str] | None): 请求头字典
  66. proxies (list[str | None] | None): 代理列表,支持以下格式:
  67. - "http://host:port" - 具体代理地址
  68. - "DIRECT" - 直连,不使用代理
  69. - "SYSTEM" - 使用系统默认代理设置
  70. verify (bool | str): SSL验证配置
  71. - True: 启用标准SSL验证
  72. - False: 禁用SSL验证
  73. - "auto": 启用验证,失败时自动回退到不验证
  74. - str: 自定义CA证书文件路径
  75. auth (BaseHandler | None): 自定义认证处理器
  76. retries (int): 最大重试次数,默认1次
  77. Returns:
  78. HttpResponse: 响应对象
  79. Raises:
  80. URLError: 如果请求失败
  81. ssl.SSLError: 如果SSL验证失败
  82. ValueError: 如果参数无效
  83. """
  84. # 解析URL以检查是否包含嵌入式认证信息
  85. m = _AUTH_URL_RE.match(url)
  86. if m:
  87. protocol, username, password, rest = m.groups()
  88. url = protocol + rest
  89. auth = HTTPBasicAuthHandler(HTTPPasswordMgrWithDefaultRealm())
  90. auth.add_password(None, url, unquote(username), unquote(password)) # type: ignore[no-untyped-call]
  91. # 准备请求
  92. if isinstance(data, str):
  93. data = data.encode("utf-8")
  94. if headers is None:
  95. headers = {}
  96. if not any(k.lower() == "user-agent" for k in headers.keys()):
  97. headers["User-Agent"] = USER_AGENT # 设置默认User-Agent
  98. handlers = [NoHTTPErrorHandler(), AutoSSLHandler(verify), RetryHandler(retries)]
  99. handlers += [auth] if auth else []
  100. def run(proxy_handler):
  101. req = Request(url, data=data, headers=headers)
  102. req.get_method = lambda: method.upper() # python 2 兼容
  103. h = handlers + ([proxy_handler] if proxy_handler else [])
  104. return build_opener(*h).open(req, timeout=60 if method == "GET" else 120) # 创建处理器链
  105. if not proxies:
  106. response = run(None) # 默认
  107. else:
  108. last_err = None # type: Exception # type: ignore[assignment]
  109. for p in proxies:
  110. logger.debug("Trying proxy: %s", p)
  111. try:
  112. response = run(_proxy_handler(p)) # 尝试使用代理
  113. break # 成功后退出循环
  114. except Exception as e:
  115. last_err = e
  116. else:
  117. logger.error("All proxies failed")
  118. raise last_err # 如果所有代理都失败,抛出最后一个错误
  119. # 处理响应
  120. response_headers = response.info()
  121. raw_body = response.read()
  122. decoded_body = _decode_response_body(raw_body, response_headers.get("Content-Type"))
  123. status_code = response.getcode()
  124. reason = getattr(response, "msg", "")
  125. return HttpResponse(status_code, reason, response_headers, decoded_body)
  126. def _decode_response_body(raw_body, content_type):
  127. # type: (bytes, str | None) -> str
  128. """解码HTTP响应体,优先使用UTF-8"""
  129. if not raw_body:
  130. return ""
  131. # 从Content-Type提取charset
  132. charsets = ["utf-8", "gbk", "ascii", "latin-1"]
  133. if content_type and "charset=" in content_type.lower():
  134. start = content_type.lower().find("charset=") + 8
  135. end = content_type.find(";", start)
  136. if end == -1:
  137. end = len(content_type)
  138. charset = content_type[start:end].strip("'\" ").lower()
  139. # 处理常见别名映射
  140. charset_aliases = {"gb2312": "gbk", "iso-8859-1": "latin-1"}
  141. charset = charset_aliases.get(charset, charset)
  142. if charset in charsets:
  143. charsets.remove(charset)
  144. charsets.insert(0, charset)
  145. # 按优先级尝试解码
  146. for encoding in charsets:
  147. try:
  148. return raw_body.decode(encoding)
  149. except (UnicodeDecodeError, LookupError):
  150. continue
  151. return raw_body.decode("utf-8", errors="replace") # 最终后备:UTF-8替换错误字符
  152. class HttpResponse(object):
  153. """HTTP响应封装类"""
  154. def __init__(self, status, reason, headers, body):
  155. # type: (int, str, Any, str) -> None
  156. """初始化HTTP响应对象"""
  157. self.status = status
  158. self.reason = reason
  159. self.headers = headers
  160. self.body = body
  161. class NoHTTPErrorHandler(HTTPDefaultErrorHandler): # type: ignore[misc]
  162. """自定义HTTP错误处理器,处理所有HTTP错误状态码,返回响应而不抛出异常"""
  163. def http_error_default(self, req, fp, code, msg, hdrs):
  164. """处理所有HTTP错误状态码,返回响应而不抛出异常"""
  165. logger.info("HTTP error %s: %s", code, msg)
  166. return fp
  167. class AutoSSLHandler(HTTPSHandler): # type: ignore[misc]
  168. """SSL自动降级处理器,处理 unable to get local issuer certificate 错误"""
  169. _ssl_cache = {} # type: dict[str, ssl.SSLContext|None]
  170. def __init__(self, verify):
  171. # type: (bool | str) -> None
  172. self._verify = verify
  173. self._context = self._ssl_context()
  174. # 兼容性:优先使用context参数,失败时降级
  175. try: # python 3 / python 2.7.9+
  176. HTTPSHandler.__init__(self, context=self._context)
  177. except (TypeError, AttributeError): # python 2.7.8-
  178. HTTPSHandler.__init__(self)
  179. def https_open(self, req):
  180. """处理HTTPS请求,自动处理SSL错误"""
  181. try:
  182. return self._open(req)
  183. except OSError as e: # SSL auto模式:处理本地证书错误
  184. ssl_errors = ("unable to get local issuer certificate", "Basic Constraints of CA cert not marked critical")
  185. if self._verify == "auto" and any(err in str(e) for err in ssl_errors):
  186. logger.warning("SSL error (%s), switching to unverified connection", str(e))
  187. self._verify = False # 不验证SSL
  188. self._context = self._ssl_context() # 确保上下文已更新
  189. return self._open(req) # 重试请求
  190. else:
  191. logger.debug("error: (%s)", e)
  192. raise
  193. def _open(self, req):
  194. try: # python 3
  195. return self.do_open(HTTPSConnection, req, context=self._context)
  196. except (TypeError, AttributeError): # python 2.7.6- Fallback for older Python versions
  197. logger.info("Falling back to parent https_open method for compatibility")
  198. return HTTPSHandler.https_open(self, req)
  199. def _ssl_context(self):
  200. # type: () -> ssl.SSLContext | None
  201. """创建或获取缓存的SSLContext"""
  202. cache_key = "default" # 缓存键
  203. if not self._verify:
  204. cache_key = "unverified"
  205. if cache_key not in self._ssl_cache:
  206. self._ssl_cache[cache_key] = (
  207. ssl._create_unverified_context() if hasattr(ssl, "_create_unverified_context") else None
  208. )
  209. elif hasattr(self._verify, "lower") and self._verify.lower() not in ("auto", "true"): # type: ignore
  210. cache_key = str(self._verify)
  211. if cache_key not in self._ssl_cache:
  212. self._ssl_cache[cache_key] = ssl.create_default_context(cafile=cache_key)
  213. elif cache_key not in self._ssl_cache:
  214. self._ssl_cache[cache_key] = ssl.create_default_context()
  215. if not self._ssl_cache[cache_key].get_ca_certs(): # type: ignore
  216. logger.info("No system CA certificates found, loading default CA certificates")
  217. self._load_system_ca_certs(self._ssl_cache[cache_key]) # type: ignore
  218. return self._ssl_cache[cache_key]
  219. def _load_system_ca_certs(self, ssl_context):
  220. # type: (ssl.SSLContext) -> None
  221. """加载系统CA证书"""
  222. ca_paths = [
  223. # Linux/Unix常用路径
  224. "/etc/ssl/certs/ca-certificates.crt", # Debian/Ubuntu
  225. "/etc/pki/tls/certs/ca-bundle.crt", # RedHat/CentOS
  226. "/etc/ssl/ca-bundle.pem", # OpenSUSE
  227. "/etc/ssl/cert.pem", # OpenBSD
  228. "/usr/local/share/certs/ca-root-nss.crt", # FreeBSD
  229. "/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem", # Fedora/RHEL
  230. # macOS路径
  231. "/usr/local/etc/openssl/cert.pem", # macOS with Homebrew
  232. "/opt/local/etc/openssl/cert.pem", # macOS with MacPorts
  233. ]
  234. for ca_path in ca_paths:
  235. if os.path.isfile(ca_path):
  236. try:
  237. ssl_context.load_verify_locations(ca_path)
  238. logger.info("Loaded CA certificates from: %s", ca_path)
  239. return # 成功加载后立即返回
  240. except Exception as e:
  241. logger.warning("Failed to load CA certificates from %s: %s", ca_path, e)
  242. class RetryHandler(BaseHandler): # type: ignore[misc]
  243. """HTTP重试处理器,自动重试指定状态码和网络错误"""
  244. handler_order = 100
  245. RETRY_CODES = (408, 429, 500, 502, 503, 504)
  246. def __init__(self, retries=3):
  247. # type: (int) -> None
  248. """初始化重试处理器"""
  249. self._in_retry = False # 防止递归调用的标志
  250. self.retries = retries # 始终设置retries属性
  251. if retries > 0:
  252. self.default_open = self._open
  253. def _open(self, req):
  254. """实际的重试逻辑,处理所有协议"""
  255. if self._in_retry:
  256. return None # 防止递归调用
  257. self._in_retry = True
  258. try:
  259. for attempt in range(1, self.retries + 1):
  260. try:
  261. res = self.parent.open(req, timeout=req.timeout)
  262. if not hasattr(res, "getcode") or res.getcode() not in self.RETRY_CODES:
  263. return res # 成功响应直接返回
  264. logger.warning("HTTP %d error, retrying in %d seconds", res.getcode(), 2**attempt)
  265. except (socket.timeout, socket.gaierror, socket.herror) as e:
  266. logger.warning("Request failed, retrying in %d seconds: %s", 2**attempt, str(e))
  267. time.sleep(2**attempt)
  268. continue
  269. return self.parent.open(req, timeout=req.timeout) # 最后一次尝试
  270. finally:
  271. self._in_retry = False