瀏覽代碼

增加校验功能:采集到的代理IP是否为匿名代理 (#74)

* anonymous proxy ip

Add verification of whether a collected proxy IP is anonymous

* catch AssertionError

fix: AssertionError will now be caught
王路路 5 年之前
父節點
當前提交
50a8f7e711
共有 2 個文件被更改,包括 17 次插入和 2 次删除
  1. 15 2
      proxypool/processors/tester.py
  2. 2 0
      proxypool/setting.py

+ 15 - 2
proxypool/processors/tester.py

@@ -3,7 +3,7 @@ import aiohttp
 from loguru import logger
 from proxypool.schemas import Proxy
 from proxypool.storages.redis import RedisClient
-from proxypool.setting import TEST_TIMEOUT, TEST_BATCH, TEST_URL, TEST_VALID_STATUS
+from proxypool.setting import TEST_TIMEOUT, TEST_BATCH, TEST_URL, TEST_VALID_STATUS, TEST_ANONYMOUS
 from aiohttp import ClientProxyConnectionError, ServerDisconnectedError, ClientOSError, ClientHttpProxyError
 from asyncio import TimeoutError
 
@@ -14,7 +14,8 @@ EXCEPTIONS = (
     TimeoutError,
     ServerDisconnectedError,
     ClientOSError,
-    ClientHttpProxyError
+    ClientHttpProxyError,
+    AssertionError
 )
 
 
@@ -39,6 +40,18 @@ class Tester(object):
         async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False)) as session:
             try:
                 logger.debug(f'testing {proxy.string()}')
+                # if TEST_ANONYMOUS is True, make sure that
+                # the proxy has the effect of hiding the real IP
+                if TEST_ANONYMOUS:
+                    url = 'https://httpbin.org/ip'
+                    async with session.get(url, timeout=TEST_TIMEOUT) as response:
+                        resp_json = await response.json()
+                        origin_ip = resp_json['origin']
+                    async with session.get(url, proxy=f'http://{proxy.string()}', timeout=TEST_TIMEOUT) as response:
+                        resp_json = await response.json()
+                        anonymous_ip = resp_json['origin']
+                    assert origin_ip != anonymous_ip
+                    assert proxy.host == anonymous_ip
                 async with session.get(TEST_URL, proxy=f'http://{proxy.string()}', timeout=TEST_TIMEOUT,
                                        allow_redirects=False) as response:
                     if response.status in TEST_VALID_STATUS:

+ 2 - 0
proxypool/setting.py

@@ -56,6 +56,8 @@ CYCLE_GETTER = env.int('CYCLE_GETTER', 100)
 TEST_URL = env.str('TEST_URL', 'http://www.baidu.com')
 TEST_TIMEOUT = env.int('TEST_TIMEOUT', 10)
 TEST_BATCH = env.int('TEST_BATCH', 20)
+# only save anonymous proxy
+TEST_ANONYMOUS = True
 # TEST_HEADERS = env.json('TEST_HEADERS', {
 #     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36',
 # })