Germey 1 год назад
Родитель
Коммит
7c77ad0b07
4 изменённых файла: 29 добавлений и 44 удаления
  1. 0 11
      README.md
  2. 0 18
      build.yaml
  3. 2 3
      docker-compose.yml
  4. 27 12
      proxypool/processors/tester.py

+ 0 - 11
README.md

@@ -74,12 +74,6 @@ proxypool    | 2020-02-19 17:09:46,596 INFO success: tester entered RUNNING stat
 
 这时候访问 [http://localhost:5555/random](http://localhost:5555/random) 即可获取一个随机可用代理。
 
-当然你也可以选择自己 Build,直接运行如下命令即可:
-
-```
-docker-compose -f build.yaml up
-```
-
 如果下载速度特别慢,可以自行修改 Dockerfile,修改:
 
 ```diff
@@ -347,11 +341,6 @@ class Daili66Crawler(BaseCrawler):
 
 本项目提供了 Kubernetes 部署脚本,如需部署到 Kubernetes,请参考 [kubernetes](./kubernetes)。
 
-## 待开发
-
-- [ ] 前端页面管理
-- [ ] 使用情况统计分析
-
 如有一起开发的兴趣可以在 Issue 留言,非常感谢!
 
 ## LICENSE

+ 0 - 18
build.yaml

@@ -1,18 +0,0 @@
-version: "3"
-services:
-  redis4proxypool:
-    image: redis:alpine
-    container_name: redis4proxypool
-    ports:
-      - "6374:6379"
-  proxypool:
-    build: .
-    image: "germey/proxypool:master"
-    container_name: proxypool
-    ports:
-      - "5555:5555"
-    restart: always
-    #    volumes:
-    #      - proxypool/crawlers/private:/app/proxypool/crawlers/private
-    environment:
-      PROXYPOOL_REDIS_CONNECTION_STRING: redis://@redis4proxypool:6379/0

+ 2 - 3
docker-compose.yml

@@ -3,16 +3,15 @@ services:
   redis4proxypool:
     image: redis:alpine
     container_name: redis4proxypool
-    # ports:
-    #   - "6374:6379"
   proxypool:
+    build: .
     image: "germey/proxypool:master"
     container_name: proxypool
     ports:
       - "5555:5555"
     restart: always
     # volumes:
-    #   - proxypool/crawlers/private:/app/proxypool/crawlers/private
+    #   - proxypool/crawlers/private:~/proxypool/crawlers/private
     environment:
       PROXYPOOL_REDIS_HOST: redis4proxypool
       

+ 27 - 12
proxypool/processors/tester.py

@@ -45,27 +45,33 @@ class Tester(object):
                 logger.debug(f'testing {proxy.string()}')
                 # if TEST_ANONYMOUS is True, make sure that
                 # the proxy has the effect of hiding the real IP
+                # logger.debug(f'TEST_ANONYMOUS {TEST_ANONYMOUS}')
                 if TEST_ANONYMOUS:
                     url = 'https://httpbin.org/ip'
                     async with session.get(url, timeout=TEST_TIMEOUT) as response:
                         resp_json = await response.json()
                         origin_ip = resp_json['origin']
+                        # logger.debug(f'origin ip is {origin_ip}')
                     async with session.get(url, proxy=f'http://{proxy.string()}', timeout=TEST_TIMEOUT) as response:
                         resp_json = await response.json()
                         anonymous_ip = resp_json['origin']
+                        logger.debug(f'anonymous ip is {anonymous_ip}')
                     assert origin_ip != anonymous_ip
                     assert proxy.host == anonymous_ip
                 async with session.get(TEST_URL, proxy=f'http://{proxy.string()}', timeout=TEST_TIMEOUT,
                                        allow_redirects=False) as response:
                     if response.status in TEST_VALID_STATUS:
                         if TEST_DONT_SET_MAX_SCORE:
-                            logger.debug(f'proxy {proxy.string()} is valid, remain current score')
+                            logger.debug(
+                                f'proxy {proxy.string()} is valid, remain current score')
                         else:
                             self.redis.max(proxy)
-                            logger.debug(f'proxy {proxy.string()} is valid, set max score')
+                            logger.debug(
+                                f'proxy {proxy.string()} is valid, set max score')
                     else:
                         self.redis.decrease(proxy)
-                        logger.debug(f'proxy {proxy.string()} is invalid, decrease score')
+                        logger.debug(
+                            f'proxy {proxy.string()} is invalid, decrease score')
                 # if independent tester class found, create new set of storage and do the extra test
                 for tester in self.testers:
                     key = tester.key
@@ -82,18 +88,25 @@ class Tester(object):
                             is_valid = await tester.parse(resp_text, test_url, proxy.string())
                             if is_valid:
                                 if tester.test_dont_set_max_score:
-                                    logger.info(f'key[{key}] proxy {proxy.string()} is valid, remain current score')
+                                    logger.info(
+                                        f'key[{key}] proxy {proxy.string()} is valid, remain current score')
                                 else:
-                                    self.redis.max(proxy, key, tester.proxy_score_max)
-                                    logger.info(f'key[{key}] proxy {proxy.string()} is valid, set max score')
+                                    self.redis.max(
+                                        proxy, key, tester.proxy_score_max)
+                                    logger.info(
+                                        f'key[{key}] proxy {proxy.string()} is valid, set max score')
                             else:
-                                self.redis.decrease(proxy, tester.key, tester.proxy_score_min)
-                                logger.info(f'key[{key}] proxy {proxy.string()} is invalid, decrease score')
+                                self.redis.decrease(
+                                    proxy, tester.key, tester.proxy_score_min)
+                                logger.info(
+                                    f'key[{key}] proxy {proxy.string()} is invalid, decrease score')
 
             except EXCEPTIONS:
                 self.redis.decrease(proxy)
-                [self.redis.decrease(proxy, tester.key, tester.proxy_score_min) for tester in self.testers]
-                logger.debug(f'proxy {proxy.string()} is invalid, decrease score')
+                [self.redis.decrease(proxy, tester.key, tester.proxy_score_min)
+                 for tester in self.testers]
+                logger.debug(
+                    f'proxy {proxy.string()} is invalid, decrease score')
 
     @logger.catch
     def run(self):
@@ -107,10 +120,12 @@ class Tester(object):
         logger.debug(f'{count} proxies to test')
         cursor = 0
         while True:
-            logger.debug(f'testing proxies use cursor {cursor}, count {TEST_BATCH}')
+            logger.debug(
+                f'testing proxies use cursor {cursor}, count {TEST_BATCH}')
             cursor, proxies = self.redis.batch(cursor, count=TEST_BATCH)
             if proxies:
-                tasks = [self.loop.create_task(self.test(proxy)) for proxy in proxies]
+                tasks = [self.loop.create_task(
+                    self.test(proxy)) for proxy in proxies]
                 self.loop.run_until_complete(asyncio.wait(tasks))
             if not cursor:
                 break