getter.py 943 B

123456789101112131415161718192021222324252627282930
  1. from proxypool.tester import Tester
  2. from proxypool.db import RedisClient
  3. from proxypool.crawler import Crawler
  4. from proxypool.setting import *
  5. import sys
  6. class Getter():
  7. def __init__(self):
  8. self.redis = RedisClient()
  9. self.crawler = Crawler()
  10. def is_over_threshold(self):
  11. """
  12. 判断是否达到了代理池限制
  13. """
  14. if self.redis.count() >= POOL_UPPER_THRESHOLD:
  15. return True
  16. else:
  17. return False
  18. def run(self):
  19. print('获取器开始执行')
  20. if not self.is_over_threshold():
  21. for callback_label in range(self.crawler.__CrawlFuncCount__):
  22. callback = self.crawler.__CrawlFunc__[callback_label]
  23. # 获取代理
  24. proxies = self.crawler.get_proxies(callback)
  25. sys.stdout.flush()
  26. for proxy in proxies:
  27. self.redis.add(proxy)