getter.py 1.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546
  1. from loguru import logger
  2. from proxypool.storages.redis import RedisClient
  3. from proxypool.setting import PROXY_NUMBER_MAX
  4. from proxypool.crawlers import __all__ as crawlers_cls
  5. from proxypool.testers import __all__ as testers_cls
  6. class Getter(object):
  7. """
  8. getter of proxypool
  9. """
  10. def __init__(self):
  11. """
  12. init db and crawlers
  13. """
  14. self.redis = RedisClient()
  15. self.crawlers_cls = crawlers_cls
  16. self.crawlers = [crawler_cls() for crawler_cls in self.crawlers_cls]
  17. self.testers_cls = testers_cls
  18. self.testers = [tester_cls() for tester_cls in self.testers_cls]
  19. def is_full(self):
  20. """
  21. if proxypool if full
  22. return: bool
  23. """
  24. return self.redis.count() >= PROXY_NUMBER_MAX
  25. @logger.catch
  26. def run(self):
  27. """
  28. run crawlers to get proxy
  29. :return:
  30. """
  31. if self.is_full():
  32. return
  33. for crawler in self.crawlers:
  34. logger.info(f'crawler {crawler} to get proxy')
  35. for proxy in crawler.crawl():
  36. self.redis.add(proxy)
  37. [self.redis.add(proxy, redis_key=tester.key) for tester in self.testers]
  38. if __name__ == '__main__':
  39. getter = Getter()
  40. getter.run()