getter.py 1.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243
  1. from loguru import logger
  2. from proxypool.storages.redis import RedisClient
  3. from proxypool.setting import PROXY_NUMBER_MAX
  4. from proxypool.crawlers import __all__ as crawlers_cls
  5. class Getter(object):
  6. """
  7. getter of proxypool
  8. """
  9. def __init__(self):
  10. """
  11. init db and crawlers
  12. """
  13. self.redis = RedisClient()
  14. self.crawlers_cls = crawlers_cls
  15. self.crawlers = [crawler_cls() for crawler_cls in self.crawlers_cls]
  16. def is_full(self):
  17. """
  18. if proxypool if full
  19. return: bool
  20. """
  21. return self.redis.count() >= PROXY_NUMBER_MAX
  22. @logger.catch
  23. def run(self):
  24. """
  25. run crawlers to get proxy
  26. :return:
  27. """
  28. if self.is_full():
  29. return
  30. for crawler in self.crawlers:
  31. logger.info(f'crawler {crawler} to get proxy')
  32. for proxy in crawler.crawl():
  33. self.redis.add(proxy)
  34. if __name__ == '__main__':
  35. getter = Getter()
  36. getter.run()