getter.py 971 B

1234567891011121314151617181920212223242526272829303132333435363738394041424344
  1. from proxypool.db import RedisClient
  2. from proxypool.setting import PROXY_NUMBER_MAX
  3. from proxypool.crawlers import __all__ as crawlers_cls
  4. class Getter():
  5. """
  6. getter of proxypool
  7. """
  8. def __init__(self):
  9. """
  10. init db and crawlers
  11. """
  12. self.redis = RedisClient()
  13. self.crawlers_cls = crawlers_cls
  14. self.crawlers = [crawler_cls() for crawler_cls in self.crawlers_cls]
  15. def is_full(self):
  16. """
  17. if proxypool if full
  18. return: bool
  19. """
  20. return self.redis.count() >= PROXY_NUMBER_MAX
  21. def run(self):
  22. """
  23. run crawlers to get proxy
  24. :return:
  25. """
  26. if self.is_full():
  27. return
  28. for crawler in self.crawlers:
  29. for proxy in crawler.crawl():
  30. print('proxy', proxy)
  31. self.redis.add(proxy)
  32. if __name__ == '__main__':
  33. getter = Getter()
  34. getter.run()