@@ -4,9 +4,11 @@ from loguru import logger
 from proxypool.setting import GET_TIMEOUT
 from fake_headers import Headers
 import time
+
+
 class BaseCrawler(object):
     urls = []
-
+
     @retry(stop_max_attempt_number=3, retry_on_result=lambda x: x is None, wait_fixed=2000)
     def fetch(self, url, **kwargs):
         try:
@@ -14,13 +16,13 @@ class BaseCrawler(object):
             kwargs.setdefault('timeout', GET_TIMEOUT)
             kwargs.setdefault('verify', False)
             kwargs.setdefault('headers', headers)
-            response = requests.get(url ,**kwargs)
+            response = requests.get(url, **kwargs)
             if response.status_code == 200:
                 response.encoding = 'utf-8'
                 return response.text
         except requests.ConnectionError:
             return
-
+
     @logger.catch
     def crawl(self):
         """
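
Note that fetch() returns None on requests.ConnectionError, which is exactly what the retry_on_result predicate checks, so a failed request is retried up to three times at two-second intervals before crawl() sees a None result. For context, a minimal sketch of how a concrete crawler might build on this base class; the ExampleCrawler name, its URL, the module path, and the parse hook are illustrative assumptions, not part of this diff.

import re

from proxypool.crawlers.base import BaseCrawler  # module path is an assumption


class ExampleCrawler(BaseCrawler):
    # hypothetical proxy source; any page listing ip:port pairs would do
    urls = ['https://example.com/free-proxy-list']

    def parse(self, html):
        # fetch() yields the page text, or None once the three retry
        # attempts are exhausted, so guard before scanning the body
        if not html:
            return
        # extract ip:port pairs from the fetched page
        for ip, port in re.findall(r'(\d+\.\d+\.\d+\.\d+):(\d+)', html):
            yield f'{ip}:{port}'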