# utils.py (793 B)

  1. import requests
  2. from requests.exceptions import ConnectionError
  3. base_headers = {
  4. 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36',
  5. 'Accept-Encoding': 'gzip, deflate, sdch',
  6. 'Accept-Language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7'
  7. }
  8. def get_page(url, options={}):
  9. """
  10. 抓取代理
  11. :param url:
  12. :param options:
  13. :return:
  14. """
  15. headers = dict(base_headers, **options)
  16. print('正在抓取', url)
  17. try:
  18. response = requests.get(url, headers=headers)
  19. print('抓取成功', url, response.status_code)
  20. if response.status_code == 200:
  21. return response.text
  22. except ConnectionError:
  23. print('抓取失败', url)
  24. return None