浏览代码

update random

Germey 8 年之前
父节点
当前提交
062d32db4f
共有 7 个文件被更改,包括 23 次插入和 56 次删除
  1. 0 13
      .travis.yml
  2. 1 1
      examples/proxytest.py
  3. 8 0
      proxypool/crawler.py
  4. 7 3
      proxypool/db.py
  5. 7 4
      proxypool/setting.py
  6. 0 1
      run.py
  7. 0 34
      setup.py

+ 0 - 13
.travis.yml

@@ -1,13 +0,0 @@
-language: python
-python:
-  - "3.5"
-
-services:
-  - redis-server
-
-script: 
-  - python3 setup.py install
-  - cd tests
-  - python3 test_api.py
-  - python3 test_db.py
-  - python3 test_schedule.py

+ 1 - 1
examples/proxytest.py

@@ -1,7 +1,7 @@
 import requests
 from proxypool.setting import TEST_URL
 
-proxy = '49.86.62.22:808'
+proxy = '96.9.90.90:8080'
 
 proxies = {
     'http': 'http://' + proxy,

+ 8 - 0
proxypool/crawler.py

@@ -24,6 +24,14 @@ class Crawler(object, metaclass=ProxyMetaclass):
             proxies.append(proxy)
         return proxies
     
+    def crawl_daxiang(self):
+        url = 'http://vtp.daxiangdaili.com/ip/?tid=555546364094534&num=100'
+        html = get_page(url)
+        if html:
+            urls = html.split('\n')
+            for url in urls:
+                yield url
+    
     def crawl_kuaidaili(self):
         url = 'http://dev.kuaidaili.com/api/getproxy/?orderid=959961765125099&num=100&b_pcchrome=1&b_pcie=1&b_pcff=1&protocol=1&method=1&an_an=1&an_ha=1&quality=1&format=json&sep=2'
         html = get_page(url)

+ 7 - 3
proxypool/db.py

@@ -37,14 +37,18 @@ class RedisClient(object):
     
     def random(self):
         """
-        随机获取有效代理
+        随机获取有效代理,首先尝试获取最高分数代理,如果不存在,按照排名获取,否则异常
         :return:
         """
         result = self.db.zrangebyscore(REDIS_KEY, MAX_SCORE, MAX_SCORE)
         if len(result):
             return choice(result).decode('utf-8')
         else:
-            raise PoolEmptyError
+            result = self.db.zrevrange(REDIS_KEY, 0, 100)
+            if len(result):
+                return choice(result).decode('utf-8')
+            else:
+                raise PoolEmptyError
     
     def decrease(self, proxy):
         """
@@ -90,7 +94,7 @@ class RedisClient(object):
         """
         all = self.db.zrangebyscore(REDIS_KEY, MIN_SCORE, MAX_SCORE)
         return [item.decode('utf-8') for item in all]
-    
+
 
 if __name__ == '__main__':
     conn = RedisClient()

+ 7 - 4
proxypool/setting.py

@@ -1,9 +1,12 @@
-# Redis数据库的地址和端口
-REDIS_HOST = 'localhost'
+# Redis数据库地址
+REDIS_HOST = 'DataCrawl-Pool.redis.cache.chinacloudapi.cn'
+
+# Redis端口
 REDIS_PORT = 6379
 
-# 如果Redis有密码,则添加这句密码,否则设置为None
-REDIS_PASSWORD = 'foobared'
+# Redis密码,如无填None
+REDIS_PASSWORD = None
+
 REDIS_KEY = 'proxies'
 
 # 代理分数

+ 0 - 1
run.py

@@ -1,4 +1,3 @@
-from proxypool.api import app
 from proxypool.scheduler import Scheduler
 
 

+ 0 - 34
setup.py

@@ -1,34 +0,0 @@
-from setuptools import setup
-
-setup(
-    name='proxypool',
-    version='1.0.0',
-    description='High performance proxy pool',
-    long_description='A proxy pool project modified from WiseDoge/ProxyPool',
-    author=['Germey', 'WiseDoge'],
-    author_email='[email protected]',
-    url='https://github.com/Germey/ProxyPool',
-    packages=[
-        'proxypool'
-    ],
-    py_modules=['run'],
-    include_package_data=True,
-    platforms='any',
-    install_requires=[
-        'aiohttp',
-        'requests',
-        'flask',
-        'redis',
-        'pyquery'
-    ],
-    entry_points={
-        'console_scripts': ['proxy_pool_run=run:main']
-    },
-    license='apache 2.0',
-    zip_safe=False,
-    classifiers=[
-        'Environment :: Console',
-        'Programming Language :: Python :: 3.5',
-        'Programming Language :: Python :: Implementation :: CPython'
-    ]
-)