Add two proxy crawlers (#203)

Shayne Wang committed 2 years ago
commit 2344ee1db0
2 changed files with 87 additions and 0 deletions
  1. proxypool/crawlers/public/docip.py (+37, -0)
  2. proxypool/crawlers/public/uqidata.py (+50, -0)

+ 37 - 0
proxypool/crawlers/public/docip.py

@@ -0,0 +1,37 @@
+import time
+import json
+from loguru import logger
+from proxypool.schemas.proxy import Proxy
+from proxypool.crawlers.base import BaseCrawler
+
+BASE_URL = 'https://www.docip.net/data/free.json?t={date}'
+
+
+class DocipCrawler(BaseCrawler):
+    """
+    Docip crawler, https://www.docip.net/data/free.json
+    """
+    urls = [BASE_URL.format(date=time.strftime("%Y%m%d", time.localtime()))]
+
+    def parse(self, html):
+        """
+        parse the JSON response to get proxies
+        :return:
+        """
+        try:
+            result = json.loads(html)
+            proxy_list = result['data']
+            for proxy_item in proxy_list:
+                host = proxy_item['ip']
+                port = proxy_item['port']
+                yield Proxy(host=host, port=port)
+        except json.JSONDecodeError:
+            logger.error('failed to decode JSON from {}', BASE_URL)
+            return
+
+
+if __name__ == '__main__':
+    crawler = DocipCrawler()
+    for proxy in crawler.crawl():
+        print(proxy)
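
For reference, here is a minimal sketch of the payload shape that parse() above expects. The data / ip / port keys come from the code; the sample values are invented:

    sample = '{"data": [{"ip": "1.2.3.4", "port": "8080"}]}'
    for proxy in DocipCrawler().parse(sample):
        print(proxy)  # should print something like 1.2.3.4:8080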

+ 50 - 0
proxypool/crawlers/public/uqidata.py

@@ -0,0 +1,50 @@
+from pyquery import PyQuery as pq
+from proxypool.schemas.proxy import Proxy
+from proxypool.crawlers.base import BaseCrawler
+
+BASE_URL = 'https://ip.uqidata.com/free/index.html'
+
+
+class UqidataCrawler(BaseCrawler):
+    """
+    Uqidata crawler, https://ip.uqidata.com/free/index.html
+    """
+    urls = [BASE_URL]
+    ignore = True
+
+    @staticmethod
+    def encode(input_str):
+        # the port is hidden in a CSS class name: each letter maps to a digit
+        # ("ABCDEFGHIZ".find gives A=0 ... I=8, Z=9) and the joined digits are
+        # the real port shifted left by 3 bits, so shift right to recover it
+        digits = ["ABCDEFGHIZ".find(c) for c in input_str]
+        result = "".join(str(d) for d in digits)
+        return int(result) >> 0x03
+
+    def parse(self, html):
+        """
+        parse html file to get proxies
+        :return:
+        """
+        doc = pq(html)
+        trs = doc('#main_container .inner table tbody tr:nth-child(n+3)').items()
+        for tr in trs:
+            ip_html = tr('td.ip').find("*").items()
+            host = ''
+            for i in ip_html:
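+                # skip decoy elements hidden via an inline display:none style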
+                if i.attr('style') is not None and 'none' in i.attr('style'):
+                    continue
+                if i.text() == '':
+                    continue
+                host += i.text()
+
+            port_code = tr('td.port').attr('class').split(' ')[1]
+            port = UqidataCrawler.encode(port_code)
+            yield Proxy(host=host, port=port)
+
+
+if __name__ == '__main__':
+    crawler = UqidataCrawler()
+    for proxy in crawler.crawl():
+        print(proxy)
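
To make the port de-obfuscation concrete, here is a worked example for encode() under the letter-to-digit mapping above (A=0 ... I=8, Z=9): the class suffix 'GEGEA' maps to the digits 64640, and 64640 >> 3 recovers port 8080.

    assert UqidataCrawler.encode('GEGEA') == 8080  # int('64640') >> 3
    assert UqidataCrawler.encode('GEA') == 80      # int('640') >> 3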