Germey 3 years ago
parent
commit
598d02dcf8
3 changed files with 14 additions and 4 deletions
  1. Dockerfile (+1 -1)
  2. kubernetes/templates/proxypool-deployment.yaml (+8 -0)
  3. proxypool/crawlers/base.py (+5 -3)

+ 1 - 1
Dockerfile

@@ -1,4 +1,4 @@
-FROM nginx:alpine
+FROM alpine:3.7
 WORKDIR /app
 RUN apk add --no-cache --virtual .build-deps g++ python3-dev libffi-dev \
     openssl-dev libxml2-dev libxslt-dev gcc musl-dev py3-pip && \
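Note: the base image switches from nginx:alpine to a pinned alpine:3.7. The pool serves its HTTP API from its own Python process on port 5555 (see the probe added below), so the bundled nginx was unused, and pinning the Alpine release keeps the apk packages installed in the RUN step above reproducible across builds.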

+ 8 - 0
kubernetes/templates/proxypool-deployment.yaml

@@ -23,6 +23,14 @@ spec:
             - containerPort: 5555
               protocol: TCP
           imagePullPolicy: {{ .Values.deployment.imagePullPolicy }}
+          livenessProbe:
+            httpGet:
+              path: /random
+              port: 5555
+            initialDelaySeconds: 60
+            periodSeconds: 5
+            failureThreshold: 5
+            timeoutSeconds: 10
           resources:
             {{- toYaml .Values.deployment.resources | nindent 12 }}
           env:
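Note: with the new livenessProbe, the kubelet sends an HTTP GET to /random on port 5555, starting 60 s after the container starts and then every 5 s; a probe fails on a non-2xx/3xx status or after 10 s without a response, and 5 consecutive failures restart the container. Since /random serves a proxy from the pool, the probe restarts the pod whenever the API stops answering. A minimal sketch of the same check, assuming the service is reachable on localhost:5555 (e.g. via kubectl port-forward):

import requests

def probe(url='http://localhost:5555/random', timeout=10):
    # Mirror the probe settings: the GET must answer within
    # timeoutSeconds (10 s) with a 2xx/3xx status to count as healthy.
    try:
        response = requests.get(url, timeout=timeout)
        return 200 <= response.status_code < 400
    except requests.RequestException:
        return False

print('healthy' if probe() else 'unhealthy')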

+ 5 - 3
proxypool/crawlers/base.py

@@ -4,9 +4,11 @@ from loguru import logger
 from proxypool.setting import GET_TIMEOUT
 from fake_headers import Headers
 import time
+
+
 class BaseCrawler(object):
     urls = []
-    
+
     @retry(stop_max_attempt_number=3, retry_on_result=lambda x: x is None, wait_fixed=2000)
     def fetch(self, url, **kwargs):
         try:
@@ -14,13 +16,13 @@ class BaseCrawler(object):
             kwargs.setdefault('timeout', GET_TIMEOUT)
             kwargs.setdefault('verify', False)
             kwargs.setdefault('headers', headers)
-            response = requests.get(url ,**kwargs)
+            response = requests.get(url, **kwargs)
             if response.status_code == 200:
                 response.encoding = 'utf-8'
                 return response.text
         except requests.ConnectionError:
             return
-    
+
     @logger.catch
     def crawl(self):
         """