```go
package getter

import (
	"fmt"
	"sync"

	"github.com/gocolly/colly"
	"github.com/zu1k/proxypool/proxy"
	"github.com/zu1k/proxypool/tool"
)

func init() {
	Register("web-fanqiangdang", NewWebFanqiangdangGetter)
}

// WebFanqiangdang collects proxy links from the fanqiangdang forum feed.
type WebFanqiangdang struct {
	c         *colly.Collector
	NumNeeded int
	Url       string
	results   []string
}

func NewWebFanqiangdangGetter(options tool.Options) (getter Getter, err error) {
	num, found := options["num"]
	t := 200
	switch n := num.(type) {
	case int:
		t = n
	case float64:
		t = int(n)
	}
	if !found || t <= 0 {
		t = 200
	}

	urlInterface, found := options["url"]
	if found {
		url, err := AssertTypeStringNotNull(urlInterface)
		if err != nil {
			return nil, err
		}
		return &WebFanqiangdang{
			c:         colly.NewCollector(),
			NumNeeded: t,
			Url:       url,
		}, nil
	}
	return nil, ErrorUrlNotFound
}

func (w *WebFanqiangdang) Get() []proxy.Proxy {
	w.results = make([]string, 0)
	// Find every text post and grep proxy links out of it.
	w.c.OnHTML("td.t_f", func(e *colly.HTMLElement) {
		w.results = append(w.results, GrepLinksFromString(e.Text)...)
	})

	// Follow each page listed in the subscription feed; because it is a feed,
	// the entries are all fairly recent.
	w.c.OnXML("//item//link", func(e *colly.XMLElement) {
		if len(w.results) < w.NumNeeded {
			_ = e.Request.Visit(e.Text)
		}
	})

	w.results = make([]string, 0)
	err := w.c.Visit(w.Url)
	if err != nil {
		// The error is discarded; the getter simply returns whatever was collected.
		_ = fmt.Errorf("%s", err.Error())
	}

	return StringArray2ProxyArray(w.results)
}

// Get2Chan fetches the proxies and pushes them into the shared channel.
func (w *WebFanqiangdang) Get2Chan(pc chan proxy.Proxy, wg *sync.WaitGroup) {
	defer wg.Done()
	nodes := w.Get()
	for _, node := range nodes {
		pc <- node
	}
}
```
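For context, here is a minimal usage sketch showing how this getter could be constructed and run directly, outside the `Register`-based lookup. It assumes `tool.Options` accepts a string-keyed map literal, that the `Getter` interface returned by `NewWebFanqiangdangGetter` exposes the `Get` method defined above, and that the feed URL is a placeholder rather than the real subscription address.

```go
package main

import (
	"fmt"

	"github.com/zu1k/proxypool/getter"
	"github.com/zu1k/proxypool/tool"
)

func main() {
	// "url" is a placeholder; the real forum feed address comes from the
	// pool's configuration. "num" caps how many links to gather before the
	// crawler stops following further feed pages.
	g, err := getter.NewWebFanqiangdangGetter(tool.Options{
		"url": "https://example.com/feed.xml",
		"num": 200,
	})
	if err != nil {
		panic(err)
	}

	proxies := g.Get()
	fmt.Println("collected", len(proxies), "proxies")
}
```

`Get2Chan` wraps the same crawl for the concurrent, channel-based path, which is how a pool would typically run many getters under a single `sync.WaitGroup`.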