```go
package getter

import (
	"fmt"
	"sync"

	"github.com/gocolly/colly"
	"github.com/zu1k/proxypool/proxy"
	"github.com/zu1k/proxypool/tool"
)

func init() {
	Register("web-fanqiangdang", NewWebFanqiangdangGetter)
}

// WebFanqiangdang collects proxy links from the fanqiangdang forum feed.
type WebFanqiangdang struct {
	c         *colly.Collector
	NumNeeded int
	Url       string
	results   []string
}

func NewWebFanqiangdangGetter(options tool.Options) (getter Getter, err error) {
	num, found := options["num"]
	t := 200
	switch n := num.(type) {
	case int:
		t = n
	case float64:
		t = int(n)
	}
	if !found || t <= 0 {
		t = 200
	}

	urlInterface, found := options["url"]
	if found {
		url, err := AssertTypeStringNotNull(urlInterface)
		if err != nil {
			return nil, err
		}
		return &WebFanqiangdang{
			c:         colly.NewCollector(),
			NumNeeded: t,
			Url:       url,
		}, nil
	}
	return nil, ErrorUrlNotFound
}

func (w *WebFanqiangdang) Get() []proxy.Proxy {
	w.results = make([]string, 0)
	// Find every text post and grep proxy links out of it.
	w.c.OnHTML("td.t_f", func(e *colly.HTMLElement) {
		w.results = append(w.results, GrepLinksFromString(e.Text)...)
	})

	// Follow each page listed in the subscription feed; because it is a feed,
	// the entries are all fairly recent.
	w.c.OnXML("//item//link", func(e *colly.XMLElement) {
		if len(w.results) < w.NumNeeded {
			_ = e.Request.Visit(e.Text)
		}
	})

	w.results = make([]string, 0)
	err := w.c.Visit(w.Url)
	if err != nil {
		// The error is discarded; the getter simply returns whatever was collected.
		_ = fmt.Errorf("%s", err.Error())
	}

	return StringArray2ProxyArray(w.results)
}

// Get2Chan fetches the proxies and pushes them into the shared channel.
func (w *WebFanqiangdang) Get2Chan(pc chan proxy.Proxy, wg *sync.WaitGroup) {
	defer wg.Done()
	nodes := w.Get()
	for _, node := range nodes {
		pc <- node
	}
}
```
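For context, here is a minimal usage sketch showing how this getter could be constructed and run directly, outside the `Register`-based lookup. It assumes `tool.Options` accepts a string-keyed map literal, that the `Getter` interface returned by `NewWebFanqiangdangGetter` exposes the `Get` method defined above, and that the feed URL is a placeholder rather than the real subscription address.

```go
package main

import (
	"fmt"

	"github.com/zu1k/proxypool/getter"
	"github.com/zu1k/proxypool/tool"
)

func main() {
	// "url" is a placeholder; the real forum feed address comes from the
	// pool's configuration. "num" caps how many links to gather before the
	// crawler stops following further feed pages.
	g, err := getter.NewWebFanqiangdangGetter(tool.Options{
		"url": "https://example.com/feed.xml",
		"num": 200,
	})
	if err != nil {
		panic(err)
	}

	proxies := g.Get()
	fmt.Println("collected", len(proxies), "proxies")
}
```

`Get2Chan` wraps the same crawl for the concurrent, channel-based path, which is how a pool would typically run many getters under a single `sync.WaitGroup`.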