web_fanqiangdang.go 1.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081
  1. package getter
import (
	"fmt"
	"os"
	"sync"

	"github.com/gocolly/colly"
	"github.com/zu1k/proxypool/proxy"
	"github.com/zu1k/proxypool/tool"
)
// init registers this getter in the package-level getter registry under
// the name "web-fanqiangdang", so it can be instantiated from configuration.
func init() {
	Register("web-fanqiangdang", NewWebFanqiangdangGetter)
}
// WebFanqiangdang collects proxy links by crawling a forum's RSS
// subscription feed and scraping the linked post pages.
type WebFanqiangdang struct {
	c         *colly.Collector // crawler used for both the feed and the post pages
	NumNeeded int              // stop visiting further feed entries once this many links are gathered
	Url       string           // subscription (RSS) URL the crawl starts from
	results   []string         // links accumulated during the current Get call
}
  18. func NewWebFanqiangdangGetter(options tool.Options) (getter Getter, err error) {
  19. num, found := options["num"]
  20. t := 200
  21. switch num.(type) {
  22. case int:
  23. t = num.(int)
  24. case float64:
  25. t = int(num.(float64))
  26. }
  27. if !found || t <= 0 {
  28. t = 200
  29. }
  30. urlInterface, found := options["url"]
  31. if found {
  32. url, err := AssertTypeStringNotNull(urlInterface)
  33. if err != nil {
  34. return nil, err
  35. }
  36. return &WebFanqiangdang{
  37. c: colly.NewCollector(),
  38. NumNeeded: t,
  39. Url: url,
  40. }, nil
  41. }
  42. return nil, ErrorUrlNotFound
  43. }
  44. func (w *WebFanqiangdang) Get() []proxy.Proxy {
  45. w.results = make([]string, 0)
  46. // 找到所有的文字消息
  47. w.c.OnHTML("td.t_f", func(e *colly.HTMLElement) {
  48. w.results = append(w.results, GrepLinksFromString(e.Text)...)
  49. })
  50. // 从订阅中取出每一页,因为是订阅,所以都比较新
  51. w.c.OnXML("//item//link", func(e *colly.XMLElement) {
  52. if len(w.results) < w.NumNeeded {
  53. _ = e.Request.Visit(e.Text)
  54. }
  55. })
  56. w.results = make([]string, 0)
  57. err := w.c.Visit(w.Url)
  58. if err != nil {
  59. _ = fmt.Errorf("%s", err.Error())
  60. }
  61. return StringArray2ProxyArray(w.results)
  62. }
  63. func (w *WebFanqiangdang) Get2Chan(pc chan proxy.Proxy, wg *sync.WaitGroup) {
  64. defer wg.Done()
  65. nodes := w.Get()
  66. for _, node := range nodes {
  67. pc <- node
  68. }
  69. }