web_fuzz_sub.go 3.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889
  1. package getter
  2. import (
  3. "io/ioutil"
  4. "regexp"
  5. "sync"
  6. "github.com/zu1k/proxypool/pkg/proxy"
  7. "github.com/zu1k/proxypool/pkg/tool"
  8. )
// init calls Register to associate the name "webfuzzsub" with the
// NewWebFuzzSubGetter constructor.
// NOTE(review): Register is defined elsewhere in the package; presumably it
// makes this getter selectable by that name from configuration — confirm.
func init() {
	Register("webfuzzsub", NewWebFuzzSubGetter)
}
// WebFuzzSub is a getter that downloads a web page, picks out every substring
// that looks like a URL and treats each of them as a subscription link.
type WebFuzzSub struct {
	// Url is the address of the page that is downloaded and scanned.
	Url string
}
  15. func (w *WebFuzzSub) Get() proxy.ProxyList {
  16. resp, err := tool.GetHttpClient().Get(w.Url)
  17. if err != nil {
  18. return nil
  19. }
  20. defer resp.Body.Close()
  21. body, err := ioutil.ReadAll(resp.Body)
  22. if err != nil {
  23. return nil
  24. }
  25. text := string(body)
  26. subUrls := urlRe.FindAllString(text, -1)
  27. result := make(proxy.ProxyList, 0)
  28. for _, url := range subUrls {
  29. result = append(result, (&Subscribe{Url: url}).Get()...)
  30. }
  31. return result
  32. }
  33. func (w *WebFuzzSub) Get2Chan(pc chan proxy.Proxy, wg *sync.WaitGroup) {
  34. defer wg.Done()
  35. nodes := w.Get()
  36. for _, node := range nodes {
  37. pc <- node
  38. }
  39. }
  40. func NewWebFuzzSubGetter(options tool.Options) (getter Getter, err error) {
  41. urlInterface, found := options["url"]
  42. if found {
  43. url, err := AssertTypeStringNotNull(urlInterface)
  44. if err != nil {
  45. return nil, err
  46. }
  47. return &WebFuzzSub{Url: url}, nil
  48. }
  49. return nil, ErrorUrlNotFound
  50. }
  51. var urlRe = regexp.MustCompile(urlPattern)
  52. const (
  53. // 匹配 IP4
  54. ip4Pattern = `((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)`
  55. // 匹配 IP6,参考以下网页内容:
  56. // http://blog.csdn.net/jiangfeng08/article/details/7642018
  57. ip6Pattern = `(([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|` +
  58. `(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|` +
  59. `(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|` +
  60. `(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|` +
  61. `(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|` +
  62. `(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|` +
  63. `(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|` +
  64. `(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))`
  65. // 同时匹配 IP4 和 IP6
  66. ipPattern = "(" + ip4Pattern + ")|(" + ip6Pattern + ")"
  67. // 匹配域名
  68. domainPattern = `[a-zA-Z0-9][a-zA-Z0-9_-]{0,62}(\.[a-zA-Z0-9][a-zA-Z0-9_-]{0,62})*(\.[a-zA-Z][a-zA-Z0-9]{0,10}){1}`
  69. // 匹配 URL
  70. urlPattern = `((https|http)?://)?` + // 协议
  71. `(([0-9a-zA-Z]+:)?[0-9a-zA-Z_-]+@)?` + // pwd:user@
  72. "(" + ipPattern + "|(" + domainPattern + "))" + // IP 或域名
  73. `(:\d{1,5})?` + // 端口
  74. `(/+[a-zA-Z0-9][a-zA-Z0-9_.-]*)*/*` + // path
  75. `(\?([a-zA-Z0-9_-]+(=.*&?)*)*)*` // query
  76. )