| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889 |
- package getter
- import (
- "io/ioutil"
- "regexp"
- "sync"
- "github.com/zu1k/proxypool/pkg/proxy"
- "github.com/zu1k/proxypool/pkg/tool"
- )
- func init() {
- Register("webfuzzsub", NewWebFuzzSubGetter)
- }
- type WebFuzzSub struct {
- Url string
- }
- func (w *WebFuzzSub) Get() proxy.ProxyList {
- resp, err := tool.GetHttpClient().Get(w.Url)
- if err != nil {
- return nil
- }
- defer resp.Body.Close()
- body, err := ioutil.ReadAll(resp.Body)
- if err != nil {
- return nil
- }
- text := string(body)
- subUrls := urlRe.FindAllString(text, -1)
- result := make(proxy.ProxyList, 0)
- for _, url := range subUrls {
- result = append(result, (&Subscribe{Url: url}).Get()...)
- }
- return result
- }
- func (w *WebFuzzSub) Get2Chan(pc chan proxy.Proxy, wg *sync.WaitGroup) {
- defer wg.Done()
- nodes := w.Get()
- for _, node := range nodes {
- pc <- node
- }
- }
- func NewWebFuzzSubGetter(options tool.Options) (getter Getter, err error) {
- urlInterface, found := options["url"]
- if found {
- url, err := AssertTypeStringNotNull(urlInterface)
- if err != nil {
- return nil, err
- }
- return &WebFuzzSub{Url: url}, nil
- }
- return nil, ErrorUrlNotFound
- }
- var urlRe = regexp.MustCompile(urlPattern)
- const (
- // 匹配 IP4
- ip4Pattern = `((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)`
- // 匹配 IP6,参考以下网页内容:
- // http://blog.csdn.net/jiangfeng08/article/details/7642018
- ip6Pattern = `(([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|` +
- `(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|` +
- `(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|` +
- `(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|` +
- `(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|` +
- `(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|` +
- `(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|` +
- `(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))`
- // 同时匹配 IP4 和 IP6
- ipPattern = "(" + ip4Pattern + ")|(" + ip6Pattern + ")"
- // 匹配域名
- domainPattern = `[a-zA-Z0-9][a-zA-Z0-9_-]{0,62}(\.[a-zA-Z0-9][a-zA-Z0-9_-]{0,62})*(\.[a-zA-Z][a-zA-Z0-9]{0,10}){1}`
- // 匹配 URL
- urlPattern = `((https|http)?://)?` + // 协议
- `(([0-9a-zA-Z]+:)?[0-9a-zA-Z_-]+@)?` + // pwd:user@
- "(" + ipPattern + "|(" + domainPattern + "))" + // IP 或域名
- `(:\d{1,5})?` + // 端口
- `(/+[a-zA-Z0-9][a-zA-Z0-9_.-]*)*/*` + // path
- `(\?([a-zA-Z0-9_-]+(=.*&?)*)*)*` // query
- )
|