package proxy import ( "context" "math/rand" "net" "runtime" "sync" "time" "github.com/Neur0toxine/wa-profile-api/internal/log" "github.com/gocolly/colly" ) const proxiesSource = "https://free-proxy-list.net" type RandomListProvider struct { r *rand.Rand proxies []string ctx context.Context cancel func() lock sync.RWMutex } func NewRandomListProvider() Provider { ctx, cancel := context.WithCancel(context.Background()) pr := &RandomListProvider{ r: rand.New(rand.NewSource(time.Now().UnixNano())), ctx: ctx, cancel: cancel, } go pr.refreshRegular() runtime.SetFinalizer(pr, randomListProviderDestructor) return pr } func randomListProviderDestructor(pr *RandomListProvider) { pr.cancel() } func (pr *RandomListProvider) Provide() string { defer pr.lock.RUnlock() pr.lock.RLock() if len(pr.proxies) == 0 { log.Debug("empty proxy list, waiting for 0.5s to populate...") time.Sleep(time.Millisecond * 500) if len(pr.proxies) == 0 { log.Debug("no proxies found, direct will be used.") return "" } } proxyAddress := pr.proxies[pr.r.Intn(len(pr.proxies)-1)] log.Debug("providing proxy", proxyAddress) return proxyAddress } func (pr *RandomListProvider) Bad(addr string) { defer pr.lock.Unlock() pr.lock.Lock() for i, val := range pr.proxies { if val == addr { pr.proxies[i] = pr.proxies[len(pr.proxies)-1] pr.proxies = pr.proxies[:len(pr.proxies)-1] break } } } func (pr *RandomListProvider) refreshRegular() { for { select { case <-pr.ctx.Done(): return default: pr.refresh() time.Sleep(time.Hour) } } } func (pr *RandomListProvider) refresh() { log.Debug("preparing to download fresh proxies") defer pr.lock.Unlock() pr.lock.Lock() proxies := pr.loadProxies() if len(proxies) > 0 { pr.proxies = proxies } log.Debug("downloaded", len(proxies), "proxies") } func (pr *RandomListProvider) loadProxies() []string { proxies := []string{} c := colly.NewCollector(colly.AllowURLRevisit()) c.OnRequest(func(r *colly.Request) { r.Headers.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36") log.Debug("proxy crawler: visiting", r.URL) }) c.OnHTML("div.table-responsive.fpl-list > table > tbody", func(tbody *colly.HTMLElement) { tbody.ForEach("tr", func(_ int, el *colly.HTMLElement) { host, port := el.ChildText("td:nth-child(1)"), el.ChildText("td:nth-child(2)") if port == "80" || port == "8080" { proxies = append(proxies, "http://"+net.JoinHostPort(host, port)) } }) }) c.Visit(proxiesSource) return proxies }