wa-profile-api/internal/proxy/random_list_provider.go

113 lines
2.6 KiB
Go
Raw Permalink Normal View History

2023-10-13 14:37:41 +03:00
package proxy
import (
"context"
"math/rand"
"net"
"runtime"
"sync"
"time"
"github.com/Neur0toxine/wa-profile-api/internal/log"
"github.com/gocolly/colly"
)
// proxiesSource is the URL of the page that loadProxies visits to collect
// proxy addresses.
const proxiesSource = "https://free-proxy-list.net"
// RandomListProvider hands out proxy addresses picked at random from a list
// that a background goroutine re-downloads periodically.
type RandomListProvider struct {
	// lock guards proxies.
	lock    sync.RWMutex
	proxies []string

	// r is the random source used to choose an entry from proxies.
	r *rand.Rand

	// ctx and cancel control the lifetime of the background refresh loop:
	// the loop exits once ctx is done, and cancel is what ends ctx.
	ctx    context.Context
	cancel func()
}
// NewRandomListProvider creates a RandomListProvider whose random source is
// seeded from the current time, starts its background refresh goroutine and
// returns it as a Provider.
//
// A finalizer that cancels the provider's context is registered on the value.
// NOTE(review): the refresh goroutine started here holds a reference to the
// provider, so the value may never become unreachable and the finalizer may
// never fire — confirm.
func NewRandomListProvider() Provider {
	ctx, cancel := context.WithCancel(context.Background())
	seed := time.Now().UnixNano()

	pr := new(RandomListProvider)
	pr.r = rand.New(rand.NewSource(seed))
	pr.ctx = ctx
	pr.cancel = cancel

	// Kick off the periodic download before handing the provider out.
	go pr.refreshRegular()
	runtime.SetFinalizer(pr, randomListProviderDestructor)

	return pr
}
// randomListProviderDestructor is the finalizer registered for a
// RandomListProvider; it invokes the provider's cancel function, which ends
// the context the refresh loop watches.
func randomListProviderDestructor(pr *RandomListProvider) {
	stop := pr.cancel
	stop()
}
// Provide returns a randomly chosen proxy address from the current list.
//
// If the list is empty it waits 0.5s for the background refresh to populate
// it and tries once more; if it is still empty, an empty string is returned,
// which means a direct connection should be used.
func (pr *RandomListProvider) Provide() string {
	proxyAddress, ok := pr.pickRandom()
	if !ok {
		log.Debug("empty proxy list, waiting for 0.5s to populate...")
		// Sleep WITHOUT holding the lock: refresh needs the write lock to
		// store the downloaded list, so waiting under a read lock would
		// guarantee the second check sees the same empty list.
		time.Sleep(time.Millisecond * 500)
		if proxyAddress, ok = pr.pickRandom(); !ok {
			log.Debug("no proxies found, direct will be used.")
			return ""
		}
	}
	log.Debug("providing proxy", proxyAddress)
	return proxyAddress
}

// pickRandom returns a random entry of pr.proxies, or false when the list is
// empty.
func (pr *RandomListProvider) pickRandom() (string, bool) {
	// An exclusive lock is required even though proxies is only read:
	// Intn mutates pr.r, and a *rand.Rand is not safe for concurrent use.
	pr.lock.Lock()
	defer pr.lock.Unlock()

	if len(pr.proxies) == 0 {
		return "", false
	}
	// Intn(n) yields a value in [0, n), so n must be the full length;
	// len-1 would skip the last entry and panic for a single-element list.
	return pr.proxies[pr.r.Intn(len(pr.proxies))], true
}
// Bad removes the first occurrence of addr from the proxy list, if present.
// Removal overwrites the matching slot with the last entry and shrinks the
// slice by one, so the order of the remaining entries is not preserved.
func (pr *RandomListProvider) Bad(addr string) {
	pr.lock.Lock()
	defer pr.lock.Unlock()

	last := len(pr.proxies) - 1
	for idx := 0; idx <= last; idx++ {
		if pr.proxies[idx] != addr {
			continue
		}
		pr.proxies[idx] = pr.proxies[last]
		pr.proxies = pr.proxies[:last]
		return
	}
}
// refreshRegular re-downloads the proxy list immediately and then once an
// hour after each download finishes, until pr.ctx is done. It is meant to
// run in its own goroutine.
func (pr *RandomListProvider) refreshRegular() {
	const refreshInterval = time.Hour

	for {
		// Do not start another download once the provider was cancelled.
		select {
		case <-pr.ctx.Done():
			return
		default:
		}

		pr.refresh()

		// Wait for the next round, but wake up right away on cancellation;
		// a plain time.Sleep would keep this goroutine alive for up to an
		// hour after cancel.
		timer := time.NewTimer(refreshInterval)
		select {
		case <-pr.ctx.Done():
			timer.Stop()
			return
		case <-timer.C:
		}
	}
}
// refresh downloads a fresh proxy list and, when the download yielded at
// least one entry, replaces the current list with it. An empty result leaves
// the existing list untouched.
func (pr *RandomListProvider) refresh() {
	log.Debug("preparing to download fresh proxies")

	// Download before taking the lock: loadProxies does network I/O and
	// touches no shared state, so holding the write lock around it would
	// only stall Provide and Bad for the duration of the request.
	proxies := pr.loadProxies()

	if len(proxies) > 0 {
		pr.lock.Lock()
		pr.proxies = proxies
		pr.lock.Unlock()
	}
	log.Debug("downloaded", len(proxies), "proxies")
}
// loadProxies visits proxiesSource and returns the proxies found in the
// listing table as "http://host:port" strings. Only rows whose port is 80 or
// 8080 are kept. When the page cannot be loaded the failure is logged and
// whatever was collected so far (normally nothing) is returned.
func (pr *RandomListProvider) loadProxies() []string {
	proxies := []string{}
	c := colly.NewCollector(colly.AllowURLRevisit())

	c.OnRequest(func(r *colly.Request) {
		r.Headers.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36")
		log.Debug("proxy crawler: visiting", r.URL)
	})

	c.OnHTML("div.table-responsive.fpl-list > table > tbody", func(tbody *colly.HTMLElement) {
		tbody.ForEach("tr", func(_ int, el *colly.HTMLElement) {
			// First column is the host, second column is the port.
			host, port := el.ChildText("td:nth-child(1)"), el.ChildText("td:nth-child(2)")
			if host == "" {
				// A row without a host would produce "http://:80".
				return
			}
			if port == "80" || port == "8080" {
				proxies = append(proxies, "http://"+net.JoinHostPort(host, port))
			}
		})
	})

	// Visit reports request failures through its return value; surface them
	// instead of silently returning an empty list.
	if err := c.Visit(proxiesSource); err != nil {
		log.Debug("proxy crawler: cannot load proxy list:", err)
	}
	return proxies
}