113 lines
2.6 KiB
Go
113 lines
2.6 KiB
Go
|
package proxy
|
||
|
|
||
|
import (
|
||
|
"context"
|
||
|
"math/rand"
|
||
|
"net"
|
||
|
"runtime"
|
||
|
"sync"
|
||
|
"time"
|
||
|
|
||
|
"github.com/Neur0toxine/wa-profile-api/internal/log"
|
||
|
"github.com/gocolly/colly"
|
||
|
)
|
||
|
|
||
|
// proxiesSource is the URL of the page that loadProxies visits and scrapes
// for proxy addresses.
const proxiesSource = "https://free-proxy-list.net"
|
||
|
|
||
|
// RandomListProvider hands out randomly chosen proxy addresses from a list
// that a background goroutine re-downloads periodically.
type RandomListProvider struct {
	// ctx and cancel control the lifetime of the background refresh loop.
	ctx    context.Context
	cancel func()

	// lock guards proxies.
	lock    sync.RWMutex
	proxies []string

	// r is the random source used to pick an entry from proxies.
	r *rand.Rand
}
|
||
|
|
||
|
func NewRandomListProvider() Provider {
|
||
|
ctx, cancel := context.WithCancel(context.Background())
|
||
|
pr := &RandomListProvider{
|
||
|
r: rand.New(rand.NewSource(time.Now().UnixNano())),
|
||
|
ctx: ctx,
|
||
|
cancel: cancel,
|
||
|
}
|
||
|
go pr.refreshRegular()
|
||
|
runtime.SetFinalizer(pr, randomListProviderDestructor)
|
||
|
return pr
|
||
|
}
|
||
|
|
||
|
func randomListProviderDestructor(pr *RandomListProvider) {
|
||
|
pr.cancel()
|
||
|
}
|
||
|
|
||
|
func (pr *RandomListProvider) Provide() string {
|
||
|
defer pr.lock.RUnlock()
|
||
|
pr.lock.RLock()
|
||
|
if len(pr.proxies) == 0 {
|
||
|
log.Debug("empty proxy list, waiting for 0.5s to populate...")
|
||
|
time.Sleep(time.Millisecond * 500)
|
||
|
if len(pr.proxies) == 0 {
|
||
|
log.Debug("no proxies found, direct will be used.")
|
||
|
return ""
|
||
|
}
|
||
|
}
|
||
|
|
||
|
proxyAddress := pr.proxies[pr.r.Intn(len(pr.proxies)-1)]
|
||
|
log.Debug("providing proxy", proxyAddress)
|
||
|
return proxyAddress
|
||
|
}
|
||
|
|
||
|
func (pr *RandomListProvider) Bad(addr string) {
|
||
|
defer pr.lock.Unlock()
|
||
|
pr.lock.Lock()
|
||
|
for i, val := range pr.proxies {
|
||
|
if val == addr {
|
||
|
pr.proxies[i] = pr.proxies[len(pr.proxies)-1]
|
||
|
pr.proxies = pr.proxies[:len(pr.proxies)-1]
|
||
|
break
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (pr *RandomListProvider) refreshRegular() {
|
||
|
for {
|
||
|
select {
|
||
|
case <-pr.ctx.Done():
|
||
|
return
|
||
|
default:
|
||
|
pr.refresh()
|
||
|
time.Sleep(time.Hour)
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (pr *RandomListProvider) refresh() {
|
||
|
log.Debug("preparing to download fresh proxies")
|
||
|
defer pr.lock.Unlock()
|
||
|
pr.lock.Lock()
|
||
|
|
||
|
proxies := pr.loadProxies()
|
||
|
if len(proxies) > 0 {
|
||
|
pr.proxies = proxies
|
||
|
}
|
||
|
|
||
|
log.Debug("downloaded", len(proxies), "proxies")
|
||
|
}
|
||
|
|
||
|
func (pr *RandomListProvider) loadProxies() []string {
|
||
|
proxies := []string{}
|
||
|
c := colly.NewCollector(colly.AllowURLRevisit())
|
||
|
c.OnRequest(func(r *colly.Request) {
|
||
|
r.Headers.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36")
|
||
|
log.Debug("proxy crawler: visiting", r.URL)
|
||
|
})
|
||
|
c.OnHTML("div.table-responsive.fpl-list > table > tbody", func(tbody *colly.HTMLElement) {
|
||
|
tbody.ForEach("tr", func(_ int, el *colly.HTMLElement) {
|
||
|
host, port := el.ChildText("td:nth-child(1)"), el.ChildText("td:nth-child(2)")
|
||
|
if port == "80" || port == "8080" {
|
||
|
proxies = append(proxies, "http://"+net.JoinHostPort(host, port))
|
||
|
}
|
||
|
})
|
||
|
})
|
||
|
c.Visit(proxiesSource)
|
||
|
return proxies
|
||
|
}
|