Initial commit

This commit is contained in:
ValdikSS 2019-12-01 04:58:25 +03:00
commit cb1a2f53ff
22 changed files with 609 additions and 0 deletions

30
README.md Normal file
View File

@ -0,0 +1,30 @@
Russian PAC file generator, light version
=========================================
Генератор PAC-файла сервиса [АнтиЗапрет](https://antizapret.prostovpn.org/).
Данный набор скриптов создаёт файл [автоконфигурации прокси](https://en.wikipedia.org/wiki/Proxy_auto-config) со списком сайтов, заблокированных на территории Российской Федерации Роскомнадзором и другими государственными органами, который можно использовать в браузерах, для автоматического проксирования заблокированных ресурсов.
Помимо основного назначения скрипта (генерации PAC-файла), он также умеет создавать:
* Файл клиентской конфигурации (client-config, CCD) с заблокированными диапазонами IP-адресов для OpenVPN;
* Файл с заблокированными доменными зонами для Squid;
* Файл с заблокированными доменными зонами в LUA-переменной, для использования с DNS-резолвером knot-resolver.
### Зависимости
* Bash
* GNU coreutils
* GNU AWK (gawk)
* sipcalc
### Конфигурационные файлы
* **{in,ex}clude-{hosts,ips}-dist** — конфигурация дистрибутива, предназначена для изменения автором репозитория;
* **{in,ex}clude-{hosts,ips}-custom** — пользовательская конфигурация, предназначена для изменения конечным пользователем скрипта;
* **exclude-regexp-dist.awk** — файл с различным заблокированным «мусором», раздувающим PAC-файл: зеркалами сайтов, неработающими сайтами, и т.д.
* **config.sh** — файл с адресами прокси.
### Установка и запуск
Склонируйте git-репозиторий, отредактируйте **doall.sh** и **process.sh** под собственные нужды, запустите **doall.sh**.

13
config/config.sh Executable file
View File

@ -0,0 +1,13 @@
#!/bin/bash
# Shared settings; generate-pac.sh pulls this file in with `source`.

# Where the generated PAC files are written: the first one gets the
# HTTPS+PROXY return line, the second one the PROXY-only return line.
PACFILE='result/proxy-host-ssl.pac'
PACFILE_NOSSL='result/proxy-host-nossl.pac'

# Proxy reached over HTTPS (TLS)
PACHTTPSHOST="proxy-ssl.antizapret.prostovpn.org:3143"

# Regular proxy
PACPROXYHOST="proxy-nossl.antizapret.prostovpn.org:29976"

# Dedicated proxy returned for hosts that resolve into the special IP ranges
PACPROXYSPECIAL="CCAHIHA.antizapret.prostovpn.org:3128"

View File

@ -0,0 +1 @@

View File

@ -0,0 +1,3 @@
youtube.com
googleusercontent.com
pornhub.com

View File

@ -0,0 +1 @@

View File

@ -0,0 +1 @@

View File

@ -0,0 +1,156 @@
# Exclusion rules pulled into scripts/getzones.awk via @include.
# Every rule has the form `pattern {next}`: an input record (one host name per
# line) that matches the regular expression is skipped and never reaches the
# zone list. Patterns without `^` match anywhere in the record; patterns that
# start with `^` only match at the beginning of the record.
# The parenthesised form `(/re/) {next}` and the bare form `/re/ {next}` are
# equivalent. Lines starting with `#` are rules that are currently disabled.

# Hosting providers / dynamic DNS and individual junk zones
(/duckdns/) {next}
(/linode\.com/) {next}
(/upcloud\.com/) {next}
(/googleusercontent\.com/) {next}
(/\.sl\.pt/) {next}
(/\.biz\.ski/) {next}
(/\.sloat\.biz/) {next}
(/\.new-rutor\.org/) {next}
(/\.traderc\.biz/) {next}
(/\.o-q\.biz/) {next}
(/\.dcge\.biz/) {next}
# Name fragments and numbered-mirror patterns
(/fonbet/) {next}
(/betcity/) {next}
(/zerkalo/) {next}
(/zigzag/) {next}
(/zenitbet/) {next}
(/winline/) {next}
(/ttrcasino/) {next}
(/paripartners/) {next}
(/parimatch/) {next}
(/ligastavok/) {next}
(/liga-stavok/) {next}
(/baltplay/) {next}
(/azino777/) {next}
(/vulkan/) {next}
(/leonbet/) {next}
(/ru\.leon/) {next}
(/ru\.adleon/) {next}
(/leonaccess/) {next}
(/leon-[0-9]{3}/) {next}
(/pm-[0-9]{2,3}\./) {next}
(/mf-[0-9]{2,3}\.online/) {next}
(/fon-[0-9]{2,3}\./) {next}
(/most.{3}\./) {next}
(/casino\-/) {next}
(/bcity\-/) {next}
(/1x\-/) {next}
(/1xbet/) {next}
(/1xmob/) {next}
#(/melbet/) {next}
(/bk\-info/) {next}
(/bkinfo/) {next}
(/marathon/) {next}
(/gaminator/) {next}
(/joycasino/) {next}
(/goldenstar/) {next}
# Same kind of rules, written without the surrounding parentheses
/marafon/ {next}
/olimp/ {next}
/kasino/ {next}
/depozit/ {next}
/kazino/ {next}
/777/ {next}
/casino/ {next}
/admiral/ {next}
/zerkala/ {next}
/avtomat/ {next}
/igrat/ {next}
/azart/ {next}
#/besplatno/ {next}
/sloty/ {next}
/bet-boom/ {next}
/betsbc/ {next}
/^bk-/ {next}
/^bkr/ {next}
/bkinf0/ {next}
/bukmeker/ {next}
/ruletka/ {next}
/vulcan/ {next}
/vylkan/ {next}
/wulcan/ {next}
/wulkan/ {next}
/vullkan/ {next}
/volcan/ {next}
/^vlc/ {next}
/^vlk/ {next}
/eldorado/ {next}
/lotto/ {next}
/lottery/ {next}
/fbmetrix/ {next}
/^diplom-/ {next}
/^dosug-/ {next}
/^dosug[0-9]{2}/ {next}
/fon-bet/ {next}
/^hydra[0-9]{2}/ {next}
/^intim[0-9]{2}/ {next}
/^livetv[0-9]{2}/ {next}
/marafon/ {next}
#/^melb/ {next}
/^melm/ {next}
/^mf-[0-9]{2}/ {next}
/^most/ {next}
#/^new-/ {next}
/^pari-/ {next}
/^pokerdom/ {next}
/prostitutki/ {next}
/spravka/ {next}
/mossst/ {next}
/mostbet/ {next}
/diplom/ {next}
/pharaon/ {next}
/fortuna/ {next}
/^rotate/ {next}
/^ref.{5}\./ {next}
/play\-/ {next}
/^1w.{3,4}\./ {next}
/^mylove[0-9]{2,3}\./ {next}
/^1x.{3,4}\./ {next}
/^mirror[0-9]{2,3}\./ {next}
/^mob.{3,4}\./ {next}
/^777/ {next}
/hydra/ {next}
/spravok/ {next}
/spravka/ {next}
/zenit/ {next}
/zakladki/ {next}
/vullcan/ {next}
/vulslots/ {next}
/vulwinners/ {next}
/slots/ {next}
/traffaccess/ {next}
/tide24/ {next}
/swleon/ {next}
/sportingbull/ {next}
/sokol-24/ {next}
/silmag/ {next}
/faraon/ {next}
/marbet/ {next}
/joycazino/ {next}
/joy-cazino/ {next}
/jackpot/ {next}
/semyanich/ {next}
/semenarnia/ {next}
/prostitutki/ {next}
/shishkin-semena/ {next}
/vulkanstavka/ {next}
/bukvaved/ {next}
/rastarasha/ {next}
/errors-seeds/ {next}
/casino-x/ {next}
/kinogb/ {next}
/vulkanstars/ {next}
/vulcanwin/ {next}
/vlk-slots/ {next}
/rutorg/ {next}
/leonbets/ {next}
/parimatch/ {next}
/azartplay/ {next}
/bbplay2017/ {next}
/baltplay2017/ {next}
/hiwager/ {next}
/seedbanda/ {next}
#/gidonline/ {next}
/^alco/ {next}
(/\.r\.cloudfront\.net/) {next}
# Records containing a literal asterisk or a literal backslash
(/\*/) {next}
(/\\/) {next}

View File

@ -0,0 +1 @@

View File

@ -0,0 +1,18 @@
lostfilm.tv
archive.org
licdn.com
new-team.org
koshara.co
fast-torrent.ru
pornreactor.cc
joyreactor.cc
free-rutor.org
t-ru.org
rutrk.org
blogspot.com
blogspot.ru
nnm-club.ws
e-hentai.org
deviantart.net
pravdabeslana.ru
applelife.ru

View File

@ -0,0 +1 @@

View File

@ -0,0 +1 @@

7
doall.sh Executable file
View File

@ -0,0 +1,7 @@
#!/bin/bash
# Entry point: runs the download, parse and deploy steps in order from the
# directory this script lives in.

# Absolute directory of this script, with symlinks resolved.
HERE="$(dirname "$(readlink -f "${0}")")"

# The steps below are invoked by relative path, so stop right away if the
# directory cannot be entered instead of running them from the caller's cwd.
cd "$HERE" || exit 1

./update.sh   # download the source lists into temp/
./parse.sh    # build host/IP lists and derived configs in result/
./process.sh  # user-editable deployment step

114
generate-pac.sh Executable file
View File

@ -0,0 +1,114 @@
#!/bin/bash
# Builds the two PAC files named in config/config.sh ($PACFILE and
# $PACFILE_NOSSL) from the lists that parse.sh leaves in result/ and temp/.
# Needs: awk (gawk), sort, grep, xargs, sipcalc, python3.
set -e

# All paths below are relative to the repository directory, so switch to it
# first (same approach as parse.sh and update.sh). Without this the script only
# worked when started from the repository root.
HERE="$(dirname "$(readlink -f "${0}")")"
cd "$HERE"

source config/config.sh

# Start from an empty file
echo -n > "$PACFILE"
# .pac header
echo "// ProstoVPN.AntiZapret PAC-host File
// Generated on $(date)
// NOTE 1: Proxy.pac file content varies on User-Agent HTTP header.
// NOTE 2: Some badly behaving User-Agents are banned, they get empty response.
// NOTE 3: Do not request PAC file faster than once a minute, rate limiting is applied.
// NOTE 4: Do not use the proxy servers outside of this file.
" >> "$PACFILE"

# `domains` object: zone -> host-name length -> concatenated host names
awk -f scripts/generate-pac-domains.awk result/hostlist_zones.txt >> "$PACFILE"

# Collapse IP list
# (writes result/iplist_blockedbyip_noid2971_collapsed.txt)
scripts/collapse_blockedbyip_noid2971.py
echo "// This variable now excludes IP addresses blocked by 27-31-2018/Id2971-18 (since 27.06.2019)" >> "$PACFILE"

# `d_ipaddr` array: included + collapsed addresses, minus the excluded ones,
# version-sorted so that the awk script can emit ascending base-36 deltas
sort -Vu temp/include-ips.txt result/iplist_blockedbyip_noid2971_collapsed.txt | \
grep -v -F -x -f temp/exclude-ips.txt | awk -f scripts/generate-pac-ipaddrs.awk >> "$PACFILE"

# `special` array: ["network address", prefix length] pairs taken from the
# sipcalc report for every range in result/iplist_special_range.txt
SPECIAL="$(cat result/iplist_special_range.txt | xargs -n1 sipcalc | \
awk 'BEGIN {notfirst=0} /Network address/ {n=$4} /Network mask \(bits\)/ {if (notfirst) {printf ","} printf "[\"%s\", %s]", n, $5; notfirst=1;}')"
echo "var special = [
$SPECIAL
];
var az_initialized = 0;
// CIDR to netmask, for special
function nmfc(b) {var m=[];for(var i=0;i<4;i++) {var n=Math.min(b,8); m.push(256-Math.pow(2, 8-n)); b-=n;} return m.join('.');}
function FindProxyForURL(url, host) {" >> "$PACFILE"

# Body of FindProxyForURL up to (not including) the proxy return line.
# The text is inside a double-quoted shell string: \" \$ and \\ are shell
# escapes that end up as " $ and \ in the generated JavaScript.
echo " if (domains.length < 10) return \"DIRECT\"; // list is broken
if (!('indexOf' in Array.prototype)) {
Array.prototype.indexOf= function(find, i /*opt*/) {
if (i===undefined) i= 0;
if (i<0) i+= this.length;
if (i<0) i= 0;
for (var n= this.length; i<n; i++)
if (i in this && this[i]===find)
return i;
return -1;
};
}
if (!az_initialized) {
var prev_ipval = 0;
var cur_ipval = 0;
for (var i = 0; i < d_ipaddr.length; i++) {
cur_ipval = parseInt(d_ipaddr[i], 36) + prev_ipval;
d_ipaddr[i] = cur_ipval;
prev_ipval = cur_ipval;
}
for (var i = 0; i < special.length; i++) {
special[i][1] = nmfc(special[i][1]);
}
for (var i in domains) {
for (var j in domains[i]) {
var regex = new RegExp('.{' + j.toString() + '}', 'g');
domains[i][j] = domains[i][j].match(regex);
}
}
az_initialized = 1;
}
var shost;
if (/\.(ru|co|cu|com|info|net|org|gov|edu|int|mil|biz|pp|ne|msk|spb|nnov|od|in|ho|cc|dn|i|tut|v|dp|sl|ddns|dyndns|livejournal|herokuapp|azurewebsites|cloudfront|ucoz|3dn|nov|linode|amazonaws|sl-reverse|kiev|beget|kirov|akadns|scaleway)\.[^.]+$/.test(host))
shost = host.replace(/(.+)\.([^.]+\.[^.]+\.[^.]+$)/, \"\$2\");
else
shost = host.replace(/(.+)\.([^.]+\.[^.]+$)/, \"\$2\");
var curdomain = shost.match(/(.*)\\.([^.]+\$)/);
if (!curdomain || !curdomain[2]) {return \"DIRECT\";}
var curhost = curdomain[1];
var curzone = curdomain[2];
var curarr = []; // dummy empty array
if (domains.hasOwnProperty(curzone) && domains[curzone].hasOwnProperty(curhost.length)) {
var curarr = domains[curzone][curhost.length];
}
var oip = dnsResolve(host);
var iphex = \"\";
if (oip) {
iphex = oip.toString().split(\".\");
iphex = parseInt(iphex[3]) + parseInt(iphex[2])*256 + parseInt(iphex[1])*65536 + parseInt(iphex[0])*16777216;
}
var yip = 0;
if (iphex && d_ipaddr.indexOf(iphex) !== -1) {yip = 1;}
if (yip === 1 || curarr.indexOf(curhost) !== -1) {
// WARNING! WARNING! WARNING!
// You should NOT use these proxy servers outside of PAC file!
// DO NOT enter it manually in any program!
// By doing this, you harm the service!" >> "$PACFILE"

# Everything written so far is common to both variants: fork the file here,
# then give each copy its own return line.
cp "$PACFILE" "$PACFILE_NOSSL"
echo " return \"HTTPS ${PACHTTPSHOST}; PROXY ${PACPROXYHOST}; DIRECT\";" >> "$PACFILE"
echo " return \"PROXY ${PACPROXYHOST}; DIRECT\";" >> "$PACFILE_NOSSL"

# Common tail, appended to both files at once
echo " }
for (var i = 0; i < special.length; i++) {
if (isInNet(oip, special[i][0], special[i][1])) {return \"PROXY ${PACPROXYSPECIAL}; DIRECT\";}
}
return \"DIRECT\";
}" | tee -a "$PACFILE" "$PACFILE_NOSSL" >/dev/null

79
parse.sh Executable file
View File

@ -0,0 +1,79 @@
#!/bin/bash
# Converts the downloaded dump (temp/list.csv, ';'-separated) plus the
# include/exclude files in config/ into host and IP lists and into ready-made
# OpenVPN, dnsmasq, knot-resolver and squid snippets under result/.
# Needs: awk (gawk), sort, grep (with -P), idn, sipcalc.
set -e

HERE="$(dirname "$(readlink -f "${0}")")"
cd "$HERE"

# Extract domains from list
# (field 2; empty values and values with a backslash are dropped, a "*."
# wildcard prefix and a trailing dot are removed, then idn converts the names)
awk -F ';' '{print $2}' temp/list.csv | sort -u | awk '/^$/ {next} /\\/ {next} /^[a-zA-Z0-9\-\_\.\*]*+$/ {gsub(/\*\./, ""); gsub(/\.$/, ""); print}' | idn > result/hostlist_original.txt

# Generate zones from domains
# FIXME: nxdomain list parsing is disabled due to its instability on z-i
###cat exclude.txt temp/nxdomain.txt > temp/exclude.txt

# Merge the distribution and the user-maintained include/exclude files
sort -u config/exclude-hosts-{dist,custom}.txt > temp/exclude-hosts.txt
sort -u config/exclude-ips-{dist,custom}.txt > temp/exclude-ips.txt
sort -u config/include-hosts-{dist,custom}.txt > temp/include-hosts.txt
sort -u config/include-ips-{dist,custom}.txt > temp/include-ips.txt
sort -u temp/include-hosts.txt result/hostlist_original.txt > temp/hostlist_original_with_include.txt

awk -f scripts/getzones.awk temp/hostlist_original_with_include.txt | grep -v -F -x -f temp/exclude-hosts.txt | sort -u > result/hostlist_zones.txt

# Generate a list of IP addresses
# CIDR ranges found in field 1
awk -F';' '$1 ~ /\// {print $1}' temp/list.csv | grep -P '([0-9]{1,3}\.){3}[0-9]{1,3}\/[0-9]{1,2}' -o | sort -Vu > result/iplist_special_range.txt

# Every single address found in field 1 (" | " separates several addresses)
awk -F ';' '($1 ~ /^([0-9]{1,3}\.){3}[0-9]{1,3}/) {gsub(/ \| /, RS, $1); print $1}' temp/list.csv | \
    awk '/^([0-9]{1,3}\.){3}[0-9]{1,3}$/' | sort -u > result/iplist_all.txt

# Addresses from records where fields 2 and 3 are empty, or field 2 repeats field 1
awk -F ';' '($1 ~ /^([0-9]{1,3}\.){3}[0-9]{1,3}/) && (($2 == "" && $3 == "") || ($1 == $2)) {gsub(/ \| /, RS); print $1}' temp/list.csv | \
    awk '/^([0-9]{1,3}\.){3}[0-9]{1,3}$/' | sort -u > result/iplist_blockedbyip.txt

# Same selection, but without the records that mention 'Ид2971-18'
grep -F -v 'Ид2971-18' temp/list.csv | \
    awk -F ';' '($1 ~ /^([0-9]{1,3}\.){3}[0-9]{1,3}/) && (($2 == "" && $3 == "") || ($1 == $2)) {gsub(/ \| /, RS); print $1}' | \
    awk '/^([0-9]{1,3}\.){3}[0-9]{1,3}$/' | sort -u > result/iplist_blockedbyip_noid2971.txt

# `grep -E` replaces the deprecated `egrep` alias
awk -F ';' '$1 ~ /\// {print $1}' temp/list.csv | grep -E -o '([0-9]{1,3}\.){3}[0-9]{1,3}\/[0-9]{1,2}' | sort -u > result/blocked-ranges.txt

# Generate OpenVPN route file
# One redirection on the loop truncates the file and collects all lines.
while read -r line
do
    # Part before the slash, without spawning echo+awk for every range
    C_NET="${line%%/*}"
    C_NETMASK="$(sipcalc -- "$line" | awk '/Network mask/ {print $4; exit;}')"
    # Plain printf: the former $"..." form was a locale-translated string
    printf 'push "route %s %s"\n' "$C_NET" "$C_NETMASK"
done < result/blocked-ranges.txt > result/openvpn-blocked-ranges.txt

# Generate dnsmasq aliases
while read -r line
do
    printf 'server=/%s/127.0.0.4\n' "$line"
done < result/hostlist_zones.txt > result/dnsmasq-aliases-alt.conf

# Generate knot-resolver aliases
# (a table of shell-quoted names, each with a trailing dot)
{
    echo 'blocked_hosts = {'
    while read -r line
    do
        line="$line."
        printf '%s,\n' "${line@Q}"
    done < result/hostlist_zones.txt
    echo '}'
} > result/knot-aliases-alt.conf

# Generate squid zone file
while read -r line
do
    printf '.%s\n' "$line"
done < result/hostlist_zones.txt > result/squid-whitelist-zones.conf

# Print results
echo "Blocked domains: $(wc -l result/hostlist_zones.txt)" >&2
echo "iplist_all: $(wc -l result/iplist_all.txt)" >&2
echo "iplist_special_range: $(wc -l result/iplist_special_range.txt)" >&2
echo "iplist_blockedbyip: $(wc -l result/iplist_blockedbyip.txt)" >&2
echo "iplist_blockedbyip_noid2971: $(wc -l result/iplist_blockedbyip_noid2971.txt)" >&2

19
process.sh Executable file
View File

@ -0,0 +1,19 @@
#!/bin/bash
# Deployment step, run last by doall.sh.
# Every command below is commented out: this file is a template that the
# README asks the user to edit. Uncomment the lines for the services in use;
# they copy files produced in result/ to system locations and restart or
# reload the corresponding service.
set -e
# dnsmasq
#cp result/dnsmasq-aliases-alt.conf /etc/dnsmasq.d/aliases-alt.conf
#service dnsmasq restart
# knot-resolver
#cp result/knot-aliases-alt.conf /etc/knot-resolver/knot-aliases-alt.conf
#systemctl restart kresd@1.service
# OpenVPN client-config file
#cp result/openvpn-blocked-ranges.txt /etc/openvpn/server/ccd/DEFAULT
# iptables chain refilled with one ACCEPT rule per blocked range
#iptables -F azvpnwhitelist
#while read -r line
#do
# iptables -w -A azvpnwhitelist -d "$line" -j ACCEPT
#done < result/blocked-ranges.txt
# squid
#cp result/squid-whitelist-zones.conf /etc/squid/whitelistedhosts.txt
#cp result/iplist_all.txt /etc/squid/whitelistedips.txt
#systemctl reload squid || true

1
result/.gitkeep Normal file
View File

@ -0,0 +1 @@

View File

@ -0,0 +1,19 @@
#!/usr/bin/env python3
"""Drop addresses covered by the special ranges from the blocked-by-IP list.

Reads result/iplist_blockedbyip_noid2971.txt and
result/iplist_special_range.txt (whitespace-separated IPv4 addresses/networks)
and writes the remaining addresses, one per line and without a "/32" suffix,
to result/iplist_blockedbyip_noid2971_collapsed.txt.
"""
import ipaddress


def read_networks(path):
    """Return the whitespace-separated entries of `path` as IPv4Network objects."""
    with open(path, 'r') as source:
        return [ipaddress.IPv4Network(addr) for addr in source.read().split()]


def remove_special_ranges(nlist, slist):
    """Return the networks of `nlist` that overlap none of the networks in `slist`.

    A new list is built instead of deleting from `nlist` while iterating over
    it: deleting in place shifts the remaining items, so the element right
    after each removed one was never examined and stayed in the result.
    """
    return [net for net in nlist
            if not any(special.overlaps(net) for special in slist)]


def main():
    nlist = read_networks('result/iplist_blockedbyip_noid2971.txt')
    slist = read_networks('result/iplist_special_range.txt')

    print('IP Addresses before collapsing:', len(nlist))
    nlist = remove_special_ranges(nlist, slist)
    print('IP Addresses after removing special ranges:', len(nlist))

    # The context manager guarantees the output is flushed and closed.
    with open('result/iplist_blockedbyip_noid2971_collapsed.txt', 'w') as collapsed_file:
        for addr in nlist:
            print(str(addr).replace('/32', ''), file=collapsed_file)


if __name__ == '__main__':
    main()

View File

@ -0,0 +1,40 @@
# Turns a list of host names (one per record) into the JavaScript object
# `domains` used by the generated PAC file.
# Uses gensub() and arrays of arrays, i.e. it has to be run with gawk.

# Main rule: split every record at its last dot and file the part before the
# dot under [part after the dot][length of the part before the dot].
{
domainzone = gensub(/(.*)\.([^.]+$)/, "\\2", 1)
domainname = gensub(/(.*)\.([^.]+$)/, "\\1", 1)
domainlength = length(domainname)
# Keyed by name, so a repeated name is stored only once
domainarray[domainzone][domainlength][domainname] = domainname
#print "adding", $0, ":", domainzone, domainlength, domainname
}
# Prints `arrname = {"zone":{ len:"name1name2...", ...}, ...};`
#   arrname - text printed in front of "= {"
#   arr     - array indexed as arr[zone][length][name]
# Names of equal length are written back to back without a separator.
# firsttime_1 / firsttime_2 track whether a ",\n" separator is needed before
# the next zone / the next length entry.
function printarray(arrname, arr) {
firsttime_1 = 1
firsttime_2 = 1
print arrname, "= {"
for (domainzone in arr) {
if (firsttime_1 == 0) {printf ",\n"} firsttime_1 = 0;
print "\"" domainzone "\":{"
for (domainlength in arr[domainzone]) {
if (firsttime_2 == 0) {printf ",\n"} firsttime_2 = 0;
printf " %s", "" domainlength ":\""
for (domainname in arr[domainzone][domainlength]) {
printf "%s", domainname
}
printf "\""
}
# A new zone starts without a leading separator
firsttime_2 = 1;
printf "\n}"
}
print "};"
}
# Final function
END {
printarray("domains", domainarray)
}

View File

@ -0,0 +1,49 @@
# Turns a list of IPv4 addresses (one per record) into a JavaScript statement
# that defines an array of base-36 strings; each string is the difference
# between an address and the previous one, both taken as 32-bit numbers.
#BEGIN {PROCINFO["sorted_in"] = "@unsorted"}
# gawk: make `for (i in arr)` walk the indices in ascending numeric order,
# i.e. in the order the records were read; qq is the next free index.
BEGIN {PROCINFO["sorted_in"] = "@ind_num_asc"; qq = 0}
# Skipping empty strings
(!$1) {next}
# Store the first field of every remaining record
{d_ip[qq] = $1; qq+=1;}
# Dotted-quad string -> number (first octet is the most significant)
function iptodec(v) {
split(v,s,".")
return s[4] + s[3]*256 + s[2]*65536 + s[1]*16777216
}
# Number -> base-36 string with digits 0-9a-z; at least one digit is produced
function ipdecto36(r) {
baselen = split("0123456789abcdefghijklmnopqrstuvwxyz", base, "")
rr = ""
do {
rr = base[(r % baselen) + 1] rr
} while (r = int(r / baselen))
return rr
}
# Prints `var <arrname> = "<delta> <delta> ...".split(" ");`
#   arrname - JavaScript variable name
#   arr     - array of dotted-quad strings
# The string literal is broken with a backslash-newline after every entry
# whose index is a multiple of 40 (this includes index 0) and once more before
# the closing quote unless the last entry already ended a line.
function printarray_hex(arrname, arr) {
d_printed_end = 0
previous_dec = 0
print "var", arrname, "= \"\\"
for (i in arr) {
d_printed_end = 0
printf "%s ", ipdecto36(iptodec(arr[i]) - previous_dec)
previous_dec = iptodec(arr[i])
if (i % 40 == 0) {
print "\\"
d_printed_end = 1
}
}
if (d_printed_end == 0) {
print "\\"
}
print "\".split(\" \");"
print ""
}
# Final function
END {
#asort(d_ip)
printarray_hex("d_ipaddr", d_ip)
}

38
scripts/getzones.awk Normal file
View File

@ -0,0 +1,38 @@
# Reduces a list of host names (one per record) to a de-duplicated list of
# zones: the last two labels of each name, or the last three when the
# second-to-last label is one of the listed second-level labels.
# The exclusion rules of config/exclude-regexp-dist.awk run first (gawk @include).
@include "config/exclude-regexp-dist.awk"
# Skipping empty strings
(!$1) {next}
# Exclude some domains
(/duckdns/) {next}
(/\.r\.cloudfront\.net/) {next}
# Skipping IP addresses
(/^([0-9]{1,3}\.){3}[0-9]{1,3}$/) {next}
# Removing leading "www."
{sub(/^www\./, "", $1)}
# Removing ending dot
{sub(/\.$/, "", $1)}
# Shorten the name: keep three labels if the middle one is in the list below,
# otherwise keep two. A name that is already that short is left unchanged
# because the gensub() pattern needs something in front of the kept labels.
{
if (/\.(ru|co|cu|com|info|net|org|gov|edu|int|mil|biz|pp|ne|msk|spb|nnov|od|in|ho|cc|dn|i|tut|v|dp|sl|ddns|dyndns|livejournal|herokuapp|azurewebsites|cloudfront|ucoz|3dn|nov|linode|amazonaws|sl-reverse|kiev|beget|kirov|akadns|scaleway)\.[^.]+$/)
{$1 = gensub(/(.+)\.([^.]+\.[^.]+\.[^.]+$)/, "\\2", 1)}
else
{$1 = gensub(/(.+)\.([^.]+\.[^.]+$)/, "\\2", 1)}
}
# Sorting domains
# (the zone is used as the array key, so duplicates collapse into one entry)
{d_other[$1] = $1}
# Prints every index of arr on its own line; arrname is accepted but not used.
function printarray(arrname, arr) {
for (i in arr) {
print i
}
}
# Final function
END {
printarray("d_other", d_other)
}

1
temp/.gitkeep Normal file
View File

@ -0,0 +1 @@

16
update.sh Executable file
View File

@ -0,0 +1,16 @@
#!/bin/bash
# Downloads the source lists into temp/ and checks that each downloaded file
# has the size the server reports for it.
# Exit status: 0 on success, 1 if a download fails, 2 if a size check fails.
# Needs: curl, iconv, awk (gawk), GNU stat.
set -e

HERE="$(dirname "$(readlink -f "${0}")")"
cd "$HERE"

LISTLINK='https://raw.githubusercontent.com/zapret-info/z-i/master/dump.csv'
NXDOMAINLINK='https://raw.githubusercontent.com/zapret-info/z-i/master/nxdomain.txt'

curl -f --fail-early --compressed -o temp/list_orig.csv "$LISTLINK" || exit 1
# The dump is re-encoded from cp1251 to UTF-8 for the later processing steps
iconv -f cp1251 -t utf8 temp/list_orig.csv > temp/list.csv

curl -f --fail-early --compressed -o temp/nxdomain.txt "$NXDOMAINLINK" || exit 1

# Compare the Content-Length header of a HEAD request with the size on disk.
# `if` is used instead of `[[ ... ]] && echo ... && exit 2`: with the && chain
# a matching size left status 1 behind, and as the last command of the script
# that status became the exit code of a successful run.
LISTSIZE="$(curl -sI "$LISTLINK" | awk 'BEGIN {IGNORECASE=1;} /content-length/ {sub(/[ \t\r\n]+$/, "", $2); print $2}')"
if [[ "$LISTSIZE" != "$(stat -c '%s' temp/list_orig.csv)" ]]; then
    echo "List 1 size differs"
    exit 2
fi

LISTSIZE="$(curl -sI "$NXDOMAINLINK" | awk 'BEGIN {IGNORECASE=1;} /content-length/ {sub(/[ \t\r\n]+$/, "", $2); print $2}')"
if [[ "$LISTSIZE" != "$(stat -c '%s' temp/nxdomain.txt)" ]]; then
    echo "List 2 size differs"
    exit 2
fi

exit 0