2019-12-01 04:58:25 +03:00
|
|
|
|
#!/bin/bash
# Builds host, zone and IP lists (plus resolver/proxy/VPN config snippets)
# under result/ from temp/list.csv and the list files in config/.

set -e

# Resolve the directory this script lives in and work from there, so that
# every relative path below is independent of the caller's working directory.
HERE="$(dirname -- "$(readlink -f -- "${0}")")"
cd "$HERE"

# Load settings only after the cd above: the path is relative and would
# otherwise be looked up in whatever directory the script was started from.
source config/config.sh
|
|
|
|
|
|
|
|
|
|
# Extract domains from list
|
2023-06-03 13:56:46 +03:00
|
|
|
|
# Take the second ';'-separated field of every record and de-duplicate it.
# The awk filter drops empty entries and entries containing a backslash, keeps
# only entries made solely of Cyrillic/Latin letters, digits, '-', '_', '.'
# and '*', and strips every "*." sequence and a trailing dot.
# Lines containing one specific host name are then removed (fixed-string
# match) and the remainder is passed through idn, which in its default mode
# converts the names to their ASCII-compatible form.
awk -F ';' '{print $2}' temp/list.csv \
  | sort -u \
  | awk '/^$/ {next} /\\/ {next} /^[а-яА-Яa-zA-Z0-9\-\_\.\*]*+$/ {gsub(/\*\./, ""); gsub(/\.$/, ""); print}' \
  | grep -Fv 'www.bеllonа.no' \
  | CHARSET=UTF-8 idn \
  > result/hostlist_original.txt
|
2019-12-01 04:58:25 +03:00
|
|
|
|
|
|
|
|
|
# Generate zones from domains
|
|
|
|
|
# FIXME: nxdomain list parsing is disabled due to its instability on z-i
|
|
|
|
|
###cat exclude.txt temp/nxdomain.txt > temp/exclude.txt
|
|
|
|
|
|
|
|
|
|
# Merge the "-dist" and "-custom" variant of each config list into a single
# sorted, duplicate-free file under temp/.
for list_name in exclude-hosts exclude-ips include-hosts include-ips; do
  sort -u "config/${list_name}-dist.txt" "config/${list_name}-custom.txt" \
    > "temp/${list_name}.txt"
done

# Combine the included hosts with the hosts extracted from the list.
sort -u temp/include-hosts.txt result/hostlist_original.txt \
  > temp/hostlist_original_with_include.txt
|
|
|
|
|
|
2022-03-26 01:37:59 +03:00
|
|
|
|
# Rewrite list.csv so that each '|'-separated item of field 1 gets its own
# "item;field2" line, keep the lines matching any pattern listed in
# config/exclude-hosts-by-ips-dist.txt, and append field 2 of those lines to
# the host exclusion list.
awk -F ';' '{split($1, a, /\|/); for (i in a) {print a[i]";"$2}}' temp/list.csv \
  | grep -f config/exclude-hosts-by-ips-dist.txt \
  | awk -F ';' '{print $2}' \
  >> temp/exclude-hosts.txt
|
|
|
|
|
|
2019-12-01 04:58:25 +03:00
|
|
|
|
# Runs scripts/getzones.awk over the host list, drops every output line that
# equals (whole-line, fixed-string) an entry of temp/exclude-hosts.txt, and
# writes the sorted, de-duplicated result to result/hostlist_zones.txt.
build_zones() {
  awk -f scripts/getzones.awk temp/hostlist_original_with_include.txt \
    | grep -v -F -x -f temp/exclude-hosts.txt \
    | sort -u \
    > result/hostlist_zones.txt
}

build_zones

if [[ "$RESOLVE_NXDOMAIN" == "yes" ]]; then
  # Append whatever the resolver script prints (run capped at 2 hours) to the
  # exclusion list, then regenerate the zones with the extended exclusions.
  timeout 2h scripts/resolve-dns-nxdomain.py result/hostlist_zones.txt \
    >> temp/exclude-hosts.txt
  build_zones
fi
|
|
|
|
|
|
2023-06-03 13:56:46 +03:00
|
|
|
|
# Run dict/topwords.py on the word list and the generated zones; its stdout is
# saved as temp/replace-common-words.awk (presumably an awk program consumed
# by a later stage outside this script — TODO confirm).
python dict/topwords.py dict/google-1000.txt result/hostlist_zones.txt > temp/replace-common-words.awk
|
|
|
|
|
|
2019-12-01 04:58:25 +03:00
|
|
|
|
# Generate a list of IP addresses
|
|
|
|
|
# From records whose field 1 contains '/', pull out every a.b.c.d/nn range
# and store the ranges version-sorted and de-duplicated.
awk -F';' '$1 ~ /\// {print $1}' temp/list.csv \
  | grep -o -P '([0-9]{1,3}\.){3}[0-9]{1,3}\/[0-9]{1,2}' \
  | sort -V -u \
  > result/iplist_special_range.txt
|
|
|
|
|
|
2022-03-26 01:37:59 +03:00
|
|
|
|
# For records whose field 1 starts with a dotted-quad address, put each
# '|'-separated item of field 1 on its own line; the second awk keeps only
# lines that are exactly one dotted-quad address.
awk -F ';' '($1 ~ /^([0-9]{1,3}\.){3}[0-9]{1,3}/) {gsub(/\|/, RS, $1); print $1}' temp/list.csv \
  | awk '/^([0-9]{1,3}\.){3}[0-9]{1,3}$/' \
  | sort -u \
  > result/iplist_all.txt
|
|
|
|
|
|
2022-03-26 01:37:59 +03:00
|
|
|
|
# awk program: for records whose field 1 starts with a dotted-quad address and
# that either have empty fields 2 and 3 or have field 2 equal to field 1,
# print field 1 with every '|' replaced by a newline (one item per line).
blocked_by_ip_prog='($1 ~ /^([0-9]{1,3}\.){3}[0-9]{1,3}/) && (($2 == "" && $3 == "") || ($1 == $2)) {gsub(/\|/, RS); print $1}'
# awk program: pass only lines that are exactly one dotted-quad address.
plain_ipv4_prog='/^([0-9]{1,3}\.){3}[0-9]{1,3}$/'

awk -F ';' "$blocked_by_ip_prog" temp/list.csv \
  | awk "$plain_ipv4_prog" \
  | sort -u \
  > result/iplist_blockedbyip.txt

# Same selection, but records containing either of the two fixed strings
# below are left out first.
grep -F -v '33-4/2018' temp/list.csv \
  | grep -F -v '33а-5536/2019' \
  | awk -F ';' "$blocked_by_ip_prog" \
  | awk "$plain_ipv4_prog" \
  | sort -u \
  > result/iplist_blockedbyip_noid2971.txt
|
|
|
|
|
|
|
|
|
|
# List every a.b.c.d/nn range found in field 1 of records whose field 1
# contains '/', sorted and de-duplicated.
# `grep -E` is used instead of the obsolescent `egrep`, and '/' is written
# unescaped: newer GNU grep releases print a warning for `egrep` and for a
# stray backslash before an ordinary character.
awk -F ';' '$1 ~ /\// {print $1}' temp/list.csv \
  | grep -E -o '([0-9]{1,3}\.){3}[0-9]{1,3}/[0-9]{1,2}' \
  | sort -u \
  > result/blocked-ranges.txt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Generate OpenVPN route file
|
|
|
|
|
# Write one `push "route <network> <netmask>"` line per range listed in
# result/blocked-ranges.txt. The output file is truncated first, so it ends
# up empty when there are no ranges.
while IFS= read -r cidr; do
  # Network part: everything before the first '/'.
  net="${cidr%%/*}"
  # Dotted netmask as reported on sipcalc's first "Network mask" line.
  netmask="$(sipcalc -- "$cidr" | awk '/Network mask/ {print $4; exit;}')"
  # Plain printf: the former $"..." form is a locale-translation string,
  # which was never intended here.
  printf 'push "route %s %s"\n' "$net" "$netmask"
done > result/openvpn-blocked-ranges.txt < result/blocked-ranges.txt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Generate dnsmasq aliases
|
|
|
|
|
# One `server=/<zone>/127.0.0.4` line per zone. The output file is truncated
# up front and stays empty when the zone list is empty.
while read -r zone; do
  printf 'server=/%s/127.0.0.4\n' "$zone"
done > result/dnsmasq-aliases-alt.conf < result/hostlist_zones.txt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Generate knot-resolver aliases
|
|
|
|
|
# Emit a `blocked_hosts = { ... }` table: each zone gets a trailing dot
# appended and is written in its shell-quoted form (${var@Q}) followed by a
# comma, one entry per line.
{
  echo 'blocked_hosts = {'
  while read -r zone; do
    fqdn="${zone}."
    printf '%s,\n' "${fqdn@Q}"
  done < result/hostlist_zones.txt
  echo '}'
} > result/knot-aliases-alt.conf
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Generate squid zone file
|
|
|
|
|
# One zone per line, each prefixed with a dot. The output file is truncated
# up front and stays empty when the zone list is empty.
while read -r zone; do
  printf '.%s\n' "$zone"
done > result/squid-whitelist-zones.conf < result/hostlist_zones.txt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Print results
|
|
|
|
|
# Report the line count of each generated list on stderr, in the form
# "<label>: <wc -l output>".
{
  printf 'Blocked domains: %s\n' "$(wc -l result/hostlist_zones.txt)"
  for report in iplist_all iplist_special_range iplist_blockedbyip iplist_blockedbyip_noid2971; do
    printf '%s: %s\n' "$report" "$(wc -l "result/${report}.txt")"
  done
} >&2

exit 0
|