mirror of
https://bitbucket.org/anticensority/antizapret-pac-generator-light.git
synced 2024-11-25 22:56:05 +03:00
49 lines
1.5 KiB
Python
Executable File
49 lines
1.5 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
import sys
|
|
from pprint import pprint
|
|
|
|
# Single-character placeholders, assigned in order to the selected words.
# The "\\\\&" entry is emitted as the three characters \\& ; presumably this is
# so that the consumer of the generated gsub() calls (looks like awk) ends up
# with a literal ampersand instead of its "matched text" marker -- TODO confirm.
wordreplace = [
    "A", "B", "C", "D", "E", "F", "G", "H", "I", "J",
    "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T",
    "U", "V", "W", "X", "Y", "Z",
    "!", "@", "#", "$", "%", "^", "\\\\&", "*", "(", ")",
    "=", "+", "/", ",", "<", ">", "~",
]


def strip_tld(domain):
    """Return *domain* without its last dot-separated label.

    A name without any dot (including the empty string) yields "".
    """
    return domain.rpartition(".")[0]


def count_hits(words, domains, progress=None):
    """Count, for every word, how many domains contain it as a substring.

    Args:
        words: list of candidate substrings (may contain "" and duplicates;
            a duplicated word is counted once per occurrence in the list).
        domains: list of domain strings to scan.
        progress: optional writable text stream; when given, an
            "<index> / <total>" line is rewritten there every 1000 domains.

    Returns:
        dict mapping each word to its hit count, in first-seen word order.
    """
    hits = {word: 0 for word in words}
    total = len(domains)
    for index, domain in enumerate(domains):
        if progress is not None and index % 1000 == 0:
            # "\r" keeps the progress indicator on a single terminal line.
            print(index, "/", total, end="\r", file=progress)
        for word in words:
            if word in domain:
                hits[word] += 1
    return hits


def select_words(hits, limit):
    """Pick the *limit* most frequent words and order them longest first.

    Words with zero hits and the empty word are ignored. Among words with
    equal counts, the ones that appear later in *hits* win the cut, which is
    the order the stable ascending sort leaves them in.

    Args:
        hits: dict of word -> hit count.
        limit: maximum number of words to return.

    Returns:
        list of at most *limit* words, sorted by descending length (stable).
    """
    if limit <= 0:
        # Guard: a slice of [-0:] would return everything instead of nothing.
        return []
    nonzero = {word: count for word, count in hits.items()
               if count != 0 and word != ""}
    ranked = sorted(nonzero, key=nonzero.get)  # ascending by hit count
    top = ranked[-limit:]
    # Longest first, so a longer word is emitted before any shorter word
    # that might be contained in it.
    return sorted(top, key=lambda word: -len(word))


def render_awk(selected, replacements):
    """Build the output lines: one gsub() call per word, wrapped in braces.

    The i-th word in *selected* is paired with the i-th entry of
    *replacements*. Words are inserted into the /.../ pattern verbatim.
    """
    lines = ["{"]
    lines.extend(
        'gsub(/{}/, "{}", domainname)'.format(word, placeholder)
        for word, placeholder in zip(selected, replacements)
    )
    lines.append("}")
    return lines


def main(argv=None):
    """Read a word list and a domain list, print the gsub() block to stdout.

    Args:
        argv: argument vector; argv[1] is the word-list file and argv[2] the
            domain-list file, both newline-separated. Defaults to sys.argv.

    Returns:
        Process exit status: 0 on success, 2 when arguments are missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        print("usage: {} WORDS_FILE DOMAINS_FILE".format(argv[0] if argv else "script"),
              file=sys.stderr)
        return 2

    with open(argv[1], "r") as wfile, open(argv[2], "r") as dfile:
        domains = dfile.read().split("\n")
        words = wfile.read().split("\n")

    # Words are matched against the name with its last label removed.
    domains = [strip_tld(domain) for domain in domains]

    hits = count_hits(words, domains, progress=sys.stderr)

    # One placeholder per selected word, so the table size is the limit.
    finallist = select_words(hits, len(wordreplace))
    print(finallist, file=sys.stderr)

    for line in render_awk(finallist, wordreplace):
        print(line)
    return 0


if __name__ == "__main__":
    sys.exit(main())
|