2023-06-03 13:56:46 +03:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
import sys
|
|
|
|
|
2023-06-03 15:01:32 +03:00
|
|
|
# Single-character placeholders that stand in for the most frequent
# two-character patterns.  The entry "\\\\&" is written into the awk script
# as "\\&" (awk then produces a literal "&" in gsub()); its backslashes are
# stripped again when it is used as a key of the dictionary written to the
# third output file.
wordreplace = [
    "A", "B", "C", "D", "E", "F", "G", "H", "I", "J",
    "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T",
    "U", "V", "W", "X", "Y", "Z",
    "!", "@", "#", "$", "%", "^", "\\\\&", "*", "(", ")",
    "=", "+", "/", ",", "<", ">", "~", "[", "]", "{", "}",
]

# Characters that must be backslash-escaped inside an awk /regex/ literal so
# that they match themselves: ERE metacharacters plus the "/" delimiter.
_AWK_REGEX_SPECIAL = frozenset("\\.[]()*+?{}|^$/")


def strip_tld(domain):
    """Return *domain* without its last dot-separated label.

    A name without any dot (including the empty string) yields "".
    """
    return domain.rpartition(".")[0]


def count_pairs(text):
    """Count the non-overlapping two-character chunks of *text*.

    Chunks start at offsets 0, 2, 4, ...  Only complete two-character chunks
    are counted: the previous loop ran while ``position <= len(text)`` and
    therefore also counted an empty string (even-length input) or a single
    trailing character (odd-length input) as a pattern.

    Returns a dict mapping chunk -> number of occurrences, in first-seen
    order.
    """
    counts = {}
    for position in range(0, len(text) - 1, 2):
        pair = text[position:position + 2]
        counts[pair] = counts.get(pair, 0) + 1
    return counts


def most_frequent(counts, limit):
    """Return up to *limit* keys of *counts* with the highest counts.

    The result is ordered from least to most frequent; ties keep their
    insertion order because the sort is stable.
    """
    if limit <= 0:
        return []
    ranked = sorted(counts.items(), key=lambda item: item[1])
    return [pattern for pattern, _ in ranked][-limit:]


def awk_regex_escape(pattern):
    """Escape *pattern* so an awk /regex/ literal matches it literally.

    Without this, a pattern such as ".g" would match any character followed
    by "g", and the substitution would rewrite text that the dictionary
    later decodes to something else.
    """
    return "".join(
        "\\" + char if char in _AWK_REGEX_SPECIAL else char
        for char in pattern
    )


def awk_lines(patterns, replacements):
    """Build the lines of the awk action block.

    One ``gsub()`` call is produced per pattern, pairing ``patterns[i]``
    with ``replacements[i]``, wrapped in "{" ... "}".
    """
    lines = ["{"]
    for pattern, replacement in zip(patterns, replacements):
        lines.append(
            'gsub(/{}/, "{}", domainname)'.format(
                awk_regex_escape(pattern), replacement
            )
        )
    lines.append("}")
    return lines


def pac_mapping(patterns, replacements):
    """Return the decoding dictionary: placeholder -> original pattern.

    Backslashes are stripped from the placeholder so the awk-escaped
    ampersand entry becomes a plain "&" key.
    """
    return {
        replacement.strip("\\"): pattern
        for pattern, replacement in zip(patterns, replacements)
    }


def main(argv):
    """Run the script.

    argv[1] is the input file (one name per line), argv[2] receives the awk
    substitution block and argv[3] receives the printed decoding dictionary.
    The selected patterns are also echoed to stderr.
    """
    if len(argv) < 4:
        sys.exit("usage: {} DOMAIN_LIST AWK_OUT PAC_OUT".format(argv[0]))

    with open(argv[1], "r") as dfile:
        names = dfile.read().split("\n")

    # All names, minus their last label, are concatenated into one string
    # before chunking, so chunks may straddle two adjacent names.
    domains = "".join(strip_tld(name) for name in names)

    finallist = most_frequent(count_pairs(domains), len(wordreplace))
    print(finallist, file=sys.stderr)

    with open(argv[2], "w") as awkfile:
        for line in awk_lines(finallist, wordreplace):
            print(line, file=awkfile)

    with open(argv[3], "w") as pacfile:
        # Written as the Python repr of the dict, exactly as before.
        print(pac_mapping(finallist, wordreplace), file=pacfile)


if __name__ == "__main__":
    main(sys.argv)
|