mirror of
https://github.com/klzgrad/naiveproxy.git
synced 2024-11-24 22:36:09 +03:00
129 lines
3.8 KiB
Python
129 lines
3.8 KiB
Python
|
#!/usr/bin/env python
|
||
|
# Copyright 2016 The Chromium Authors. All rights reserved.
|
||
|
# Use of this source code is governed by a BSD-style license that can be
|
||
|
# found in the LICENSE file.
|
||
|
|
||
|
'''
|
||
|
This script "stitches" the NetLog files from a ".inprogress" directory to
|
||
|
create a single NetLog file.
|
||
|
'''
|
||
|
|
||
|
import glob
|
||
|
import os
|
||
|
import re
|
||
|
import sys
|
||
|
|
||
|
|
||
|
# Help text written to stderr when the script is invoked with the wrong number
# of arguments.
USAGE = '''Usage: stitch_net_log_files.py <INPROGRESS_DIR> [<OUTPUT_PATH>]

Will copy all the files in <INPROGRESS_DIR> and write their content into a
NetLog file at path <OUTPUT_PATH>.

If <OUTPUT_PATH> is not specified, <INPROGRESS_DIR> should end with
".inprogress", and the completed NetLog file will be written to the location
with ".inprogress" stripped.
'''
|
||
|
|
||
|
|
||
|
def get_event_file_sort_key(path):
    '''Returns a tuple (modification timestamp, file number) for a path of the
    form event_file_%d.json.

    Only the final path component has to match the pattern, so both bare file
    names and paths that include a directory are accepted.

    Raises:
      ValueError: if the file name is not of the form event_file_%d.json.
      OSError: if the modification time of the file cannot be read.
    '''
    # Raw string so that "\d" is a regex class rather than a string escape, and
    # an escaped dot so that only a literal ".json" suffix is accepted.
    m = re.match(r'^event_file_(\d+)\.json$', os.path.basename(path))
    if not m:
        raise ValueError('Not an event file path: "%s"' % path)

    file_index = int(m.group(1))
    return (os.path.getmtime(path), file_index)
|
||
|
|
||
|
|
||
|
def get_ordered_event_files():
    '''Lists the event files found in the current working directory.

    The files matching "event_file_*.json" are returned oldest first. Files
    that share a modification time are ordered by the number in their file
    name. The resulting list is also reported on stdout.
    '''
    event_files = glob.glob("event_file_*.json")
    event_files.sort(key=get_event_file_sort_key)

    listing = "\n ".join(event_files)
    sys.stdout.write("Identified %d event files:\n %s\n" %
                     (len(event_files), listing))
    return event_files
|
||
|
|
||
|
|
||
|
def _append_file_contents(source_path, stitched_file):
    '''Copies |source_path| line by line onto the end of |stitched_file|.

    If the copy raises IOError, the failure is reported on stderr and the
    script exits with status 1.
    '''
    try:
        with open(source_path) as source_file:
            for line in source_file:
                stitched_file.write(line)
    except IOError:
        sys.stderr.write("Failed reading \"%s\".\n" % source_path)
        sys.exit(1)


def main():
    '''Stitches the files of an in-progress NetLog directory into one file.

    Command line (read from sys.argv):
      <INPROGRESS_DIR>  directory holding constants.json, the
                        event_file_*.json files and, optionally,
                        end_netlog.json.
      <OUTPUT_PATH>     optional; when omitted, <INPROGRESS_DIR> must end with
                        ".inprogress" and the output path is that name with
                        the suffix stripped.

    Side effects: changes the working directory to <INPROGRESS_DIR> and
    (over)writes the output file. Exits with status 1 on a usage error or when
    an input file cannot be read.

    Raises:
      ValueError: if the text left over after the last event is non-empty and
        does not end with a comma.
    '''
    if len(sys.argv) not in (2, 3):
        sys.stderr.write(USAGE)
        sys.exit(1)

    inprogress_dir = sys.argv[1]

    # Pick an output path based on command line arguments.
    if len(sys.argv) == 3:
        output_path = sys.argv[2]
    else:
        m = re.match(r"^(.*)\.inprogress/?$", inprogress_dir)
        if not m:
            # This is an error, so report it on stderr like the other errors.
            sys.stderr.write("Must specify OUTPUT_PATH\n")
            sys.exit(1)
        output_path = m.group(1)

    # Resolve the output path before changing directory below, so a relative
    # path stays relative to where the script was started.
    output_path = os.path.abspath(output_path)

    sys.stdout.write("Reading data from: %s\n" % inprogress_dir)
    sys.stdout.write("Writing log file to: %s\n" % output_path)

    os.chdir(inprogress_dir)

    with open(output_path, "w") as stitched_file:
        _append_file_contents("constants.json", stitched_file)

        # |pending| holds the most recent event line (plus any blank lines
        # that follow it) which has not been written yet. Writing is delayed
        # by one line so the trailing comma of the final event can be removed.
        # It starts out empty so that, when there are no event files at all,
        # nothing left over from constants.json is mistaken for an event.
        pending = ""
        for event_file_path in get_ordered_event_files():
            try:
                with open(event_file_path) as event_file:
                    for next_line in event_file:
                        if next_line.strip() == "":
                            pending += next_line
                        else:
                            stitched_file.write(pending)
                            pending = next_line
            except IOError:
                sys.stderr.write("Failed reading \"%s\"\n" % event_file_path)
                sys.exit(1)

        # Remove hanging comma from last event
        # TODO(dconnol): Check if the last line is a valid JSON object. If not,
        # do not write the line to file. This handles incomplete logs.
        pending = pending.strip()
        if pending.endswith(","):
            stitched_file.write(pending[:-1])
        elif pending:
            raise ValueError('Last event is not properly formed')

        if os.path.exists("end_netlog.json"):
            _append_file_contents("end_netlog.json", stitched_file)
        else:
            # end_netlog.json won't exist when using this tool to stitch
            # logging sessions that didn't shutdown gracefully.
            #
            # Close the events array and then the log (no polled_data).
            stitched_file.write("]}\n")
|
||
|
|
||
|
|
||
|
# Run the stitcher only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|