naiveproxy/net/tools/stitch_net_log_files.py

129 lines
3.8 KiB
Python
Raw Permalink Normal View History

2018-08-15 01:19:20 +03:00
#!/usr/bin/env python
# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
'''
This script "stitches" the NetLog files from a ".inprogress" directory to
create a single NetLog file.
'''
import glob
import os
import re
import sys
# Help text written to stderr by main() when the argument count is wrong.
USAGE = '''Usage: stitch_net_log_files.py <INPROGRESS_DIR> [<OUTPUT_PATH>]
Will copy all the files in <INPROGRESS_DIR> and write their content into a
NetLog file at path <OUTPUT_PATH>.
If <OUTPUT_PATH> is not specified, <INPROGRESS_DIR> must end with
".inprogress", and the completed NetLog file will be written to the location
with ".inprogress" stripped.
'''
def get_event_file_sort_key(path):
  '''Returns a tuple (modification timestamp, file number) for a path of the
  form event_file_%d.json.

  Only the final path component is matched against the expected name, so
  |path| may be a bare file name or include a directory.

  Raises:
    ValueError: the file name is not of the form event_file_<digits>.json.
  '''
  # Raw string: "\d" in a plain literal is an invalid escape sequence. The dot
  # is escaped so that it only matches a literal "." before "json".
  m = re.match(r'^event_file_(\d+)\.json$', os.path.basename(path))
  if not m:
    raise ValueError('Not an event file name: "%s"' % path)
  file_index = int(m.group(1))
  return (os.path.getmtime(path), file_index)
def get_ordered_event_files():
  '''Lists the event files found in the current working directory.

  Files are located with the glob "event_file_*.json" and sorted by
  get_event_file_sort_key, i.e. oldest modification time first, with the
  number in the file name breaking ties. The sorted list is also reported on
  stdout before being returned.
  '''
  event_files = glob.glob("event_file_*.json")
  event_files.sort(key=get_event_file_sort_key)

  listing = "\n ".join(event_files)
  sys.stdout.write("Identified %d event files:\n %s\n" %
                   (len(event_files), listing))
  return event_files
def _copy_file_or_exit(source_path, destination):
  '''Appends the text of |source_path| to the open file |destination|.

  On IOError, reports the failure on stderr and exits with status 1.
  '''
  try:
    with open(source_path) as source:
      for text_line in source:
        destination.write(text_line)
  except IOError:
    sys.stderr.write("Failed reading \"%s\".\n" % source_path)
    sys.exit(1)


def main():
  '''Stitches the files of a ".inprogress" directory into one NetLog file.

  Command line (sys.argv):
    argv[1]: the directory holding constants.json, the event_file_*.json
        files and, optionally, end_netlog.json.
    argv[2]: optional output path. When omitted, argv[1] must end with
        ".inprogress" (optionally followed by "/") and the output path is
        argv[1] with that suffix removed.

  The output is constants.json, then every event file in the order given by
  get_ordered_event_files(), with the trailing comma of the last event
  removed, then end_netlog.json (or "]}" when that file is absent).

  Exits with status 1 on bad arguments or when an input file cannot be read.

  Raises:
    ValueError: the last non-blank event line does not end with a comma.
  '''
  if len(sys.argv) not in (2, 3):
    sys.stderr.write(USAGE)
    sys.exit(1)

  inprogress_dir = sys.argv[1]

  # Pick an output path based on command line arguments.
  if len(sys.argv) == 3:
    output_path = sys.argv[2]
  else:
    m = re.match(r"^(.*)\.inprogress/?$", inprogress_dir)
    if not m:
      # Reported on stderr like every other fatal error in this script.
      sys.stderr.write("Must specify OUTPUT_PATH\n")
      sys.exit(1)
    output_path = m.group(1)

  # Resolve to an absolute path now: the chdir() below would otherwise change
  # what a relative output path refers to.
  output_path = os.path.abspath(output_path)

  sys.stdout.write("Reading data from: %s\n" % inprogress_dir)
  sys.stdout.write("Writing log file to: %s\n" % output_path)

  # All input files are opened by bare name relative to the input directory.
  os.chdir(inprogress_dir)

  with open(output_path, "w") as stitched_file:
    _copy_file_or_exit("constants.json", stitched_file)

    # The most recent event line, whose write is deferred by one step so that
    # the trailing comma of the very last event can be removed. It carries
    # over from one event file to the next. It starts out empty so that a
    # session without any event files contributes no event text at all.
    pending = ""
    for event_file_path in get_ordered_event_files():
      try:
        with open(event_file_path) as event_file:
          for next_line in event_file:
            if next_line.strip() == "":
              # Keep blank lines attached to the event that precedes them.
              pending += next_line
            else:
              stitched_file.write(pending)
              pending = next_line
      except IOError:
        sys.stderr.write("Failed reading \"%s\"\n" % event_file_path)
        sys.exit(1)

    # Remove hanging comma from last event
    # TODO(dconnol): Check if the last line is a valid JSON object. If not,
    # do not write the line to file. This handles incomplete logs.
    pending = pending.strip()
    if pending[-1:] == ",":
      stitched_file.write(pending[:-1])
    elif pending:
      raise ValueError('Last event is not properly formed')

    if os.path.exists("end_netlog.json"):
      _copy_file_or_exit("end_netlog.json", stitched_file)
    else:
      # end_netlog.json won't exist when using this tool to stitch logging
      # sessions that didn't shutdown gracefully.
      #
      # Close the events array and then the log (no polled_data).
      stitched_file.write("]}\n")
# Script entry point: run the stitcher only when executed directly.
if __name__ == '__main__':
  main()