mirror of
https://github.com/klzgrad/naiveproxy.git
synced 2024-11-24 22:36:09 +03:00
130 lines
3.8 KiB
Python
Executable File
130 lines
3.8 KiB
Python
Executable File
#!/usr/bin/env python
|
|
# Copyright 2017 The Chromium Authors. All rights reserved.
|
|
# Use of this source code is governed by a BSD-style license that can be
|
|
# found in the LICENSE file.
|
|
|
|
import os
|
|
import re
|
|
import sys
|
|
|
|
# Usage text written to stderr by main() whenever the command line is rejected.
kUsage = '''Usage: truncate_net_log.py INPUT_FILE OUTPUT_FILE TRUNCATED_SIZE

Creates a smaller version of INPUT_FILE (which is a chrome-net-export-log.json
formatted NetLog file) and saves it to OUTPUT_FILE. Note that this works by
reading the file line by line and not fully parsing the JSON, so it must match
the exact format (whitespace and all).

File truncation is done by dropping the oldest events and keeping everything
else.

Parameters:

  INPUT_FILE:
    Path to net-export JSON file

  OUTPUT_FILE:
    Path to save truncated file to

  TRUNCATED_SIZE:
    The desired (approximate) size for the truncated file. May use a suffix to
    indicate units. Examples:
      2003 --> 2003 bytes
      100K --> 100 KiB
        8M --> 8 MiB
      1.5m --> 1.5 MiB
'''
|
|
|
|
def get_file_size(path):
    '''Reports how many bytes the file at |path| occupies.

    Raises OSError if |path| does not exist or cannot be accessed.'''
    return os.path.getsize(path)
|
|
|
|
|
|
def truncate_log_file(in_path, out_path, desired_size):
    '''Copies |in_path| to |out_path| such that it is approximately
    |desired_size| bytes large. This is accomplished by dropping the oldest
    events first. The final file size may not be exactly |desired_size| as only
    complete event lines are skipped.

    The input is scanned line by line without parsing the JSON: every line
    after the one starting with '"events": [' is treated as an event until the
    line that closes the events array (ending in "],") or the line starting
    with '"polledData": {' is reached. Lines outside the events array are
    always copied. If |desired_size| is not smaller than the input size,
    nothing is dropped.

    A one-line summary with the original and resulting sizes is written to
    stdout.'''
    orig_size = get_file_size(in_path)
    bytes_to_truncate = orig_size - desired_size

    # This variable is True if the current line being processed is an Event
    # line.
    inside_events = False

    # Binary mode keeps len(line) a true byte count (matching the sizes
    # reported by get_file_size) and copies the data through untouched, with
    # no decoding or newline translation.
    with open(in_path, 'rb') as in_file, open(out_path, 'wb') as out_file:
        for line in in_file:
            # The final line before polledData closes the events array, and
            # hence ends in "],". It must never be dropped, otherwise the
            # array is left unterminated in the output. The check for
            # polledData is more for documentation sake.
            # Both tests are separate operands of the "or"; they must not be
            # nested inside the startswith() call.
            if inside_events and (line.startswith(b'"polledData": {') or
                                  line.endswith(b'],\n')):
                inside_events = False

            # If this is an event line and need to drop more bytes, go ahead
            # and skip the line. Otherwise copy it to the output file.
            if inside_events and bytes_to_truncate > 0:
                bytes_to_truncate -= len(line)
            else:
                out_file.write(line)

            # All lines after this are events (up until the closing square
            # bracket).
            if line.startswith(b'"events": ['):
                inside_events = True

    sys.stdout.write(
        'Truncated file from %d to %d bytes\n' % (orig_size,
                                                  get_file_size(out_path)))
|
|
|
|
def parse_filesize_str(filesize_str):
    '''Parses a string representation of a file size into a byte value, or None
    on failure.

    The accepted form is a decimal number made of digits and ".", optionally
    followed by a single unit suffix: "k" for KiB or "m" for MiB (case
    insensitive). No suffix means bytes. The whole string must match, so
    inputs with an unknown suffix or trailing text (e.g. "1G", "100x") are
    rejected instead of being silently read as a smaller number.

    Returns the size in bytes as an int (fractions are truncated), or None if
    |filesize_str| cannot be parsed.'''
    # The trailing "$" makes the pattern cover the entire string; without it
    # anything after the number and optional suffix would be ignored.
    m = re.match(r'([0-9.]+)([km]?)$', filesize_str.lower())
    if not m:
        return None

    # The character class above also admits malformed decimals such as
    # "1.2.3" or "."; float() rejects those.
    try:
        float_value = float(m.group(1))
    except ValueError:
        return None

    kSuffixValueBytes = {
        'k': 1024,
        'm': 1024 * 1024,
        '': 1,
    }

    return int(float_value * kSuffixValueBytes[m.group(2)])
|
|
|
|
|
|
def main():
    '''Command-line entry point: validates sys.argv and runs the truncation.

    Expects exactly three arguments: INPUT_FILE, OUTPUT_FILE and
    TRUNCATED_SIZE. On an invalid invocation an "ERROR: ..." line followed by
    kUsage is written to stderr and the process exits with status 1.'''

    def usage_error(message):
        # Report |message|, show the usage text and abort with status 1.
        sys.stderr.write('ERROR: %s\n' % message)
        sys.stderr.write(kUsage)
        sys.exit(1)

    if len(sys.argv) != 4:
        usage_error('Requires 3 command line arguments')

    in_path = os.path.normpath(sys.argv[1])
    out_path = os.path.normpath(sys.argv[2])

    # Compare fully resolved paths rather than the normalized spellings, so
    # that a relative vs. absolute path or a symlink to the same file is also
    # caught. Writing the output onto the input would destroy the input.
    if os.path.realpath(in_path) == os.path.realpath(out_path):
        usage_error('OUTPUT_FILE must be different from INPUT_FILE')

    size_str = sys.argv[3]
    size_bytes = parse_filesize_str(size_str)
    if size_bytes is None:
        usage_error('Could not parse TRUNCATED_SIZE: %s' % size_str)

    truncate_log_file(in_path, out_path, size_bytes)
|
|
|
|
|
|
# Run main() only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    sys.exit(main())
|