# Copyright 2017 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Main Python API for analyzing binary size."""

import argparse
import calendar
import collections
import datetime
import gzip
import itertools
import logging
import os
import posixpath
import re
import subprocess
import sys
import tempfile
import zipfile

import apkanalyzer
import ar
import concurrent
import demangle
import describe
import file_format
import function_signature
import linker_map_parser
import models
import ninja_parser
import nm
import obj_analyzer
import path_util

sys.path.insert(1, os.path.join(path_util.SRC_ROOT, 'tools', 'grit'))
from grit.format import data_pack

_OWNERS_FILENAME = 'OWNERS'
_COMPONENT_REGEX = re.compile(r'\s*#\s*COMPONENT\s*:\s*(\S+)')
_FILE_PATH_REGEX = re.compile(r'\s*file://(\S+)')

# Holds computation state that is live only when an output directory exists.
_OutputDirectoryContext = collections.namedtuple('_OutputDirectoryContext', [
    'elf_object_paths',  # Only when elf_path is also provided.
    'known_inputs',  # Only when elf_path is also provided.
    'output_directory',
    'source_mapper',
    'thin_archives',
])


# Tunable "knobs" for CreateSectionSizesAndSymbols().
class SectionSizeKnobs(object):
  def __init__(self):
    # A limit on the number of symbols an address can have, before these
    # symbols are compacted into shared symbols. Increasing this value causes
    # more data to be stored in .size files, but is also more expensive.
    # Effect of max_same_name_alias_count (as of Oct 2017, with min_pss = max):
    # 1: shared .text syms = 1772874 bytes, file size = 9.43MiB (645476 syms).
    # 2: shared .text syms = 1065654 bytes, file size = 9.58MiB (669952 syms).
    # 6: shared .text syms = 464058 bytes, file size = 10.11MiB (782693 syms).
    # 10: shared .text syms = 365648 bytes, file size = 10.24MiB (813758 syms).
    # 20: shared .text syms = 86202 bytes, file size = 10.38MiB (854548 syms).
    # 40: shared .text syms = 48424 bytes, file size = 10.50MiB (890396 syms).
    # 50: shared .text syms = 41860 bytes, file size = 10.54MiB (902304 syms).
    # max: shared .text syms = 0 bytes, file size = 11.10MiB (1235449 syms).
    self.max_same_name_alias_count = 40  # 50kb is basically negligible.

    # An estimate of pak translation compression ratio to make comparisons
    # between .size files reasonable. Otherwise this can differ with every
    # pak change.
    self.pak_compression_ratio = 0.33

    # File name: Source file.
    self.apk_other_files = {
        'assets/icudtl.dat': '../../third_party/icu/android/icudtl.dat',
        'assets/snapshot_blob_32.bin': '../../v8/snapshot_blob_32.bin',
        'assets/snapshot_blob_64.bin': '../../v8/snapshot_blob_64.bin',
        'assets/natives_blob.bin': '../../v8/natives_blob.bin',
        'assets/unwind_cfi_32':
            '../../base/trace_event/cfi_backtrace_android.cc',
        'assets/webapk_dex_version.txt': (
            '../../chrome/android/webapk/libs/runtime_library_version.gni'),
        'lib/armeabi-v7a/libarcore_sdk_c_minimal.so': (
            '../../third_party/arcore-android-sdk'),
    }

    self.apk_expected_other_files = set([
        # From Monochrome.apk
        'AndroidManifest.xml',
        'resources.arsc',
        'assets/AndroidManifest.xml',
        'assets/metaresources.arsc',
        'META-INF/CHROMIUM.SF',
        'META-INF/CHROMIUM.RSA',
        'META-INF/MANIFEST.MF',
    ])

    self.src_root = path_util.SRC_ROOT


def _OpenMaybeGz(path):
  """Calls `gzip.open()` if |path| ends in ".gz", otherwise calls `open()`."""
  if path.endswith('.gz'):
    return gzip.open(path, 'rb')
  return open(path, 'rb')
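
# Usage sketch (illustrative; the path below is hypothetical):
#   with _OpenMaybeGz('out/Release/libchrome.so.map.gz') as f:
#     map_data = f.read()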


def _StripLinkerAddedSymbolPrefixes(raw_symbols):
  """Removes prefixes sometimes added to symbol names during linking.

  Removing prefixes makes symbol names match up with those found in .o files.
  """
  for symbol in raw_symbols:
    full_name = symbol.full_name
    if full_name.startswith('startup.'):
      symbol.flags |= models.FLAG_STARTUP
      symbol.full_name = full_name[8:]
    elif full_name.startswith('unlikely.'):
      symbol.flags |= models.FLAG_UNLIKELY
      symbol.full_name = full_name[9:]
    elif full_name.startswith('rel.local.'):
      symbol.flags |= models.FLAG_REL_LOCAL
      symbol.full_name = full_name[10:]
    elif full_name.startswith('rel.'):
      symbol.flags |= models.FLAG_REL
      symbol.full_name = full_name[4:]
    elif full_name.startswith('hot.'):
      symbol.flags |= models.FLAG_HOT
      symbol.full_name = full_name[4:]
    elif full_name.startswith('.L.str'):
      symbol.full_name = models.STRING_LITERAL_NAME
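
# Illustrative example (the mangled name is hypothetical): a map entry named
# 'startup._ZN4base3FooEv' comes out as '_ZN4base3FooEv' with
# models.FLAG_STARTUP set on the symbol.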


def _NormalizeNames(raw_symbols):
  """Ensures that all names are formatted in a useful way.

  This includes:
    - Deriving |name| and |template_name| from |full_name|.
    - Stripping of return types (for functions).
    - Moving "vtable for" and the like to be suffixes rather than prefixes.
  """
  found_prefixes = set()
  for symbol in raw_symbols:
    full_name = symbol.full_name

    # See comment in _CalculatePadding() about when this can happen. Don't
    # process names for non-native sections.
    if symbol.IsPak():
      # full_name: "about_ui_resources.grdp: IDR_ABOUT_UI_CREDITS_HTML".
      space_idx = full_name.rindex(' ')
      name = full_name[space_idx + 1:]
      symbol.template_name = name
      symbol.name = name
    elif (full_name.startswith('*') or
          symbol.IsOverhead() or
          symbol.IsOther()):
      symbol.template_name = full_name
      symbol.name = full_name
    elif symbol.IsDex():
      symbol.full_name, symbol.template_name, symbol.name = (
          function_signature.ParseJava(full_name))
    elif symbol.IsNative():
      # Remove [clone] suffix, and set flag accordingly.
      # Search from left-to-right, as multiple [clone]s can exist.
      # Example name suffixes:
      #     [clone .part.322]  # GCC
      #     [clone .isra.322]  # GCC
      #     [clone .constprop.1064]  # GCC
      #     [clone .11064]  # clang
      # http://unix.stackexchange.com/questions/223013/function-symbol-gets-part-suffix-after-compilation
      idx = full_name.find(' [clone ')
      if idx != -1:
        full_name = full_name[:idx]
        symbol.flags |= models.FLAG_CLONE

      # Clones for C symbols.
      if symbol.section == 't':
        idx = full_name.rfind('.')
        if idx != -1 and full_name[idx + 1:].isdigit():
          new_name = full_name[:idx]
          # Generated symbols that end with .123 but are not clones.
          # Find these via:
          # size_info.symbols.WhereInSection('t').WhereIsGroup().SortedByCount()
          if new_name not in ('__tcf_0', 'startup'):
            full_name = new_name
            symbol.flags |= models.FLAG_CLONE
            # Remove .part / .isra / .constprop.
            idx = full_name.rfind('.', 0, idx)
            if idx != -1:
              full_name = full_name[:idx]

      # E.g.: vtable for FOO
      idx = full_name.find(' for ', 0, 30)
      if idx != -1:
        found_prefixes.add(full_name[:idx + 4])
        full_name = '{} [{}]'.format(full_name[idx + 5:], full_name[:idx])

      # E.g.: virtual thunk to FOO
      idx = full_name.find(' to ', 0, 30)
      if idx != -1:
        found_prefixes.add(full_name[:idx + 3])
        full_name = '{} [{}]'.format(full_name[idx + 4:], full_name[:idx])

      # Strip out return type, and split out name, template_name.
      # Function parsing also applies to non-text symbols.
      # E.g. Function statics.
      symbol.full_name, symbol.template_name, symbol.name = (
          function_signature.Parse(full_name))

      # Remove anonymous namespaces (they just harm clustering).
      symbol.template_name = symbol.template_name.replace(
          '(anonymous namespace)::', '')
      symbol.full_name = symbol.full_name.replace(
          '(anonymous namespace)::', '')
      non_anonymous_name = symbol.name.replace('(anonymous namespace)::', '')
      if symbol.name != non_anonymous_name:
        symbol.flags |= models.FLAG_ANONYMOUS
        symbol.name = non_anonymous_name

    # Allow using "is" to compare names (and should help with RAM). This
    # applies to all symbols.
    function_signature.InternSameNames(symbol)

  logging.debug('Found name prefixes of: %r', found_prefixes)
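
# Illustrative before/after (the class name is hypothetical):
# 'vtable for base::Foo' becomes 'base::Foo [vtable]', and a clone suffix such
# as 'Foo() [clone .part.322]' is stripped with models.FLAG_CLONE set.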


def _NormalizeObjectPath(path):
  """Normalizes object paths.

  Prefixes are removed: obj/, ../../
  Archive names made more pathy: foo/bar.a(baz.o) -> foo/bar.a/baz.o
  """
  if path.startswith('obj/'):
    # Convert obj/third_party/... -> third_party/...
    path = path[4:]
  elif path.startswith('../../'):
    # Convert ../../third_party/... -> third_party/...
    path = path[6:]
  if path.endswith(')'):
    # Convert foo/bar.a(baz.o) -> foo/bar.a/baz.o so that hierarchical
    # breakdowns consider the .o part to be a separate node.
    start_idx = path.rindex('(')
    path = os.path.join(path[:start_idx], path[start_idx + 1:-1])
  return path
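
# Examples (derived directly from the rules above):
#   _NormalizeObjectPath('obj/third_party/foo.a(bar.o)')
#       -> 'third_party/foo.a/bar.o'
#   _NormalizeObjectPath('../../base/base.o') -> 'base/base.o'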


def _NormalizeSourcePath(path):
  """Returns (is_generated, normalized_path)"""
  if path.startswith('gen/'):
    # Convert gen/third_party/... -> third_party/...
    return True, path[4:]
  if path.startswith('../../'):
    # Convert ../../third_party/... -> third_party/...
    return False, path[6:]
  return True, path
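
# Examples (derived directly from the rules above):
#   _NormalizeSourcePath('gen/foo.cc') -> (True, 'foo.cc')
#   _NormalizeSourcePath('../../base/foo.cc') -> (False, 'base/foo.cc')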


def _ExtractSourcePathsAndNormalizeObjectPaths(raw_symbols, source_mapper):
  """Fills in the |source_path| attribute and normalizes |object_path|."""
  if source_mapper:
    logging.info('Looking up source paths from ninja files')
    for symbol in raw_symbols:
      object_path = symbol.object_path
      if symbol.IsDex() or symbol.IsOther():
        if symbol.source_path:
          symbol.generated_source, symbol.source_path = _NormalizeSourcePath(
              symbol.source_path)
      elif object_path:
        # We don't have source info for prebuilt .a files.
        if not os.path.isabs(object_path) and not object_path.startswith('..'):
          source_path = source_mapper.FindSourceForPath(object_path)
          if source_path:
            symbol.generated_source, symbol.source_path = (
                _NormalizeSourcePath(source_path))
        symbol.object_path = _NormalizeObjectPath(object_path)
    assert source_mapper.unmatched_paths_count == 0, (
        'One or more source file paths could not be found. Likely caused by '
        '.ninja files being generated at a different time than the .map file.')
  else:
    logging.info('Normalizing object paths')
    for symbol in raw_symbols:
      if symbol.object_path:
        symbol.object_path = _NormalizeObjectPath(symbol.object_path)


def _ComputeAncestorPath(path_list, symbol_count):
  """Returns the common ancestor of the given paths."""
  if not path_list:
    return ''

  prefix = os.path.commonprefix(path_list)
  # Check if all paths were the same.
  if prefix == path_list[0]:
    return prefix

  # Put in buckets to cut down on the number of unique paths.
  if symbol_count >= 100:
    symbol_count_str = '100+'
  elif symbol_count >= 50:
    symbol_count_str = '50-99'
  elif symbol_count >= 20:
    symbol_count_str = '20-49'
  elif symbol_count >= 10:
    symbol_count_str = '10-19'
  else:
    symbol_count_str = str(symbol_count)

  # Put the path count as a subdirectory so that grouping by path will show
  # "{shared}" as a bucket, and the symbol counts as leaves.
  if not prefix:
    return os.path.join('{shared}', symbol_count_str)
  return os.path.join(os.path.dirname(prefix), '{shared}', symbol_count_str)
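
# Worked example (hypothetical paths): for 25 symbols that all live under
# base/foo/, _ComputeAncestorPath(['base/foo/a.cc', 'base/foo/b.cc'], 25)
# returns 'base/foo/{shared}/20-49'.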


def _CompactLargeAliasesIntoSharedSymbols(raw_symbols, knobs):
  """Converts symbols with a large number of aliases into single symbols.

  The merged symbol's path fields are changed to common-ancestor paths in
  the form: common/dir/{shared}/$SYMBOL_COUNT

  Assumes aliases differ only by path (not by name).
  """
  num_raw_symbols = len(raw_symbols)
  num_shared_symbols = 0
  src_cursor = 0
  dst_cursor = 0
  while src_cursor < num_raw_symbols:
    symbol = raw_symbols[src_cursor]
    raw_symbols[dst_cursor] = symbol
    dst_cursor += 1
    aliases = symbol.aliases
    if aliases and len(aliases) > knobs.max_same_name_alias_count:
      symbol.source_path = _ComputeAncestorPath(
          [s.source_path for s in aliases if s.source_path], len(aliases))
      symbol.object_path = _ComputeAncestorPath(
          [s.object_path for s in aliases if s.object_path], len(aliases))
      symbol.generated_source = all(s.generated_source for s in aliases)
      symbol.aliases = None
      num_shared_symbols += 1
      src_cursor += len(aliases)
    else:
      src_cursor += 1
  raw_symbols[dst_cursor:] = []
  num_removed = src_cursor - dst_cursor
  logging.debug('Converted %d aliases into %d shared-path symbols',
                num_removed, num_shared_symbols)


def _ConnectNmAliases(raw_symbols):
  """Ensures |aliases| is set correctly for all symbols."""
  prev_sym = raw_symbols[0]
  for sym in raw_symbols[1:]:
    # Don't merge bss symbols.
    if sym.address > 0 and prev_sym.address == sym.address:
      # Don't merge padding-only symbols (** symbol gaps).
      if prev_sym.size > 0:
        # Don't merge if already merged.
        if prev_sym.aliases is None or prev_sym.aliases is not sym.aliases:
          if prev_sym.aliases:
            prev_sym.aliases.append(sym)
          else:
            prev_sym.aliases = [prev_sym, sym]
          sym.aliases = prev_sym.aliases
    prev_sym = sym


def _AssignNmAliasPathsAndCreatePathAliases(raw_symbols, object_paths_by_name):
  num_found_paths = 0
  num_unknown_names = 0
  num_path_mismatches = 0
  num_aliases_created = 0
  ret = []
  for symbol in raw_symbols:
    ret.append(symbol)
    full_name = symbol.full_name
    if (symbol.IsBss() or
        symbol.IsStringLiteral() or
        not full_name or
        full_name[0] in '*.' or  # e.g. ** merge symbols, .Lswitch.table
        full_name == 'startup'):
      continue

    object_paths = object_paths_by_name.get(full_name)
    if object_paths:
      num_found_paths += 1
    else:
      if num_unknown_names < 10:
        logging.warning('Symbol not found in any .o files: %r', symbol)
      num_unknown_names += 1
      continue

    if symbol.object_path and symbol.object_path not in object_paths:
      if num_path_mismatches < 10:
        logging.warning('Symbol path reported by .map not found by nm.')
        logging.warning('sym=%r', symbol)
        logging.warning('paths=%r', object_paths)
      object_paths.append(symbol.object_path)
      object_paths.sort()
      num_path_mismatches += 1

    symbol.object_path = object_paths[0]

    if len(object_paths) > 1:
      # Create one symbol for each object_path.
      aliases = symbol.aliases or [symbol]
      symbol.aliases = aliases
      num_aliases_created += len(object_paths) - 1
      for object_path in object_paths[1:]:
        new_sym = models.Symbol(
            symbol.section_name, symbol.size, address=symbol.address,
            full_name=full_name, object_path=object_path, aliases=aliases)
        aliases.append(new_sym)
        ret.append(new_sym)

  logging.debug('Cross-referenced %d symbols with nm output. '
                'num_unknown_names=%d num_path_mismatches=%d '
                'num_aliases_created=%d',
                num_found_paths, num_unknown_names, num_path_mismatches,
                num_aliases_created)
  return ret


def _DiscoverMissedObjectPaths(raw_symbols, known_inputs):
  # Missing object paths are caused by .a files added by -l flags, which are
  # not listed as explicit inputs within .ninja rules.
  missed_inputs = set()
  for symbol in raw_symbols:
    path = symbol.object_path
    if path.endswith(')'):
      # Convert foo/bar.a(baz.o) -> foo/bar.a
      path = path[:path.rindex('(')]
    if path and path not in known_inputs:
      missed_inputs.add(path)
  return missed_inputs
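
# Illustrative (hypothetical path): a symbol with object_path
# 'lib/libfoo.a(bar.o)' contributes 'lib/libfoo.a' to the returned set when
# that archive is not in |known_inputs|.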


def _CreateMergeStringsReplacements(merge_string_syms,
                                    list_of_positions_by_object_path):
  """Creates replacement symbols for |merge_string_syms|."""
  ret = []
  STRING_LITERAL_NAME = models.STRING_LITERAL_NAME
  assert len(merge_string_syms) == len(list_of_positions_by_object_path)
  tups = itertools.izip(merge_string_syms, list_of_positions_by_object_path)
  for merge_sym, positions_by_object_path in tups:
    merge_sym_address = merge_sym.address
    new_symbols = []
    ret.append(new_symbols)
    for object_path, positions in positions_by_object_path.iteritems():
      for offset, size in positions:
        address = merge_sym_address + offset
        symbol = models.Symbol(
            models.SECTION_RODATA, size, address, STRING_LITERAL_NAME,
            object_path=object_path)
        new_symbols.append(symbol)

  logging.debug('Created %d string literal symbols', sum(len(x) for x in ret))
  logging.debug('Sorting string literals')
  for symbols in ret:
    # In order to achieve a total ordering in the presence of aliases, need to
    # include both |address| and |object_path|.
    # In order to achieve consistent deduping, need to include |size|.
    symbols.sort(key=lambda x: (x.address, -x.size, x.object_path))

  logging.debug('Deduping string literals')
  num_removed = 0
  size_removed = 0
  num_aliases = 0
  for i, symbols in enumerate(ret):
    if not symbols:
      continue
    prev_symbol = symbols[0]
    new_symbols = [prev_symbol]
    for symbol in symbols[1:]:
      padding = symbol.address - prev_symbol.end_address
      if (prev_symbol.address == symbol.address and
          prev_symbol.size == symbol.size):
        # String is an alias.
        num_aliases += 1
        aliases = prev_symbol.aliases
        if aliases:
          aliases.append(symbol)
          symbol.aliases = aliases
        else:
          aliases = [prev_symbol, symbol]
          prev_symbol.aliases = aliases
          symbol.aliases = aliases
      elif padding + symbol.size <= 0:
        # String is a substring of prior one.
        num_removed += 1
        size_removed += symbol.size
        continue
      elif padding < 0:
        # String overlaps previous one. Adjust to not overlap.
        symbol.address -= padding
        symbol.size += padding
      new_symbols.append(symbol)
      prev_symbol = symbol
    ret[i] = new_symbols
    # Aliases come out in random order, so sort to be deterministic.
    ret[i].sort(key=lambda s: (s.address, s.object_path))

  logging.debug(
      'Removed %d overlapping string literals (%d bytes) & created %d aliases',
      num_removed, size_removed, num_aliases)
  return ret


def _CalculatePadding(raw_symbols):
  """Populates the |padding| field based on symbol addresses.

  Symbols must already be sorted by |address|.
  """
  seen_sections = set()
  for i, symbol in enumerate(raw_symbols[1:]):
    prev_symbol = raw_symbols[i]
    if symbol.IsOverhead():
      # Overhead symbols are not actionable so should be padding-only.
      symbol.padding = symbol.size
    if prev_symbol.section_name != symbol.section_name:
      assert symbol.section_name not in seen_sections, (
          'Input symbols must be sorted by section, then address.')
      seen_sections.add(symbol.section_name)
      continue
    if (symbol.address <= 0 or prev_symbol.address <= 0 or
        not symbol.IsNative() or not prev_symbol.IsNative()):
      continue

    if symbol.address == prev_symbol.address:
      if symbol.aliases and symbol.aliases is prev_symbol.aliases:
        symbol.padding = prev_symbol.padding
        symbol.size = prev_symbol.size
        continue
      # Padding-only symbols happen for ** symbol gaps.
      assert prev_symbol.size_without_padding == 0, (
          'Found duplicate symbols:\n%r\n%r' % (prev_symbol, symbol))

    padding = symbol.address - prev_symbol.end_address
    # These thresholds were found by experimenting with arm32 Chrome.
    # E.g.: Set them to 0 and see what warnings get logged, then take the max
    # value.
    # TODO(agrieve): See if these thresholds make sense for architectures
    #     other than arm32.
    if (not symbol.full_name.startswith('*') and
        not symbol.IsStringLiteral() and (
        symbol.section in 'rd' and padding >= 256 or
        symbol.section in 't' and padding >= 64)):
      # Should not happen.
      logging.warning('Large padding of %d between:\n A) %r\n B) %r' % (
                      padding, prev_symbol, symbol))
    symbol.padding = padding
    symbol.size += padding
    assert symbol.size >= 0, (
        'Symbol has negative size (likely not sorted properly): '
        '%r\nprev symbol: %r' % (symbol, prev_symbol))


def _ParseComponentFromOwners(filename):
  """Searches an OWNERS file for lines that start with `# COMPONENT:`.

  If an OWNERS file has no COMPONENT but references another OWNERS file,
  follows the reference and checks that file instead.

  Args:
    filename: Path to the file to parse.
  Returns:
    The text that follows the `# COMPONENT:` prefix, such as 'component>name'.
    Empty string if no component found or the file didn't exist.
  """
  reference_paths = []
  try:
    with open(filename) as f:
      for line in f:
        component_matches = _COMPONENT_REGEX.match(line)
        path_matches = _FILE_PATH_REGEX.match(line)
        if component_matches:
          return component_matches.group(1)
        elif path_matches:
          reference_paths.append(path_matches.group(1))
  except IOError:
    return ''

  if len(reference_paths) == 1:
    newpath = os.path.join(path_util.SRC_ROOT, reference_paths[0])
    return _ParseComponentFromOwners(newpath)
  else:
    return ''
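
# Illustrative OWNERS file that would yield 'UI>Browser' (contents are
# hypothetical):
#   someone@chromium.org
#   file://other/dir/OWNERS
#   # COMPONENT: UI>Browser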


def _FindComponentRoot(start_path, cache, knobs):
  """Searches all parent directories for COMPONENT in OWNERS files.

  Args:
    start_path: Path of directory to start searching from. Must be relative
        to SRC_ROOT.
    cache: Dict of OWNERS paths. Used instead of the filesystem if paths are
        present in the dict.
    knobs: Instance of SectionSizeKnobs. Tunable knobs and options.

  Returns:
    COMPONENT belonging to |start_path|, or empty string if not found.
  """
  prev_dir = None
  test_dir = start_path
  # This loop will traverse the directory structure upwards until reaching
  # SRC_ROOT, where test_dir and prev_dir will both equal an empty string.
  while test_dir != prev_dir:
    cached_component = cache.get(test_dir)
    if cached_component:
      return cached_component
    elif cached_component is None:
      owners_path = os.path.join(knobs.src_root, test_dir, _OWNERS_FILENAME)
      component = _ParseComponentFromOwners(owners_path)
      cache[test_dir] = component
      if component:
        return component
    prev_dir = test_dir
    test_dir = os.path.dirname(test_dir)
  return ''


def _PopulateComponents(raw_symbols, knobs):
  """Populates the |component| field based on |source_path|.

  Symbols without a |source_path| are skipped.

  Args:
    raw_symbols: list of Symbol objects.
    knobs: Instance of SectionSizeKnobs. Tunable knobs and options.
  """
  seen_paths = {}
  for symbol in raw_symbols:
    if symbol.source_path:
      folder_path = os.path.dirname(symbol.source_path)
      symbol.component = _FindComponentRoot(folder_path, seen_paths, knobs)


def _AddNmAliases(raw_symbols, names_by_address):
  """Adds symbols that were removed by identical code folding."""
  # Step 1: Create list of (index_of_symbol, name_list).
  logging.debug('Creating alias list')
  replacements = []
  num_new_symbols = 0
  missing_names = collections.defaultdict(list)
  for i, s in enumerate(raw_symbols):
    # Don't alias padding-only symbols (e.g. ** symbol gap)
    if s.size_without_padding == 0:
      continue
    name_list = names_by_address.get(s.address)
    if name_list:
      if s.full_name not in name_list:
        missing_names[s.full_name].append(s.address)
        logging.warning('Name missing from aliases: %s %s', s.full_name,
                        name_list)
        continue
      replacements.append((i, name_list))
      num_new_symbols += len(name_list) - 1

  if missing_names and logging.getLogger().isEnabledFor(logging.INFO):
    for address, names in names_by_address.iteritems():
      for name in names:
        if name in missing_names:
          logging.info('Missing name %s is at address %x instead of [%s]' %
              (name, address, ','.join('%x' % a for a in missing_names[name])))

  if float(num_new_symbols) / len(raw_symbols) < .05:
    logging.warning('Number of aliases is oddly low (%.0f%%). It should '
                    'usually be around 25%%. Ensure --tool-prefix is correct.',
                    float(num_new_symbols) / len(raw_symbols) * 100)

  # Step 2: Create new symbols as siblings to each existing one.
  logging.debug('Creating %d new symbols from nm output', num_new_symbols)
  expected_num_symbols = len(raw_symbols) + num_new_symbols
  ret = []
  prev_src = 0
  for cur_src, name_list in replacements:
    ret += raw_symbols[prev_src:cur_src]
    prev_src = cur_src + 1
    sym = raw_symbols[cur_src]
    # Create symbols (|sym| gets recreated and discarded).
    new_syms = []
    for full_name in name_list:
      # Do not set |aliases| in order to avoid being pruned by
      # _CompactLargeAliasesIntoSharedSymbols(), which assumes aliases differ
      # only by path. The field will be set afterwards by _ConnectNmAliases().
      new_syms.append(models.Symbol(
          sym.section_name, sym.size, address=sym.address,
          full_name=full_name))
    ret += new_syms
  ret += raw_symbols[prev_src:]
  assert expected_num_symbols == len(ret)
  return ret


def LoadAndPostProcessSizeInfo(path, file_obj=None):
  """Returns a SizeInfo for the given |path|."""
  logging.debug('Loading results from: %s', path)
  size_info = file_format.LoadSizeInfo(path, file_obj=file_obj)
  logging.info('Normalizing symbol names')
  _NormalizeNames(size_info.raw_symbols)
  logging.info('Calculating padding')
  _CalculatePadding(size_info.raw_symbols)
  logging.info('Loaded %d symbols', len(size_info.raw_symbols))
  return size_info
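
# Usage sketch (the file name is hypothetical):
#   size_info = LoadAndPostProcessSizeInfo('chrome.size')
#   logging.info('Have %d symbols', len(size_info.raw_symbols))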


def CreateMetadata(map_path, elf_path, apk_path, tool_prefix, output_directory,
                   linker_name):
  """Creates metadata dict.

  Args:
    map_path: Path to the linker .map(.gz) file to parse.
    elf_path: Path to the corresponding unstripped ELF file. Used to find
        symbol aliases and inlined functions. Can be None.
    apk_path: Path to the .apk file to measure.
    tool_prefix: Prefix for c++filt & nm.
    output_directory: Build output directory.
    linker_name: 'gold', 'lld_v#' (# is a number), 'lld-lto_v#', or None.

  Returns:
    None if |elf_path| is not supplied. Otherwise returns a dict mapping
    string constants to values.
    If |elf_path| is supplied, git revision and elf info are included.
    If |output_directory| is also supplied, then filenames will be included.
  """
  metadata = None
  if elf_path:
    logging.debug('Constructing metadata')
    git_rev = _DetectGitRevision(os.path.dirname(elf_path))
    architecture = _ArchFromElf(elf_path, tool_prefix)
    build_id = BuildIdFromElf(elf_path, tool_prefix)
    timestamp_obj = datetime.datetime.utcfromtimestamp(os.path.getmtime(
        elf_path))
    timestamp = calendar.timegm(timestamp_obj.timetuple())
    relative_tool_prefix = path_util.ToSrcRootRelative(tool_prefix)

    metadata = {
        models.METADATA_GIT_REVISION: git_rev,
        models.METADATA_ELF_ARCHITECTURE: architecture,
        models.METADATA_ELF_MTIME: timestamp,
        models.METADATA_ELF_BUILD_ID: build_id,
        models.METADATA_LINKER_NAME: linker_name,
        models.METADATA_TOOL_PREFIX: relative_tool_prefix,
    }

    if output_directory:
      relative_to_out = lambda path: os.path.relpath(path, output_directory)
      gn_args = _ParseGnArgs(os.path.join(output_directory, 'args.gn'))
      metadata[models.METADATA_MAP_FILENAME] = relative_to_out(map_path)
      metadata[models.METADATA_ELF_FILENAME] = relative_to_out(elf_path)
      metadata[models.METADATA_GN_ARGS] = gn_args

      if apk_path:
        metadata[models.METADATA_APK_FILENAME] = relative_to_out(apk_path)
        metadata[models.METADATA_APK_SIZE] = os.path.getsize(apk_path)
  return metadata


def _ResolveThinArchivePaths(raw_symbols, thin_archives):
  """Converts object_paths for thin archives to external .o paths."""
  for symbol in raw_symbols:
    object_path = symbol.object_path
    if object_path.endswith(')'):
      start_idx = object_path.rindex('(')
      archive_path = object_path[:start_idx]
      if archive_path in thin_archives:
        subpath = object_path[start_idx + 1:-1]
        symbol.object_path = ar.CreateThinObjectPath(archive_path, subpath)


def _ParseElfInfo(map_path, elf_path, tool_prefix, track_string_literals,
                  outdir_context=None, linker_name=None):
  """Adds ELF section sizes and symbols."""
  if elf_path:
    # Run nm on the elf file to retrieve the list of symbol names per-address.
    # This list is required because the .map file contains only a single name
    # for each address, yet multiple symbols are often coalesced when they are
    # identical. This coalescing happens mainly for small symbols and for C++
    # templates. Such symbols make up ~500kb of libchrome.so on Android.
    elf_nm_result = nm.CollectAliasesByAddressAsync(elf_path, tool_prefix)

    # Run nm on all .o/.a files to retrieve the symbol names within them.
    # The list is used to detect when multiple .o files contain the same
    # symbol (e.g. inline functions), and to update the object_path /
    # source_path fields accordingly.
    # Looking in object files is required because the .map file chooses a
    # single path for these symbols.
    # Rather than record all paths for each symbol, set the paths to be the
    # common ancestor of all paths.
    if outdir_context:
      bulk_analyzer = obj_analyzer.BulkObjectFileAnalyzer(
          tool_prefix, outdir_context.output_directory)
      bulk_analyzer.AnalyzePaths(outdir_context.elf_object_paths)

  logging.info('Parsing Linker Map')
  with _OpenMaybeGz(map_path) as map_file:
    section_sizes, raw_symbols = (
        linker_map_parser.MapFileParser().Parse(linker_name, map_file))

  if outdir_context and outdir_context.thin_archives:
    _ResolveThinArchivePaths(raw_symbols, outdir_context.thin_archives)

  if elf_path:
    logging.debug('Validating section sizes')
    elf_section_sizes = _SectionSizesFromElf(elf_path, tool_prefix)
    for k, v in elf_section_sizes.iteritems():
      if v != section_sizes.get(k):
        logging.error('ELF file and .map file do not agree on section sizes.')
        logging.error('.map file: %r', section_sizes)
        logging.error('readelf: %r', elf_section_sizes)
        sys.exit(1)

  if elf_path and outdir_context:
    missed_object_paths = _DiscoverMissedObjectPaths(
        raw_symbols, outdir_context.known_inputs)
    missed_object_paths = ar.ExpandThinArchives(
        missed_object_paths, outdir_context.output_directory)[0]
    bulk_analyzer.AnalyzePaths(missed_object_paths)
    bulk_analyzer.SortPaths()
    if track_string_literals:
      merge_string_syms = [s for s in raw_symbols if
                           s.full_name == '** merge strings' or
                           s.full_name == '** lld merge strings']
      # It is more likely for there to be a bug in supersize than for an ELF
      # to not have a single string literal.
      assert merge_string_syms
      string_ranges = [(s.address, s.size) for s in merge_string_syms]
      bulk_analyzer.AnalyzeStringLiterals(elf_path, string_ranges)

  logging.info('Stripping linker prefixes from symbol names')
  _StripLinkerAddedSymbolPrefixes(raw_symbols)
  # The map file for some reason doesn't demangle all names.
  # Demangle prints its own log statement.
  demangle.DemangleRemainingSymbols(raw_symbols, tool_prefix)

  if elf_path:
    logging.info(
        'Adding symbols removed by identical code folding (as reported by nm)')
    # This normally does not block (it's finished by this time).
    names_by_address = elf_nm_result.get()
    raw_symbols = _AddNmAliases(raw_symbols, names_by_address)

    if outdir_context:
      object_paths_by_name = bulk_analyzer.GetSymbolNames()
      logging.debug(
          'Fetched path information for %d symbols from %d files',
          len(object_paths_by_name),
          len(outdir_context.elf_object_paths) + len(missed_object_paths))

      # For aliases, this provides path information where there wasn't any.
      logging.info('Creating aliases for symbols shared by multiple paths')
      raw_symbols = _AssignNmAliasPathsAndCreatePathAliases(
          raw_symbols, object_paths_by_name)

      if track_string_literals:
        logging.info('Waiting for string literal extraction to complete.')
        list_of_positions_by_object_path = bulk_analyzer.GetStringPositions()
      bulk_analyzer.Close()

      if track_string_literals:
        logging.info('Deconstructing ** merge strings into literals')
        replacements = _CreateMergeStringsReplacements(
            merge_string_syms, list_of_positions_by_object_path)
        for merge_sym, literal_syms in itertools.izip(
            merge_string_syms, replacements):
          # Don't replace if no literals were found.
          if literal_syms:
            # Re-find the symbols since aliases cause their indices to change.
            idx = raw_symbols.index(merge_sym)
            # This assignment is a bit slow (causes the array to be shifted),
            # but is fast enough since len(merge_string_syms) < 10.
            raw_symbols[idx:idx + 1] = literal_syms

  return section_sizes, raw_symbols


def _ComputePakFileSymbols(
    file_name, contents, res_info, symbols_by_id, compression_ratio=1):
  id_map = {id(v): k
            for k, v in sorted(contents.resources.items(), reverse=True)}
  alias_map = {k: id_map[id(v)] for k, v in contents.resources.iteritems()
               if id_map[id(v)] != k}
  # Longest locale pak is es-419.pak.
  if len(os.path.basename(file_name)) <= 9:
    section_name = models.SECTION_PAK_TRANSLATIONS
  else:
    section_name = models.SECTION_PAK_NONTRANSLATED
  overhead = (12 + 6) * compression_ratio  # Header size plus extra offset
  symbols_by_id[file_name] = models.Symbol(
      section_name, overhead, full_name='Overhead: {}'.format(file_name))
  for resource_id in sorted(contents.resources):
    if resource_id in alias_map:
      # 4 extra bytes of metadata (2 16-bit ints)
      size = 4
      resource_id = alias_map[resource_id]
    else:
      # 6 extra bytes of metadata (1 32-bit int, 1 16-bit int)
      size = len(contents.resources[resource_id]) + 6
      name, source_path = res_info[resource_id]
      if resource_id not in symbols_by_id:
        full_name = '{}: {}'.format(source_path, name)
        symbols_by_id[resource_id] = models.Symbol(
            section_name, 0, address=resource_id, full_name=full_name)
    size *= compression_ratio
    symbols_by_id[resource_id].size += size


class _ResourceSourceMapper(object):
  def __init__(self, apk_path, output_directory, knobs):
    self._knobs = knobs
    self._res_info = self._LoadResInfo(apk_path, output_directory)
    self._pattern_dollar_underscore = re.compile(r'\$(.*?)__\d+')
    self._pattern_version_suffix = re.compile(r'-v\d+/')

  @staticmethod
  def _ParseResInfoFile(res_info_path):
    with open(res_info_path, 'r') as info_file:
      res_info = {}
      renames = {}
      for line in info_file.readlines():
        dest, source = line.strip().split(',')
        # Allow indirection due to renames.
        if dest.startswith('Rename:'):
          dest = dest.split(':', 1)[1]
          renames[dest] = source
        else:
          res_info[dest] = source
      for dest, renamed_dest in renames.iteritems():
        # Allow one more level of indirection due to renaming renamed files.
        renamed_dest = renames.get(renamed_dest, renamed_dest)
        actual_source = res_info.get(renamed_dest)
        if actual_source:
          res_info[dest] = actual_source
      return res_info

  def _LoadResInfo(self, apk_path, output_directory):
    apk_name = os.path.basename(apk_path)
    apk_res_info_name = apk_name + '.res.info'
    apk_res_info_path = os.path.join(
        output_directory, 'size-info', apk_res_info_name)
    res_info_without_root = self._ParseResInfoFile(apk_res_info_path)
    # We package resources in the res/ folder only in the apk.
    res_info = {
        os.path.join('res', dest): source
        for dest, source in res_info_without_root.iteritems()
    }
    res_info.update(self._knobs.apk_other_files)
    return res_info

  def FindSourceForPath(self, path):
    original_path = path
    # Sometimes android adds $ in front and __# before the extension.
    path = self._pattern_dollar_underscore.sub(r'\1', path)
    ret = self._res_info.get(path)
    if ret:
      return ret
    # Android build tools may append extra -v flags for the root dir.
    path = self._pattern_version_suffix.sub('/', path)
    ret = self._res_info.get(path)
    if ret:
      return ret
    if original_path not in self._knobs.apk_expected_other_files:
      logging.warning('Unexpected file in apk: %s', original_path)
    return None


def _ParsePakInfoFile(pak_info_path):
  with open(pak_info_path, 'r') as info_file:
    res_info = {}
    for line in info_file.readlines():
      name, res_id, path = line.split(',')
      res_info[int(res_id)] = (name, path.strip())
  return res_info
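
# Each .pak.info line is "name,id,path"; an illustrative line (the values are
# hypothetical):
#   IDR_ABOUT_UI_CREDITS_HTML,23305,about_ui_resources.grdp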


def _ParsePakSymbols(
    section_sizes, object_paths, output_directory, symbols_by_id):
  object_paths_by_id = collections.defaultdict(list)
  for path in object_paths:
    whitelist_path = os.path.join(output_directory, path + '.whitelist')
    if (not os.path.exists(whitelist_path)
        or os.path.getsize(whitelist_path) == 0):
      continue
    with open(whitelist_path, 'r') as f:
      for line in f:
        resource_id = int(line.rstrip())
        # There may be object files in static libraries that are removed by
        # the linker when there are no external references to their symbols.
        # Such files may appear in object_paths even though our apk does not
        # use them, resulting in resource_ids that don't end up being in the
        # final apk.
        if resource_id not in symbols_by_id:
          continue
        object_paths_by_id[resource_id].append(path)

  raw_symbols = []
  for resource_id, symbol in symbols_by_id.iteritems():
    raw_symbols.append(symbol)
    paths = set(object_paths_by_id[resource_id])
    if paths:
      symbol.object_path = paths.pop()
      if paths:
        aliases = symbol.aliases or [symbol]
        symbol.aliases = aliases
        for path in paths:
          new_sym = models.Symbol(
              symbol.section_name, symbol.size, address=symbol.address,
              full_name=symbol.full_name, object_path=path, aliases=aliases)
          aliases.append(new_sym)
          raw_symbols.append(new_sym)
  raw_symbols.sort(key=lambda s: (s.section_name, s.address))
  raw_total = 0.0
  int_total = 0
  for symbol in raw_symbols:
    raw_total += symbol.size
    # We truncate rather than round to ensure that we do not over-attribute.
    # It is easier to add another symbol to make up the difference.
    symbol.size = int(symbol.size)
    int_total += symbol.size
  # Attribute the excess to translations since only those are compressed.
  raw_symbols.append(models.Symbol(
      models.SECTION_PAK_TRANSLATIONS, int(round(raw_total - int_total)),
      full_name='Overhead: Pak compression artifacts'))

  for symbol in raw_symbols:
    prev = section_sizes.setdefault(symbol.section_name, 0)
    section_sizes[symbol.section_name] = prev + symbol.size
  return raw_symbols


def _ParseApkElfSectionSize(section_sizes, metadata, apk_elf_result):
  if metadata:
    logging.debug('Extracting section sizes from .so within .apk')
    apk_build_id, apk_section_sizes, elf_overhead_size = apk_elf_result.get()
    assert apk_build_id == metadata[models.METADATA_ELF_BUILD_ID], (
        'BuildID from apk_elf_result did not match')

    packed_section_name = None
    architecture = metadata[models.METADATA_ELF_ARCHITECTURE]
    # Relocation packing is enabled only for arm32 & arm64.
    if architecture == 'arm':
      packed_section_name = '.rel.dyn'
    elif architecture == 'arm64':
      packed_section_name = '.rela.dyn'

    if packed_section_name:
      logging.debug('Recording size of unpacked relocations')
      if packed_section_name not in section_sizes:
        logging.warning('Packed section not present: %s', packed_section_name)
      else:
        apk_section_sizes['%s (unpacked)' % packed_section_name] = (
            section_sizes.get(packed_section_name))
    return apk_section_sizes, elf_overhead_size
  return section_sizes, 0


def _ParseDexSymbols(section_sizes, apk_path, output_directory):
  symbols = apkanalyzer.CreateDexSymbols(apk_path, output_directory)
  prev = section_sizes.setdefault(models.SECTION_DEX, 0)
  section_sizes[models.SECTION_DEX] = prev + sum(s.size for s in symbols)
  return symbols


def _ParseApkOtherSymbols(section_sizes, apk_path, apk_so_path,
                          output_directory, knobs):
  res_source_mapper = _ResourceSourceMapper(apk_path, output_directory, knobs)
  apk_symbols = []
  zip_info_total = 0
  with zipfile.ZipFile(apk_path) as z:
    for zip_info in z.infolist():
      zip_info_total += zip_info.compress_size
      # Skip the main shared library, pak, and dex files, as they are
      # accounted for elsewhere.
      if (zip_info.filename == apk_so_path
          or zip_info.filename.endswith('.dex')
          or zip_info.filename.endswith('.pak')):
        continue
      source_path = res_source_mapper.FindSourceForPath(zip_info.filename)
      if source_path is None:
        source_path = os.path.join(models.APK_PREFIX_PATH, zip_info.filename)
      apk_symbols.append(models.Symbol(
          models.SECTION_OTHER, zip_info.compress_size,
          source_path=source_path,
          full_name=zip_info.filename))  # Full name must disambiguate
  overhead_size = os.path.getsize(apk_path) - zip_info_total
  assert overhead_size >= 0, 'Apk overhead must be non-negative'
  zip_overhead_symbol = models.Symbol(
      models.SECTION_OTHER, overhead_size, full_name='Overhead: APK file')
  apk_symbols.append(zip_overhead_symbol)
  prev = section_sizes.setdefault(models.SECTION_OTHER, 0)
  section_sizes[models.SECTION_OTHER] = prev + sum(s.size for s in apk_symbols)
  return apk_symbols


def _FindPakSymbolsFromApk(apk_path, output_directory, knobs):
  with zipfile.ZipFile(apk_path) as z:
    pak_zip_infos = (f for f in z.infolist() if f.filename.endswith('.pak'))
    apk_info_name = os.path.basename(apk_path) + '.pak.info'
    pak_info_path = os.path.join(output_directory, 'size-info', apk_info_name)
    res_info = _ParsePakInfoFile(pak_info_path)
    symbols_by_id = {}
    total_compressed_size = 0
    total_uncompressed_size = 0
    for zip_info in pak_zip_infos:
      contents = data_pack.ReadDataPackFromString(z.read(zip_info))
      compression_ratio = 1.0
      if zip_info.compress_size < zip_info.file_size:
        total_compressed_size += zip_info.compress_size
        total_uncompressed_size += zip_info.file_size
        compression_ratio = knobs.pak_compression_ratio
      _ComputePakFileSymbols(
          zip_info.filename, contents,
          res_info, symbols_by_id, compression_ratio=compression_ratio)
    if total_uncompressed_size > 0:
      actual_ratio = (
          float(total_compressed_size) / total_uncompressed_size)
      logging.info('Pak Compression Ratio: %f Actual: %f Diff: %.0f',
                   knobs.pak_compression_ratio, actual_ratio,
                   (knobs.pak_compression_ratio - actual_ratio) *
                   total_uncompressed_size)
  return symbols_by_id


def _FindPakSymbolsFromFiles(pak_files, pak_info_path, output_directory):
  """Uses files from args to find and add pak symbols."""
  res_info = _ParsePakInfoFile(pak_info_path)
  symbols_by_id = {}
  for pak_file_path in pak_files:
    with open(pak_file_path, 'r') as f:
      contents = data_pack.ReadDataPackFromString(f.read())
    _ComputePakFileSymbols(
        os.path.relpath(pak_file_path, output_directory), contents, res_info,
        symbols_by_id)
  return symbols_by_id


def _CalculateElfOverhead(section_sizes, elf_path):
  if elf_path:
    section_sizes_total_without_bss = sum(
        s for k, s in section_sizes.iteritems() if k != models.SECTION_BSS)
    elf_overhead_size = (
        os.path.getsize(elf_path) - section_sizes_total_without_bss)
    assert elf_overhead_size >= 0, (
        'Negative ELF overhead {}'.format(elf_overhead_size))
    return elf_overhead_size
  return 0


def CreateSectionSizesAndSymbols(
    map_path=None, tool_prefix=None, output_directory=None, elf_path=None,
    apk_path=None, track_string_literals=True, metadata=None,
    apk_so_path=None, pak_files=None, pak_info_file=None, linker_name=None,
    knobs=SectionSizeKnobs()):
  """Creates section sizes and symbols for a SizeInfo.

  Args:
    map_path: Path to the linker .map(.gz) file to parse.
    elf_path: Path to the corresponding unstripped ELF file. Used to find
        symbol aliases and inlined functions. Can be None.
    tool_prefix: Prefix for c++filt & nm (required).
    output_directory: Build output directory. If None, source_paths and
        symbol alias information will not be recorded.
    track_string_literals: Whether to break down "** merge string" sections
        into smaller symbols (requires output_directory).

  Returns:
    A tuple of (section_sizes, raw_symbols).
    section_sizes is a dict mapping section names to their size.
    raw_symbols is a list of Symbol objects.
  """
  if apk_path and elf_path:
    # Extraction takes around 1 second, so do it in parallel.
    apk_elf_result = concurrent.ForkAndCall(
        _ElfInfoFromApk, (apk_path, apk_so_path, tool_prefix))

  outdir_context = None
  source_mapper = None
  if output_directory:
    # Start by finding the elf_object_paths, so that nm can run on them while
    # the linker .map is being parsed.
    logging.info('Parsing ninja files.')
    source_mapper, ninja_elf_object_paths = (
        ninja_parser.Parse(output_directory, elf_path))
    logging.debug('Parsed %d .ninja files.', source_mapper.parsed_file_count)
    assert not elf_path or ninja_elf_object_paths, (
        'Failed to find link command in ninja files for ' +
        os.path.relpath(elf_path, output_directory))

    if ninja_elf_object_paths:
      elf_object_paths, thin_archives = ar.ExpandThinArchives(
          ninja_elf_object_paths, output_directory)
      known_inputs = set(elf_object_paths)
      known_inputs.update(ninja_elf_object_paths)
    else:
      elf_object_paths = None
      known_inputs = None
      # When we don't know which elf file is used, just search all paths.
      thin_archives = set(
          p for p in source_mapper.IterAllPaths()
          if p.endswith('.a') and ar.IsThinArchive(
              os.path.join(output_directory, p)))

    outdir_context = _OutputDirectoryContext(
        elf_object_paths=elf_object_paths,
        known_inputs=known_inputs,
        output_directory=output_directory,
        source_mapper=source_mapper,
        thin_archives=thin_archives)

  section_sizes, raw_symbols = _ParseElfInfo(
      map_path, elf_path, tool_prefix, track_string_literals,
      outdir_context=outdir_context, linker_name=linker_name)
  elf_overhead_size = _CalculateElfOverhead(section_sizes, elf_path)

  pak_symbols_by_id = None
  if apk_path:
    pak_symbols_by_id = _FindPakSymbolsFromApk(apk_path, output_directory,
                                               knobs)
    if elf_path:
      section_sizes, elf_overhead_size = _ParseApkElfSectionSize(
          section_sizes, metadata, apk_elf_result)
    raw_symbols.extend(
        _ParseDexSymbols(section_sizes, apk_path, output_directory))
    raw_symbols.extend(
        _ParseApkOtherSymbols(section_sizes, apk_path, apk_so_path,
                              output_directory, knobs))
  elif pak_files and pak_info_file:
    pak_symbols_by_id = _FindPakSymbolsFromFiles(
        pak_files, pak_info_file, output_directory)

  if elf_path:
    elf_overhead_symbol = models.Symbol(
        models.SECTION_OTHER, elf_overhead_size,
        full_name='Overhead: ELF file')
    prev = section_sizes.setdefault(models.SECTION_OTHER, 0)
    section_sizes[models.SECTION_OTHER] = prev + elf_overhead_size
    raw_symbols.append(elf_overhead_symbol)

  if pak_symbols_by_id:
    object_paths = (p for p in source_mapper.IterAllPaths()
                    if p.endswith('.o'))
    pak_raw_symbols = _ParsePakSymbols(
        section_sizes, object_paths, output_directory, pak_symbols_by_id)
    raw_symbols.extend(pak_raw_symbols)

  _ExtractSourcePathsAndNormalizeObjectPaths(raw_symbols, source_mapper)
  _PopulateComponents(raw_symbols, knobs)
  logging.info('Converting excessive aliases into shared-path symbols')
  _CompactLargeAliasesIntoSharedSymbols(raw_symbols, knobs)
  logging.debug('Connecting nm aliases')
  _ConnectNmAliases(raw_symbols)
  return section_sizes, raw_symbols


def CreateSizeInfo(
    section_sizes, raw_symbols, metadata=None, normalize_names=True):
  """Performs operations on all symbols and creates a SizeInfo object."""
  logging.debug('Sorting %d symbols', len(raw_symbols))
  # TODO(agrieve): Either change this sort so that it's only sorting by
  #     section (and not using .sort()), or have it specify a total ordering
  #     (which must also include putting padding-only symbols before others
  #     of the same address). Note: The sort as-is takes ~1.5 seconds.
  raw_symbols.sort(key=lambda s: (
      s.IsPak(), s.IsBss(), s.section_name, s.address))
  logging.info('Processed %d symbols', len(raw_symbols))

  # Padding is not strictly required, but it is useful to check for large
  # padding and log a warning.
  logging.info('Calculating padding')
  _CalculatePadding(raw_symbols)

  # Do not call _NormalizeNames() during archive since that method tends to
  # need tweaks over time. Calling it only when loading .size files allows
  # for more flexibility.
  if normalize_names:
    _NormalizeNames(raw_symbols)

  return models.SizeInfo(section_sizes, raw_symbols, metadata=metadata)


def _DetectGitRevision(directory):
  """Runs git rev-parse to get the SHA1 hash of the current revision.

  Args:
    directory: Path to directory where the rev-parse command will be run.

  Returns:
    A string with the SHA1 hash, or None if an error occurred.
  """
  try:
    git_rev = subprocess.check_output(
        ['git', '-C', directory, 'rev-parse', 'HEAD'])
    return git_rev.rstrip()
  except Exception:
    logging.warning('Failed to detect git revision for file metadata.')
    return None


def BuildIdFromElf(elf_path, tool_prefix):
  args = [path_util.GetReadElfPath(tool_prefix), '-n', elf_path]
  stdout = subprocess.check_output(args)
  match = re.search(r'Build ID: (\w+)', stdout)
  assert match, 'Build ID not found from running: ' + ' '.join(args)
  return match.group(1)


def _SectionSizesFromElf(elf_path, tool_prefix):
  args = [path_util.GetReadElfPath(tool_prefix), '-S', '--wide', elf_path]
  stdout = subprocess.check_output(args)
  section_sizes = {}
  # Matches  [ 2] .hash HASH 00000000006681f0 0001f0 003154 04 A 3 0 8
  for match in re.finditer(r'\[[\s\d]+\] (\..*)$', stdout, re.MULTILINE):
    items = match.group(1).split()
    section_sizes[items[0]] = int(items[4], 16)
  return section_sizes
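
# Note on the parse above: after the regex strips the "[ N]" index column,
# items[0] is the section name and items[4] is the hex Size column of
# `readelf -S --wide` output (Name, Type, Address, Off, Size, ...).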


def _ArchFromElf(elf_path, tool_prefix):
  args = [path_util.GetReadElfPath(tool_prefix), '-h', elf_path]
  stdout = subprocess.check_output(args)
  machine = re.search(r'Machine:\s*(.+)', stdout).group(1)
  if machine == 'Intel 80386':
    return 'x86'
  elif machine == 'Advanced Micro Devices X86-64':
    return 'x64'
  elif machine == 'ARM':
    return 'arm'
  elif machine == 'AArch64':
    return 'arm64'
  return machine


def _ParseGnArgs(args_path):
  """Returns a list of normalized "key=value" strings."""
  args = {}
  with open(args_path) as f:
    for l in f:
      # Strips #s even if within a string literal. Not a problem in practice.
      parts = l.split('#')[0].split('=')
      if len(parts) != 2:
        continue
      args[parts[0].strip()] = parts[1].strip()
  return ["%s=%s" % x for x in sorted(args.iteritems())]
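
# Illustrative args.gn (the contents are hypothetical):
#   is_official_build = true  # trailing comments are stripped
#   target_cpu = "arm"
# would yield ['is_official_build=true', 'target_cpu="arm"'].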


def _DetectLinkerName(map_path):
  with _OpenMaybeGz(map_path) as map_file:
    return linker_map_parser.DetectLinkerNameFromMapFile(map_file)


def _ElfInfoFromApk(apk_path, apk_so_path, tool_prefix):
  """Returns a tuple of (build_id, section_sizes, elf_overhead_size)."""
  with zipfile.ZipFile(apk_path) as apk, \
       tempfile.NamedTemporaryFile() as f:
    f.write(apk.read(apk_so_path))
    f.flush()
    build_id = BuildIdFromElf(f.name, tool_prefix)
    section_sizes = _SectionSizesFromElf(f.name, tool_prefix)
    elf_overhead_size = _CalculateElfOverhead(section_sizes, f.name)
    return build_id, section_sizes, elf_overhead_size


def _AutoIdentifyInputFile(args):
  file_output = subprocess.check_output(['file', args.f])
  format_text = file_output[file_output.find(': ') + 2:]
  # File-not-found -> 'cannot ...' and directory -> 'directory', neither of
  # which matches anything here, so they are handled by the final
  # 'return False'.
  if (format_text.startswith('Java archive data') or
      format_text.startswith('Zip archive data')):
    logging.info('Auto-identified --apk-file.')
    args.apk_file = args.f
    return True
  if format_text.startswith('ELF '):
    logging.info('Auto-identified --elf-file.')
    args.elf_file = args.f
    return True
  if format_text.startswith('ASCII text'):
    logging.info('Auto-identified --map-file.')
    args.map_file = args.f
    return True
  return False


def AddMainPathsArguments(parser):
  """Adds arguments for DeduceMainPaths()."""
  parser.add_argument('-f', metavar='FILE',
                      help='Auto-identify input file type.')
  parser.add_argument('--apk-file',
                      help='.apk file to measure. When set, --elf-file will '
                           'be derived (if unset). Providing the .apk allows '
                           'for the size of packed relocations to be '
                           'recorded.')
  parser.add_argument('--elf-file',
                      help='Path to input ELF file. Currently used for '
                           'capturing metadata.')
  parser.add_argument('--map-file',
                      help='Path to input .map(.gz) file. Defaults to '
                           '{{elf_file}}.map(.gz)?. If given without '
                           '--elf-file, no size metadata will be recorded.')
  parser.add_argument('--no-source-paths', action='store_true',
                      help='Do not use .ninja files to map '
                           'object_path -> source_path')
  parser.add_argument('--output-directory',
                      help='Path to the root build directory.')
  parser.add_argument('--tool-prefix',
                      help='Path prefix for c++filt, nm, readelf.')


def AddArguments(parser):
  parser.add_argument('size_file', help='Path to output .size file.')
  parser.add_argument('--pak-file', action='append',
                      help='Paths to pak files.')
  parser.add_argument('--pak-info-file',
                      help='This file should contain all ids found in the '
                           'pak files that have been passed in.')
  parser.add_argument('--no-string-literals', dest='track_string_literals',
                      default=True, action='store_false',
                      help='Disable breaking down "** merge strings" into '
                           'more granular symbols.')
  parser.add_argument('--source-directory',
                      help='Custom path to the root source directory.')
  AddMainPathsArguments(parser)


def DeduceMainPaths(args, parser):
  """Computes main paths based on input, and deduces them if needed."""
  if args.f is not None:
    if not _AutoIdentifyInputFile(args):
      parser.error('Cannot find or identify file %s' % args.f)

  apk_path = args.apk_file
  elf_path = args.elf_file
  map_path = args.map_file
  any_input = apk_path or elf_path or map_path
  if not any_input:
    parser.error('Must pass at least one of --apk-file, --elf-file, '
                 '--map-file')
  output_directory_finder = path_util.OutputDirectoryFinder(
      value=args.output_directory,
      any_path_within_output_directory=any_input)

  apk_so_path = None
  if apk_path:
    with zipfile.ZipFile(apk_path) as z:
      lib_infos = [f for f in z.infolist()
                   if f.filename.endswith('.so') and f.file_size > 0]
    assert lib_infos, 'APK has no .so files.'
    # TODO(agrieve): Add support for multiple .so files, and take into
    #     account secondary architectures.
    apk_so_path = max(lib_infos, key=lambda x: x.file_size).filename
    logging.debug('Sub-apk path=%s', apk_so_path)
    if not elf_path and output_directory_finder.Tentative():
      elf_path = os.path.join(
          output_directory_finder.Tentative(), 'lib.unstripped',
          os.path.basename(apk_so_path.replace('crazy.', '')))
      logging.debug('Detected --elf-file=%s', elf_path)

  if map_path:
    if not map_path.endswith('.map') and not map_path.endswith('.map.gz'):
      parser.error('Expected --map-file to end with .map or .map.gz')
  else:
    map_path = elf_path + '.map'
    if not os.path.exists(map_path):
      map_path += '.gz'
    if not os.path.exists(map_path):
      parser.error('Could not find .map(.gz)? file. Ensure you have built '
                   'with is_official_build=true, or use --map-file to point '
                   'at a linker map file.')

  linker_name = _DetectLinkerName(map_path)
  logging.info('Linker name: %s' % linker_name)
  tool_prefix_finder = path_util.ToolPrefixFinder(
      value=args.tool_prefix,
      output_directory_finder=output_directory_finder,
      linker_name=linker_name)
  tool_prefix = tool_prefix_finder.Finalized()
  output_directory = None
  if not args.no_source_paths:
    output_directory = output_directory_finder.Finalized()
  return (output_directory, tool_prefix, apk_path, apk_so_path, elf_path,
          map_path, linker_name)


def Run(args, parser):
  if not args.size_file.endswith('.size'):
    parser.error('size_file must end with .size')

  (output_directory, tool_prefix, apk_path, apk_so_path, elf_path, map_path,
   linker_name) = DeduceMainPaths(args, parser)

  metadata = CreateMetadata(map_path, elf_path, apk_path, tool_prefix,
                            output_directory, linker_name)

  knobs = SectionSizeKnobs()
  if args.source_directory:
    knobs.src_root = args.source_directory

  section_sizes, raw_symbols = CreateSectionSizesAndSymbols(
      map_path=map_path, tool_prefix=tool_prefix, elf_path=elf_path,
      apk_path=apk_path, output_directory=output_directory,
      track_string_literals=args.track_string_literals,
      metadata=metadata, apk_so_path=apk_so_path,
      pak_files=args.pak_file, pak_info_file=args.pak_info_file,
      linker_name=linker_name, knobs=knobs)
  size_info = CreateSizeInfo(
      section_sizes, raw_symbols, metadata=metadata, normalize_names=False)

  if logging.getLogger().isEnabledFor(logging.INFO):
    for line in describe.DescribeSizeInfoCoverage(size_info):
      logging.info(line)
  logging.info('Recorded info for %d symbols', len(size_info.raw_symbols))
  logging.info('Recording metadata: \n %s',
               '\n '.join(describe.DescribeMetadata(size_info.metadata)))
  logging.info('Saving result to %s', args.size_file)
  file_format.SaveSizeInfo(size_info, args.size_file)
  size_in_mb = os.path.getsize(args.size_file) / 1024.0 / 1024.0
  logging.info('Done. File size is %.2fMiB.', size_in_mb)