mirror of
https://github.com/klzgrad/naiveproxy.git
synced 2024-12-01 09:46:09 +03:00
195 lines
6.6 KiB
Python
195 lines
6.6 KiB
Python
# Copyright 2017 The Chromium Authors. All rights reserved.
|
|
# Use of this source code is governed by a BSD-style license that can be
|
|
# found in the LICENSE file.
|
|
|
|
"""Runs nm on specified .a and .o file, plus some analysis.
|
|
|
|
CollectAliasesByAddress():
  Runs nm on the elf to collect all symbol names. This reveals symbol names of
  identical-code-folded functions.

CollectAliasesByAddressAsync():
  Runs CollectAliasesByAddress in a subprocess and returns a promise.

RunNmOnIntermediates():
  BulkForkAndCall() target: Runs nm on a .a file or a list of .o files, parses
  the output, extracts symbol information, and (if available) extracts string
  offset information.
|
|
"""
|
|
|
|
import collections
|
|
import subprocess
|
|
|
|
import concurrent
|
|
import demangle
|
|
import path_util
|
|
|
|
|
|
def _IsRelevantNmName(name):
|
|
# Skip lines like:
|
|
# 00000000 t $t
|
|
# 00000000 r $d.23
|
|
# 00000344 N
|
|
return name and not name.startswith('$')
|
|
|
|
|
|
def _IsRelevantObjectFileName(name):
|
|
# Prevent marking compiler-generated symbols as candidates for shared paths.
|
|
# E.g., multiple files might have "CSWTCH.12", but they are different symbols.
|
|
#
|
|
# Find these via:
|
|
# size_info.symbols.GroupedByFullName(min_count=-2).Filter(
|
|
# lambda s: s.WhereObjectPathMatches('{')).SortedByCount()
|
|
# and then search for {shared}.
|
|
# List of names this applies to:
|
|
# startup
|
|
# __tcf_0 <-- Generated for global destructors.
|
|
# ._79
|
|
# .Lswitch.table, .Lswitch.table.12
|
|
# CSWTCH.12
|
|
# lock.12
|
|
# table.12
|
|
# __compound_literal.12
|
|
# .L.ref.tmp.1
|
|
# .L.str, .L.str.3
|
|
# .L__func__.main: (when using __func__)
|
|
# .L__FUNCTION__._ZN6webrtc17AudioDeviceBuffer11StopPlayoutEv
|
|
# .L__PRETTY_FUNCTION__._Unwind_Resume
|
|
# .L_ZZ24ScaleARGBFilterCols_NEONE9dx_offset (an array literal)
|
|
if name in ('__tcf_0', 'startup'):
|
|
return False
|
|
if name.startswith('._') and name[2:].isdigit():
|
|
return False
|
|
if name.startswith('.L') and name.find('.', 2) != -1:
|
|
return False
|
|
|
|
dot_idx = name.find('.')
|
|
if dot_idx == -1:
|
|
return True
|
|
name = name[:dot_idx]
|
|
|
|
return name not in ('CSWTCH', 'lock', '__compound_literal', 'table')
|
|
|
|
|
|
def CollectAliasesByAddress(elf_path, tool_prefix):
  """Runs nm on |elf_path| and returns a dict of address->[names].

  Only nm entries whose type letter is one of "t", "T" or "W" and whose name
  passes _IsRelevantNmName() are considered, and entries at address 0 are
  skipped. Names are demangled via demangle.DemangleSetsInDicts().

  Args:
    elf_path: Path of the ELF file handed to nm.
    tool_prefix: Passed to path_util.GetNmPath() and to the demangler.

  Returns:
    A dict mapping each integer address that has more than one name to the
    sorted list of those names.
  """
  # About 60mb of output, but piping takes ~30s, and loading it into RAM
  # directly takes 3s.
  nm_cmd = [
      path_util.GetNmPath(tool_prefix),
      '--no-sort',
      '--defined-only',
      elf_path,
  ]
  nm_output = subprocess.check_output(nm_cmd)

  # Constructors often show up twice, so use sets to ensure no duplicates.
  aliases = collections.defaultdict(set)
  for nm_line in nm_output.splitlines():
    # Expected line layout: "<hex address> <type letter> <mangled name>".
    sep = nm_line.find(' ')
    symbol_type = nm_line[sep + 1]

    # To verify that rodata does not have aliases:
    #   nm --no-sort --defined-only libchrome.so > nm.out
    #   grep -v '\$' nm.out | grep ' r ' | sort | cut -d' ' -f1 > addrs
    #   wc -l < addrs; uniq < addrs | wc -l
    if symbol_type not in 'tTW':
      continue
    mangled = nm_line[sep + 3:]
    if not _IsRelevantNmName(mangled):
      continue

    addr = int(nm_line[:sep], 16)
    if addr:
      aliases[addr].add(mangled)

  # Demangle all names.
  aliases = demangle.DemangleSetsInDicts(aliases, tool_prefix)

  # Since this is run in a separate process, minimize data passing by returning
  # only aliased symbols.
  # Also: Sort to ensure stable ordering.
  return {
      addr: sorted(names)
      for addr, names in aliases.iteritems()
      if len(names) > 1
  }
|
|
|
|
|
|
def _CollectAliasesByAddressAsyncHelper(elf_path, tool_prefix):
  """Runs CollectAliasesByAddress() and returns its result in encoded form.

  The address keys are converted with str() while encoding through
  concurrent.EncodeDictOfLists().
  """
  return concurrent.EncodeDictOfLists(
      CollectAliasesByAddress(elf_path, tool_prefix), key_transform=str)
|
|
|
|
|
|
def CollectAliasesByAddressAsync(elf_path, tool_prefix):
  """Calls CollectAliasesByAddress in a helper process. Returns a Result.

  The helper's encoded output is decoded with concurrent.DecodeDictOfLists(),
  converting the keys back with int().
  """
  def _decode_with_int_keys(payload):
    return concurrent.DecodeDictOfLists(payload, key_transform=int)

  helper_args = (elf_path, tool_prefix)
  return concurrent.ForkAndCall(
      _CollectAliasesByAddressAsyncHelper,
      helper_args,
      decode_func=_decode_with_int_keys)
|
|
|
|
|
|
def _ParseOneObjectFileNmOutput(lines):
  """Consumes one object file's worth of nm lines from |lines|.

  Reading stops at the first empty line or when |lines| is exhausted.

  Args:
    lines: Iterable of nm output lines.

  Returns:
    A tuple (symbol_names, string_addresses):
      symbol_names: Set of mangled names that pass both _IsRelevantNmName()
          and _IsRelevantObjectFileName().
      string_addresses: List of addresses, as strings with leading zeros
          stripped ('0' if nothing remains), for type-"r" entries named
          ".L.str*" or ".L__*" with another "." past index 3.
  """
  # Constructors are often repeated because they have the same unmangled
  # name, but multiple mangled names. See:
  # https://stackoverflow.com/questions/6921295/dual-emission-of-constructor-symbols
  names = set()
  literal_addresses = []
  for entry in lines:
    if not entry:
      break
    sep = entry.find(' ')  # Everything before this is the address.
    symbol_type = entry[sep + 1]
    mangled = entry[sep + 3:]
    if not _IsRelevantNmName(mangled):
      continue

    # Refer to _IsRelevantObjectFileName() for examples of names.
    is_string_literal = symbol_type == 'r' and (
        mangled.startswith('.L.str') or
        (mangled.startswith('.L__') and '.' in mangled[3:]))
    if is_string_literal:
      # Leave as a string for easier marshalling.
      literal_addresses.append(entry[:sep].lstrip('0') or '0')
    elif _IsRelevantObjectFileName(mangled):
      names.add(mangled)
  return names, literal_addresses
|
|
|
|
|
|
# This is a target for BulkForkAndCall().
|
|
def RunNmOnIntermediates(target, tool_prefix, output_directory):
  """Returns encoded_symbol_names_by_path, encoded_string_addresses_by_path.

  Args:
    target: Either a single path to a .a (as a string), or a list of .o paths.
    tool_prefix: Passed to path_util.GetNmPath() to locate nm.
    output_directory: Working directory that nm is run from.
  """
  is_archive = isinstance(target, basestring)
  nm_cmd = [path_util.GetNmPath(tool_prefix), '--no-sort', '--defined-only']
  if is_archive:
    nm_cmd.append(target)
  else:
    nm_cmd.extend(target)
  nm_lines = subprocess.check_output(nm_cmd, cwd=output_directory).splitlines()

  # Empty .a file has no output.
  if not nm_lines:
    return concurrent.EMPTY_ENCODED_DICT, concurrent.EMPTY_ENCODED_DICT

  # Output is treated as multi-file when it opens with a blank line; the line
  # after that is then read as the first "<path>:" header.
  has_path_headers = not nm_lines[0]
  line_iter = iter(nm_lines)
  if has_path_headers:
    next(line_iter)
    current_path = next(line_iter)[:-1]  # Path ends with a colon.
  else:
    assert not is_archive
    current_path = target[0]

  names_by_path = {}
  addresses_by_path = {}
  while current_path:
    # For archives, the key looks like: foo/bar.a(baz.o)
    key = '%s(%s)' % (target, current_path) if is_archive else current_path

    names, addresses = _ParseOneObjectFileNmOutput(line_iter)
    names_by_path[key] = names
    if addresses:
      addresses_by_path[key] = addresses
    # The next line is the following header; the ':' default becomes '' once
    # the output is exhausted, which ends the loop.
    current_path = next(line_iter, ':')[:-1]

  # The multiprocess API uses pickle, which is ridiculously slow. More than 2x
  # faster to use join & split.
  # TODO(agrieve): We could use path indices as keys rather than paths to cut
  #     down on marshalling overhead.
  encoded_names = concurrent.EncodeDictOfLists(names_by_path)
  encoded_addresses = concurrent.EncodeDictOfLists(addresses_by_path)
  return (encoded_names, encoded_addresses)
|