mirror of
https://github.com/klzgrad/naiveproxy.git
synced 2024-11-24 22:36:09 +03:00
420 lines
15 KiB
Python
420 lines
15 KiB
Python
|
# Copyright 2018 The Chromium Authors. All rights reserved.
|
||
|
# Use of this source code is governed by a BSD-style license that can be
|
||
|
# found in the LICENSE file.
|
||
|
|
||
|
import logging
|
||
|
import os
|
||
|
import re
|
||
|
import struct
|
||
|
import zipfile
|
||
|
|
||
|
# The default zipfile python module cannot open APKs properly, but this
|
||
|
# fixes it. Note that simply importing this file is sufficient to
|
||
|
# ensure that zip works correctly for all other modules. See:
|
||
|
# http://bugs.python.org/issue14315
|
||
|
# https://hg.python.org/cpython/rev/6dd5e9556a60#l2.8
|
||
|
def _PatchZipFile():
  """Make zipfile.ZipInfo tolerant of 'extra' fields it cannot decode.

  Replaces the private zipfile.ZipInfo._decodeExtra method with a wrapper
  that calls the original implementation and ignores any struct.error it
  raises. The patch is process-wide: it affects every user of the zipfile
  module once this file has been imported.
  """
  # pylint: disable=protected-access
  oldDecodeExtra = zipfile.ZipInfo._decodeExtra

  def decodeExtra(self, *args, **kwargs):
    # _decodeExtra is a private method whose signature is not stable across
    # CPython versions (some releases pass an additional argument), so
    # forward whatever the caller supplies instead of assuming none.
    try:
      return oldDecodeExtra(self, *args, **kwargs)
    except struct.error:
      # Deliberate best-effort: an undecodable extra field must not prevent
      # the archive from being opened.
      return None

  zipfile.ZipInfo._decodeExtra = decodeExtra


_PatchZipFile()
|
||
|
|
||
|
|
||
|
class ApkZipInfo(object):
  """Models a single file entry from an ApkReader.

  This is very similar to the zipfile.ZipInfo class. It provides a few
  properties describing the entry:
     - filename          (same as ZipInfo.filename)
     - file_size         (same as ZipInfo.file_size)
     - compress_size     (same as ZipInfo.compress_size)
     - file_offset       (note: not provided by ZipInfo)

  And a few useful methods: IsCompressed() and IsElfFile().

  Entries can be created by using ApkReader() methods.
  """

  def __init__(self, zip_file, zip_info):
    """Construct instance. Do not call this directly. Use ApkReader methods.

    Args:
      zip_file: The open zipfile.ZipFile instance that contains the entry.
      zip_info: The zipfile.ZipInfo instance describing the entry.
    """
    self._file = zip_file
    self._info = zip_info
    # Computed lazily by the |file_offset| property, since it requires
    # reading the entry's local header from the archive.
    self._file_offset = None

  @property
  def filename(self):
    """Entry's file path within APK."""
    return self._info.filename

  @property
  def file_size(self):
    """Entry's extracted file size in bytes."""
    return self._info.file_size

  @property
  def compress_size(self):
    """Entry's compressed file size in bytes."""
    return self._info.compress_size

  @property
  def file_offset(self):
    """Entry's starting file offset in the APK.

    The value is read from the archive on first access, then cached.
    """
    if self._file_offset is None:
      self._file_offset = self._ZipFileOffsetFromLocalHeader(
          self._file.fp, self._info.header_offset)
    return self._file_offset

  def __repr__(self):
    """Convert to string for debugging."""
    return 'ApkZipInfo["%s",size=0x%x,compressed=0x%x,offset=0x%x]' % (
        self.filename, self.file_size, self.compress_size, self.file_offset)

  def IsCompressed(self):
    """Returns True iff the entry is compressed."""
    return self._info.compress_type != zipfile.ZIP_STORED

  def IsElfFile(self):
    """Returns True iff the entry is an ELF file."""
    with self._file.open(self._info, 'r') as f:
      # The archive is read as bytes, so compare against a bytes literal
      # (a text literal never compares equal to bytes on Python 3).
      return f.read(4) == b'\x7fELF'

  @staticmethod
  def _ZipFileOffsetFromLocalHeader(fd, local_header_offset):
    """Return a file's start offset from its zip archive local header.

    Args:
      fd: Input file object (seekable, opened in binary mode).
      local_header_offset: Local header offset (from its ZipInfo entry).
    Returns:
      file start offset.
    Raises:
      struct.error if the two length fields cannot be read in full.
    """
    FILE_NAME_LEN_OFFSET = 26
    FILE_NAME_OFFSET = 30
    fd.seek(local_header_offset + FILE_NAME_LEN_OFFSET)
    # The file name length and extra field length are two consecutive
    # 16-bit fields. Zip archives are little-endian regardless of the host,
    # hence the explicit '<' byte-order marker.
    file_name_len, extra_field_len = struct.unpack('<HH', fd.read(4))
    return (local_header_offset + FILE_NAME_OFFSET +
            file_name_len + extra_field_len)
|
||
|
|
||
|
|
||
|
class ApkReader(object):
  """A convenience class used to read the content of APK files.

  Its design is very similar to the one from zipfile.ZipFile, except
  that it returns ApkZipInfo entries which provide a |file_offset|
  property that can be used to know where a given file is located inside
  the archive.

  It is also easy to mock for unit-testing (see MockApkReader in
  apk_utils_unittest.py) without creating any files on disk.

  Usage is the following:
    - Create an instance using a with statement (for proper unit-testing).
    - Call ListEntries() to list all entries in the archive. This returns
      a list of ApkZipInfo entries.
    - Or call FindEntry() corresponding to a given path within the archive.

  For example:
     with ApkReader(input_apk_path) as reader:
       info = reader.FindEntry('lib/armeabi-v7a/libfoo.so')
       if info.IsCompressed() or not info.IsElfFile():
         raise Exception('Invalid library path')

  The ApkZipInfo can be used to inspect the entry's metadata. See its
  documentation for all details.
  """

  def __init__(self, apk_path):
    """Initialize instance.

    Args:
      apk_path: Path of the input APK, opened immediately for reading.
    """
    self._zip_file = zipfile.ZipFile(apk_path, 'r')
    self._path = apk_path

  def __enter__(self):
    """Python context manager entry."""
    return self

  def __exit__(self, *exc_info):
    """Python context manager exit. Exceptions are never suppressed."""
    self.Close()

  @property
  def path(self):
    """The corresponding input APK path."""
    return self._path

  def Close(self):
    """Close the reader (and underlying ZipFile instance)."""
    self._zip_file.close()

  def ListEntries(self):
    """Return a list of ApkZipInfo entries for this APK."""
    return [ApkZipInfo(self._zip_file, info)
            for info in self._zip_file.infolist()]

  def FindEntry(self, file_path):
    """Return an ApkZipInfo instance for a given archive file path.

    Args:
      file_path: zip file path.
    Return:
      A new ApkZipInfo entry on success.
    Raises:
      KeyError on failure (entry not found).
    """
    info = self._zip_file.getinfo(file_path)
    return ApkZipInfo(self._zip_file, info)
|
||
|
|
||
|
|
||
|
|
||
|
class ApkNativeLibraries(object):
  """A class for the list of uncompressed shared libraries inside an APK.

  Create a new instance by passing an ApkReader for the input APK, then use
  the FindLibraryByOffset() method to find the native shared library path
  corresponding to a given file offset.

  GetLibraries() and GetDumpList() can also be used to inspect
  the state of the instance.
  """

  def __init__(self, apk_reader):
    """Initialize instance.

    Args:
      apk_reader: An ApkReader instance corresponding to the input APK.
    """
    # List of (lib_path, start_offset, end_offset) tuples, where the offsets
    # are byte positions in the APK and end_offset is exclusive.
    self._native_libs = []
    for entry in apk_reader.ListEntries():
      # Chromium uses so-called 'placeholder' native shared libraries
      # that have a size of 0, and are only used to deal with bugs in
      # older Android system releases (they are never loaded and cannot
      # appear in stack traces). Ignore these here to avoid generating
      # confusing results.
      if entry.file_size == 0:
        continue

      # Only uncompressed libraries can appear in stack traces.
      if entry.IsCompressed():
        continue

      # Only consider files within lib/ and with a filename ending with .so
      # at the moment. NOTE: Do not require a 'lib' prefix, since that would
      # prevent finding the 'crazy.libXXX.so' libraries used by Chromium.
      if (not entry.filename.startswith('lib/') or
          not entry.filename.endswith('.so')):
        continue

      start_offset = entry.file_offset
      self._native_libs.append(
          (entry.filename, start_offset, start_offset + entry.file_size))

  def IsEmpty(self):
    """Return true iff the list is empty."""
    return not self._native_libs

  def GetLibraries(self):
    """Return the sorted list of all library paths in this instance."""
    return sorted(lib_path for lib_path, _, _ in self._native_libs)

  def GetDumpList(self):
    """Retrieve full library map.

    Returns:
      A list of (lib_path, file_offset, file_size) tuples, sorted
      in increasing |file_offset| values.
    """
    result = [(lib_path, file_start, file_end - file_start)
              for lib_path, file_start, file_end in self._native_libs]
    # Use a key function: the cmp-style comparator form of sorted() and the
    # cmp() builtin only exist on Python 2.
    return sorted(result, key=lambda item: item[1])

  def FindLibraryByOffset(self, file_offset):
    """Find the native library at a given file offset.

    Args:
      file_offset: File offset within the original APK.
    Returns:
      Returns a (lib_path, lib_offset) tuple on success, or (None, 0)
      on failure. lib_path is the library's full path inside the APK
      (e.g. 'lib/armeabi-v7a/libfoo.so'), and lib_offset is the adjustment
      of file_offset within the library.
    """
    for lib_path, start_offset, end_offset in self._native_libs:
      if start_offset <= file_offset < end_offset:
        return (lib_path, file_offset - start_offset)

    return (None, 0)
|
||
|
|
||
|
|
||
|
class ApkLibraryPathTranslator(object):
  """Translates APK file paths + byte offsets into library path + offset.

  The purpose of this class is to translate a native shared library path
  that points to an APK into a new device-specific path that points to a
  native shared library, as if it was installed there. E.g.:

     ('/data/data/com.example.app-1/base.apk', 0x123be00)

  would be translated into:

     ('/data/data/com.example.app-1/base.apk!lib/libfoo.so', 0x3be00)

  If the original APK (installed as base.apk) contains an uncompressed shared
  library under lib/armeabi-v7a/libfoo.so at offset 0x1200000.

  Note that the virtual device path after the ! doesn't necessarily match
  the path inside the .apk. This doesn't really matter for the rest of
  the symbolization functions since only the file's base name can be used
  to find the corresponding file on the host.

  Usage is the following:

     1/ Create new instance.

     2/ Call AddHostApk() one or several times to add the package name,
        native libraries, and device-installed name of a host APK.

     3/ Call TranslatePath() to translate a (path, offset) tuple corresponding
        to an on-device APK, into the corresponding virtual device library
        path and offset.
  """

  # Depending on the version of the system, a non-system APK might be installed
  # on a path that looks like the following:
  #
  #  * /data/..../<package_name>-<number>.apk, where <number> is used to
  #    distinguish several versions of the APK during package updates.
  #
  #  * /data/..../<package_name>-<suffix>/base.apk, where <suffix> is a
  #    string of random ASCII characters following the dash after the
  #    package name. This serves as a way to distinguish the installation
  #    paths during package update, and randomize its final location
  #    (to prevent apps from hard-coding the paths to other apps).
  #
  #    Note that the 'base.apk' name comes from the system.
  #
  #  * /data/.../<package_name>-<suffix>/<split_name>.apk, where <suffix>
  #    is the same as above, and <split_name> is the name of an app bundle
  #    split APK.
  #
  # System APKs are installed on paths that look like /system/app/Foo.apk
  # but this class ignores them intentionally.

  # Compiled regular expression for the first format above.
  _RE_APK_PATH_1 = re.compile(
      r'/data/.*/(?P<package_name>[A-Za-z0-9_.]+)-(?P<version>[0-9]+)\.apk')

  # Compiled regular expression for the second and third formats above.
  _RE_APK_PATH_2 = re.compile(
      r'/data/.*/(?P<package_name>[A-Za-z0-9_.]+)-(?P<suffix>[^/]+)/' +
      r'(?P<apk_name>.+\.apk)')

  def __init__(self):
    """Initialize instance. Call AddHostApk() to add host apk file paths."""
    # NOTE: |_path_map| is not read or written anywhere else in this class;
    # it is kept only so the instance attributes stay unchanged.
    self._path_map = {}
    # Maps '<package_name>/<device_apk_name>' keys to the ApkNativeLibraries
    # instance registered for that APK.
    self._libs_map = {}

  def AddHostApk(self, package_name, native_libs, device_apk_name=None):
    """Add a file path to the host APK search list.

    Args:
      package_name: Corresponding apk package name.
      native_libs: ApkNativeLibraries instance for the corresponding APK.
      device_apk_name: Optional expected name of the installed APK on the
        device. This is only useful when symbolizing app bundle that run on
        Android L+. I.e. it will be ignored in other cases.
    Raises:
      KeyError if an APK was already added for the same package name and
      device APK name.
    """
    # If the APK name is not provided, use the default of 'base.apk'. This
    # will be ignored if we find <package_name>-<number>.apk file paths
    # in the input, but will work properly for Android L+, as long as we're
    # not using Android app bundles.
    device_apk_name = device_apk_name or 'base.apk'
    key = '%s/%s' % (package_name, device_apk_name)

    if native_libs.IsEmpty():
      # Log the full key: the bare |device_apk_name| argument is usually
      # None, which would not identify the APK being skipped.
      logging.debug('Ignoring host APK without any uncompressed native ' +
                    'libraries: %s', key)
      return

    if key in self._libs_map:
      raise KeyError('There is already an APK associated with (%s)' % key)

    self._libs_map[key] = native_libs

  @staticmethod
  def _MatchApkDeviceInstallPath(apk_path):
    """Check whether a given path matches an installed APK device file path.

    Args:
      apk_path: Device-specific file path.
    Returns:
      On success, a (package_name, apk_name) tuple. On failure, (None, None).
    """
    m = ApkLibraryPathTranslator._RE_APK_PATH_1.match(apk_path)
    if m:
      # <package_name>-<number>.apk paths carry no separate APK name, so
      # report the default one used by AddHostApk().
      return (m.group('package_name'), 'base.apk')

    m = ApkLibraryPathTranslator._RE_APK_PATH_2.match(apk_path)
    if m:
      return (m.group('package_name'), m.group('apk_name'))

    return (None, None)

  def TranslatePath(self, apk_path, apk_offset):
    """Translate a potential apk file path + offset into library path + offset.

    Args:
      apk_path: Library or apk file path on the device (e.g.
        '/data/data/com.example.app-XSAHKSJH/base.apk').
      apk_offset: Byte offset within the library or apk.

    Returns:
      a new (lib_path, lib_offset) tuple. If |apk_path| points to an APK,
      then this function searches inside the corresponding host-side APKs
      (added with AddHostApk() above) for the corresponding uncompressed
      native shared library at |apk_offset|, if found, this returns a new
      device-specific path corresponding to a virtual installation of said
      library with an adjusted offset.

      Otherwise, just return the original (apk_path, apk_offset) values.
    """
    if not apk_path.endswith('.apk'):
      return (apk_path, apk_offset)

    apk_package, apk_name = self._MatchApkDeviceInstallPath(apk_path)
    if not apk_package:
      return (apk_path, apk_offset)

    key = '%s/%s' % (apk_package, apk_name)
    native_libs = self._libs_map.get(key)
    if native_libs is None:
      logging.debug('Unknown %s package', key)
      return (apk_path, apk_offset)

    lib_name, new_offset = native_libs.FindLibraryByOffset(apk_offset)
    if not lib_name:
      # |key| already ends with the APK file name, so do not append '.apk'.
      logging.debug('Invalid offset in %s package: %d', key, apk_offset)
      return (apk_path, apk_offset)

    lib_name = os.path.basename(lib_name)

    # Some libraries are stored with a crazy. prefix inside the APK, this
    # is done to prevent the PackageManager from extracting the libraries
    # at installation time when running on pre Android M systems, where the
    # system linker cannot load libraries directly from APKs.
    crazy_prefix = 'crazy.'
    if lib_name.startswith(crazy_prefix):
      lib_name = lib_name[len(crazy_prefix):]

    # Put this in a fictional lib sub-directory for good measure.
    new_path = '%s!lib/%s' % (apk_path, lib_name)

    return (new_path, new_offset)
|