mirror of
https://github.com/klzgrad/naiveproxy.git
synced 2024-12-26 05:48:46 +03:00
411 lines
13 KiB
Python
411 lines
13 KiB
Python
|
# Copyright 2013 The Chromium Authors. All rights reserved.
|
||
|
# Use of this source code is governed by a BSD-style license that can be
|
||
|
# found in the LICENSE file.
|
||
|
|
||
|
import difflib
|
||
|
import hashlib
|
||
|
import itertools
|
||
|
import json
|
||
|
import os
|
||
|
import sys
|
||
|
import zipfile
|
||
|
|
||
|
|
||
|
# Set the PRINT_BUILD_EXPLANATIONS environment variable to a non-zero integer
# to have a description of what changed printed when a target is stale.
PRINT_EXPLANATIONS = int(os.getenv('PRINT_BUILD_EXPLANATIONS', 0))

# Escape hatch: a non-zero FORCE_REBUILD environment variable causes all
# targets to be rebuilt.
_FORCE_REBUILD = int(os.getenv('FORCE_REBUILD', 0))
|
||
|
|
||
|
|
||
|
def CallAndRecordIfStale(
    function, record_path=None, input_paths=None, input_strings=None,
    output_paths=None, force=False, pass_changes=False):
  """Calls function if outputs are stale.

  Outputs are considered stale if:
  - any output_paths are missing, or
  - the contents of any file within input_paths has changed, or
  - the contents of input_strings has changed.

  To debug which files are out-of-date, set the environment variable:
      PRINT_BUILD_EXPLANATIONS=1

  When |function| is called and returns normally, the new metadata is written
  to |record_path|. The return value of |function| is ignored.

  Args:
    function: The function to call.
    record_path: Path to record metadata.
      Defaults to output_paths[0] + '.md5.stamp'
    input_paths: List of paths to calculate an md5 sum on.
    input_strings: List of strings to record verbatim.
    output_paths: List of output paths.
    force: Whether to treat outputs as missing regardless of whether they
      actually are.
    pass_changes: Whether to pass a Changes instance to |function|.
  """
  assert record_path or output_paths
  input_paths = input_paths or []
  input_strings = input_strings or []
  output_paths = output_paths or []
  record_path = record_path or output_paths[0] + '.md5.stamp'

  assert record_path.endswith('.stamp'), (
      'record paths must end in \'.stamp\' so that they are easy to find '
      'and delete')

  new_metadata = _Metadata()
  new_metadata.AddStrings(input_strings)

  for path in input_paths:
    if _IsZipFile(path):
      entries = _ExtractZipEntries(path)
      new_metadata.AddZipFile(path, entries)
    else:
      new_metadata.AddFile(path, _Md5ForPath(path))

  old_metadata = None
  force = force or _FORCE_REBUILD
  missing_outputs = [x for x in output_paths if force or not os.path.exists(x)]
  # When outputs are missing, don't bother gathering change information.
  if not missing_outputs and os.path.exists(record_path):
    with open(record_path, 'r') as jsonfile:
      try:
        old_metadata = _Metadata.FromFile(jsonfile)
      except Exception:  # pylint: disable=broad-except
        # Not yet using new file format (or the stamp is unreadable): treat it
        # as if there were no previous stamp. Unlike a bare "except:", this
        # lets KeyboardInterrupt / SystemExit propagate.
        pass

  changes = Changes(old_metadata, new_metadata, force, missing_outputs)
  if not changes.HasChanges():
    return

  if PRINT_EXPLANATIONS:
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('=' * 80)
    print('Target is stale: %s' % record_path)
    print(changes.DescribeDifference())
    print('=' * 80)

  args = (changes,) if pass_changes else ()
  function(*args)

  # Only reached when |function| did not raise, so a failed run stays stale.
  with open(record_path, 'w') as f:
    new_metadata.ToFile(f)
|
||
|
|
||
|
|
||
|
class Changes(object):
  """Provides an API for querying what changed between runs.

  Attributes:
    old_metadata: Metadata from the previous run, or None when unavailable.
    new_metadata: Metadata describing the current inputs.
    force: Truthy when a rebuild was forced.
    missing_outputs: List of output paths that are considered missing.
  """

  def __init__(self, old_metadata, new_metadata, force, missing_outputs):
    self.old_metadata = old_metadata
    self.new_metadata = new_metadata
    self.force = force
    self.missing_outputs = missing_outputs

  def _GetOldTag(self, path, subpath=None):
    """Returns the previous tag for path / subpath (falsy when unknown)."""
    return self.old_metadata and self.old_metadata.GetTag(path, subpath)

  def HasChanges(self):
    """Returns whether any changes exist.

    Missing outputs count as a change, matching DescribeDifference().
    """
    return bool(
        self.force or
        self.missing_outputs or
        not self.old_metadata or
        self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5() or
        self.old_metadata.FilesMd5() != self.new_metadata.FilesMd5())

  def AddedOrModifiedOnly(self):
    """Returns whether the only changes were from added or modified (sub)files.

    No missing outputs, no removed paths/subpaths.
    """
    if (self.force or
        self.missing_outputs or
        not self.old_metadata or
        self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5()):
      return False
    if any(self.IterRemovedPaths()):
      return False
    for path in self.IterModifiedPaths():
      if any(self.IterRemovedSubpaths(path)):
        return False
    return True

  def IterAllPaths(self):
    """Generator for paths."""
    return self.new_metadata.IterPaths()

  def IterAllSubpaths(self, path):
    """Generator for subpaths."""
    return self.new_metadata.IterSubpaths(path)

  def IterAddedPaths(self):
    """Generator for paths that were added."""
    for path in self.new_metadata.IterPaths():
      if self._GetOldTag(path) is None:
        yield path

  def IterAddedSubpaths(self, path):
    """Generator for paths that were added within the given zip file."""
    for subpath in self.new_metadata.IterSubpaths(path):
      if self._GetOldTag(path, subpath) is None:
        yield subpath

  def IterRemovedPaths(self):
    """Generator for paths that were removed."""
    if self.old_metadata:
      for path in self.old_metadata.IterPaths():
        if self.new_metadata.GetTag(path) is None:
          yield path

  def IterRemovedSubpaths(self, path):
    """Generator for paths that were removed within the given zip file."""
    if self.old_metadata:
      for subpath in self.old_metadata.IterSubpaths(path):
        if self.new_metadata.GetTag(path, subpath) is None:
          yield subpath

  def IterModifiedPaths(self):
    """Generator for paths whose contents have changed."""
    for path in self.new_metadata.IterPaths():
      old_tag = self._GetOldTag(path)
      new_tag = self.new_metadata.GetTag(path)
      if old_tag is not None and old_tag != new_tag:
        yield path

  def IterModifiedSubpaths(self, path):
    """Generator for paths within a zip file whose contents have changed."""
    for subpath in self.new_metadata.IterSubpaths(path):
      old_tag = self._GetOldTag(path, subpath)
      new_tag = self.new_metadata.GetTag(path, subpath)
      if old_tag is not None and old_tag != new_tag:
        yield subpath

  def IterChangedPaths(self):
    """Generator for all changed paths (added/removed/modified)."""
    return itertools.chain(self.IterRemovedPaths(),
                           self.IterModifiedPaths(),
                           self.IterAddedPaths())

  def IterChangedSubpaths(self, path):
    """Generator for paths within a zip that were added/removed/modified."""
    return itertools.chain(self.IterRemovedSubpaths(path),
                           self.IterModifiedSubpaths(path),
                           self.IterAddedSubpaths(path))

  def DescribeDifference(self):
    """Returns a human-readable description of what changed."""
    if self.force:
      return 'force=True'
    elif self.missing_outputs:
      return 'Outputs do not exist:\n ' + '\n '.join(self.missing_outputs)
    elif self.old_metadata is None:
      return 'Previous stamp file not found.'

    if self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5():
      ndiff = difflib.ndiff(self.old_metadata.GetStrings(),
                            self.new_metadata.GetStrings())
      # Keep only the lines ndiff marks as differing.
      changed = [s for s in ndiff if not s.startswith(' ')]
      return 'Input strings changed:\n ' + '\n '.join(changed)

    if self.old_metadata.FilesMd5() == self.new_metadata.FilesMd5():
      return "There's no difference."

    lines = []
    lines.extend('Added: ' + p for p in self.IterAddedPaths())
    lines.extend('Removed: ' + p for p in self.IterRemovedPaths())
    for path in self.IterModifiedPaths():
      lines.append('Modified: ' + path)
      lines.extend(' -> Subpath added: ' + p
                   for p in self.IterAddedSubpaths(path))
      lines.extend(' -> Subpath removed: ' + p
                   for p in self.IterRemovedSubpaths(path))
      lines.extend(' -> Subpath modified: ' + p
                   for p in self.IterModifiedSubpaths(path))
    if lines:
      return 'Input files changed:\n ' + '\n '.join(lines)
    return 'I have no idea what changed (there is a bug).'
|
||
|
|
||
|
|
||
|
class _Metadata(object):
  """Data model for tracking change metadata."""
  # Schema:
  # {
  #   "files-md5": "VALUE",
  #   "strings-md5": "VALUE",
  #   "input-files": [
  #     {
  #       "path": "path.jar",
  #       "tag": "{MD5 of entries}",
  #       "entries": [
  #         { "path": "org/chromium/base/Foo.class", "tag": "{CRC32}" }, ...
  #       ]
  #     }, {
  #       "path": "path.txt",
  #       "tag": "{MD5}",
  #     }
  #   ],
  #   "input-strings": ["a", "b", ...],
  # }

  def __init__(self):
    self._files_md5 = None
    self._strings_md5 = None
    self._files = []
    self._strings = []
    # Map of (path, subpath) -> entry. Created upon first call to _GetEntry().
    self._file_map = None

  @classmethod
  def FromFile(cls, fileobj):
    """Builds a _Metadata from JSON read out of the given file object."""
    data = json.load(fileobj)
    metadata = cls()
    metadata._files_md5 = data['files-md5']
    metadata._strings_md5 = data['strings-md5']
    metadata._files = data['input-files']
    metadata._strings = data['input-strings']
    return metadata

  def ToFile(self, fileobj):
    """Writes this metadata as JSON to the given file object."""
    files_md5 = self.FilesMd5()
    strings_md5 = self.StringsMd5()
    serialized = {
        "files-md5": files_md5,
        "strings-md5": strings_md5,
        "input-files": self._files,
        "input-strings": self._strings,
    }
    json.dump(serialized, fileobj, indent=2)

  def _AssertNotQueried(self):
    """Asserts that no digest or lookup map has been computed yet."""
    assert self._files_md5 is None
    assert self._strings_md5 is None
    assert self._file_map is None

  def AddStrings(self, values):
    """Records the str() form of each of the given values."""
    self._AssertNotQueried()
    for value in values:
      self._strings.append(str(value))

  def AddFile(self, path, tag):
    """Adds metadata for a non-zip file.

    Args:
      path: Path to the file.
      tag: A short string representative of the file contents.
    """
    self._AssertNotQueried()
    record = {'path': path, 'tag': tag}
    self._files.append(record)

  def AddZipFile(self, path, entries):
    """Adds metadata for a zip file.

    Args:
      path: Path to the file.
      entries: List of (subpath, tag) tuples for entries within the zip.
    """
    self._AssertNotQueried()
    # The zip's own tag digests every subpath first, then every subpath tag.
    subpaths = [e[0] for e in entries]
    subtags = [e[1] for e in entries]
    zip_tag = _ComputeInlineMd5(subpaths + subtags)
    record = {
        'path': path,
        'tag': zip_tag,
        'entries': [{"path": e[0], "tag": e[1]} for e in entries],
    }
    self._files.append(record)

  def GetStrings(self):
    """Returns the list of input strings."""
    return self._strings

  def FilesMd5(self):
    """Returns the aggregate md5 of input files, computing it on first use."""
    if self._files_md5 is not None:
      return self._files_md5
    # Omit paths from md5 since temporary files have random names.
    ordered_paths = sorted(self.IterPaths())
    self._files_md5 = _ComputeInlineMd5(
        self.GetTag(p) for p in ordered_paths)
    return self._files_md5

  def StringsMd5(self):
    """Returns the aggregate md5 of input strings, computing it on first use."""
    if self._strings_md5 is not None:
      return self._strings_md5
    self._strings_md5 = _ComputeInlineMd5(self._strings)
    return self._strings_md5

  def _GetEntry(self, path, subpath=None):
    """Returns the JSON entry for the given path / subpath."""
    if self._file_map is None:
      # Top-level entries are keyed by (path, None); zip members by
      # (zip path, member path).
      lookup = self._file_map = {}
      for outer in self._files:
        outer_path = outer['path']
        lookup[(outer_path, None)] = outer
        for inner in outer.get('entries', ()):
          lookup[(outer_path, inner['path'])] = inner
    return self._file_map.get((path, subpath))

  def GetTag(self, path, subpath=None):
    """Returns the tag for the given path / subpath."""
    entry = self._GetEntry(path, subpath)
    if not entry:
      return entry
    return entry['tag']

  def IterPaths(self):
    """Returns a generator for all top-level paths."""
    return (record['path'] for record in self._files)

  def IterSubpaths(self, path):
    """Returns a generator for all subpaths in the given zip.

    If the given path is not a zip file or doesn't exist, returns an empty
    iterable.
    """
    zip_entry = self._GetEntry(path)
    if not zip_entry:
      return ()
    return (member['path'] for member in zip_entry.get('entries', []))
|
||
|
|
||
|
|
||
|
def _UpdateMd5ForFile(md5, path, block_size=2**16):
  """Feeds the contents of the file at |path| into |md5|, chunk by chunk.

  Args:
    md5: Hash object to update in place.
    path: Path of the file to read (opened in binary mode).
    block_size: Maximum number of bytes requested per read.
  """
  with open(path, 'rb') as stream:
    # iter() with a sentinel stops at the first empty read, i.e. end of file.
    for chunk in iter(lambda: stream.read(block_size), b''):
      md5.update(chunk)
|
||
|
|
||
|
|
||
|
def _UpdateMd5ForDirectory(md5, dir_path):
  """Feeds the contents of every file under |dir_path| into |md5|.

  Only file contents are hashed; file and directory names are not.

  Args:
    md5: Hash object to update in place.
    dir_path: Root directory to walk recursively.
  """
  for root, dirs, files in os.walk(dir_path):
    # os.walk() lists entries in an arbitrary, file-system dependent order.
    # Sort the files, and sort |dirs| in place (which controls the order in
    # which sub-directories are visited), so that the resulting digest is
    # deterministic for identical directory contents.
    dirs.sort()
    for f in sorted(files):
      _UpdateMd5ForFile(md5, os.path.join(root, f))
|
||
|
|
||
|
|
||
|
def _Md5ForPath(path):
  """Returns the hex md5 digest for |path|.

  A directory is digested via _UpdateMd5ForDirectory; anything else is
  digested as a single file via _UpdateMd5ForFile.
  """
  digest = hashlib.md5()
  update = _UpdateMd5ForDirectory if os.path.isdir(path) else _UpdateMd5ForFile
  update(digest, path)
  return digest.hexdigest()
|
||
|
|
||
|
|
||
|
def _ComputeInlineMd5(iterable):
  """Computes the md5 of the concatenated parameters.

  Each item is converted with str() and the results are hashed back to back,
  with no separator between them.

  Args:
    iterable: Items to hash.

  Returns:
    The hex digest as a string.
  """
  md5 = hashlib.md5()
  for item in iterable:
    data = str(item)
    # hashlib only accepts bytes. On Python 2 str() already yields bytes; on
    # Python 3 it yields text, which must be encoded first.
    if not isinstance(data, bytes):
      data = data.encode('utf-8')
    md5.update(data)
  return md5.hexdigest()
|
||
|
|
||
|
|
||
|
def _IsZipFile(path):
  """Returns whether to treat the given file as a zip file."""
  # ijar doesn't set the CRC32 field.
  is_interface_jar = path.endswith('.interface.jar')
  has_zip_suffix = path.endswith(('.zip', '.apk', '.jar', '.srcjar'))
  return has_zip_suffix and not is_interface_jar
|
||
|
|
||
|
|
||
|
def _ExtractZipEntries(path):
  """Returns a list of (path, CRC32) of all files within |path|.

  The second tuple element is the entry's CRC plus its compress_type value.
  """
  with zipfile.ZipFile(path) as archive:
    # A zero CRC means a directory or an empty file; those are skipped.
    return [(info.filename, info.CRC + info.compress_type)
            for info in archive.infolist()
            if info.CRC]
|