mirror of
https://github.com/klzgrad/naiveproxy.git
synced 2025-03-27 18:43:56 +03:00
487 lines
18 KiB
Python
Executable File
487 lines
18 KiB
Python
Executable File
#!/usr/bin/env python
|
|
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
|
# Use of this source code is governed by a BSD-style license that can be
|
|
# found in the LICENSE file.
|
|
|
|
'''Collections of messages and their translations, called cliques. Also
|
|
collections of cliques (uber-cliques).
|
|
'''
|
|
|
|
import re
|
|
import types
|
|
|
|
from grit import constants
|
|
from grit import exception
|
|
from grit import lazy_re
|
|
from grit import pseudo
|
|
from grit import pseudo_rtl
|
|
from grit import tclib
|
|
|
|
|
|
class UberClique(object):
    '''A factory (NOT a singleton factory) for making cliques. It has several
    methods for working with the cliques created using the factory.
    '''

    def __init__(self):
        # A map from message ID to list of cliques whose source messages have
        # that ID. This will contain all cliques created using this factory.
        # Different messages can have the same ID because they have the
        # same translateable portion and placeholder names, but occur in
        # different places in the resource tree.
        #
        # Each list of cliques is kept sorted by description, to achieve
        # stable results from the BestClique method, see below.
        self.cliques_ = {}

        # A map of clique IDs to the languages for which we fell back to
        # English. The languages are the keys of an inner dict (used as a
        # set; the values are always 1).
        self.fallback_translations_ = {}

        # A map of clique IDs to the languages whose translations are
        # missing, stored the same way as fallback_translations_.
        self.missing_translations_ = {}

    def _AddMissingTranslation(self, lang, clique, is_error):
        '''Records that 'clique' has no translation for language 'lang'.

        Args:
          lang: 'fr'
          clique: the clique lacking the translation (only GetId() is used)
          is_error: True to record it as a missing translation, False to
                    record it as a fallback to English.
        '''
        if is_error:
            table = self.missing_translations_
        else:
            table = self.fallback_translations_
        table.setdefault(clique.GetId(), {})[lang] = 1

    def HasMissingTranslations(self):
        '''Returns True if any missing (not merely fallback) translation has
        been recorded.
        '''
        return len(self.missing_translations_) > 0

    def MissingTranslationsReport(self):
        '''Returns a string suitable for printing to report missing
        and fallback translations to the user. The string is empty if
        nothing has been recorded.
        '''
        def ReportTranslation(clique, langs):
            text = clique.GetMessage().GetPresentableContent()
            # The text 'error' (usually 'Error:' but we are conservative)
            # can trigger some build environments (Visual Studio, we're
            # looking at you) to consider invocation of grit to have failed,
            # so we make sure never to output that word.
            extract = re.sub('(?i)error', 'REDACTED', text[0:40])[0:40]
            ellipsis = '...' if len(text) > 40 else ''
            # Sort so that the report is deterministic, and so that we have
            # a real list to slice (dict.keys() is not sliceable on
            # Python 3).
            langs = sorted(langs)
            describe_langs = ','.join(langs[0:6])
            if len(langs) > 6:
                describe_langs += " and %d more" % (len(langs) - 6)
            return " %s \"%s%s\" %s" % (clique.GetId(), extract, ellipsis,
                                        describe_langs)

        sections = (
            ("WARNING: Fell back to English for the following translations:",
             self.fallback_translations_),
            ("ERROR: The following translations are MISSING:",
             self.missing_translations_),
        )
        lines = []
        for header, table in sections:
            if not table:
                continue
            lines.append(header)
            for (msg_id, langs) in table.items():
                # Any clique with this ID will do for display purposes; we
                # use the first one.
                lines.append(
                    ReportTranslation(self.cliques_[msg_id][0], langs.keys()))
        return '\n'.join(lines)

    def MakeClique(self, message, translateable=True):
        '''Create a new clique initialized with a message.

        Args:
          message: tclib.Message()
          translateable: True | False

        Return:
          The newly created MessageClique, which is also registered in this
          uberclique under the message's ID.
        '''
        clique = MessageClique(self, message, translateable)

        # Enable others to find this clique by its message ID
        msg_id = message.GetId()
        if msg_id in self.cliques_:
            presentable_text = clique.GetMessage().GetPresentableContent()
            if not message.HasAssignedId():
                # Messages sharing a computed ID are expected to have the
                # same presentable content.
                for c in self.cliques_[msg_id]:
                    assert (c.GetMessage().GetPresentableContent() ==
                            presentable_text)
            self.cliques_[msg_id].append(clique)
            # We need to keep each list of cliques sorted by description, to
            # achieve stable results from the BestClique method, see below.
            self.cliques_[msg_id].sort(
                key=lambda c: c.GetMessage().GetDescription())
        else:
            self.cliques_[msg_id] = [clique]

        return clique

    def FindCliqueAndAddTranslation(self, translation, language):
        '''Adds the specified translation to the clique with the source message
        it is a translation of. If several cliques share the ID, the
        translation is added to each of them.

        Args:
          translation: tclib.Translation()
          language: 'en' | 'fr' ...

        Return:
          True if the source message was found, otherwise false.
        '''
        if translation.GetId() in self.cliques_:
            for clique in self.cliques_[translation.GetId()]:
                clique.AddTranslation(translation, language)
            return True
        else:
            return False

    def BestClique(self, id):
        '''Returns the "best" clique from a list of cliques. All the cliques
        must have the same ID. The "best" clique is chosen in the following
        order of preference:
        - The first clique that has a non-ID-based description.
        - If no such clique found, the first clique with an ID-based description.
        - Otherwise the first clique.

        This method is stable in terms of always returning a clique with
        an identical description (on different runs of GRIT on the same
        data) because self.cliques_ is sorted by description.

        Args:
          id: a message ID that is a key of self.cliques_ (KeyError
              otherwise)
        '''
        clique_list = self.cliques_[id]
        clique_with_id = None
        for clique in clique_list:
            description = clique.GetMessage().GetDescription()
            if not description:
                continue
            if not description.startswith('ID:'):
                # this is the preferred case so we exit right away
                return clique
            if clique_with_id is None:
                clique_with_id = clique
        if clique_with_id is not None:
            return clique_with_id
        # No clique has a description at all: use the first one, if any.
        return clique_list[0] if clique_list else None

    def BestCliquePerId(self):
        '''Iterates over the list of all cliques and returns the best clique for
        each ID. This will be the first clique with a source message that has a
        non-empty description, or an arbitrary clique if none of them has a
        description.
        '''
        for id in self.cliques_:
            yield self.BestClique(id)

    def BestCliqueByOriginalText(self, text, meaning):
        '''Finds the "best" (as in BestClique()) clique that has original text
        'text' and meaning 'meaning'.

        Return:
          The source message (GetMessage()) of the matching clique -- note,
          not the clique itself -- or None if there is no such clique.
        '''
        # If needed, this can be optimized by maintaining a map of
        # fingerprints of original text+meaning to cliques.
        for c in self.BestCliquePerId():
            msg = c.GetMessage()
            if msg.GetRealContent() == text and msg.GetMeaning() == meaning:
                return msg
        return None

    def AllMessageIds(self):
        '''Returns a list of all defined message IDs.
        '''
        # list() so that callers really get a list on Python 3 as well.
        return list(self.cliques_)

    def AllCliques(self):
        '''Iterates over all cliques. Note that this can return multiple cliques
        with the same ID.
        '''
        for cliques in self.cliques_.values():
            for c in cliques:
                yield c

    def GenerateXtbParserCallback(self, lang, debug=False):
        '''Creates a callback function as required by grit.xtb_reader.Parse().
        This callback will create Translation objects for each message from
        the XTB that exists in this uberclique, and add them as translations for
        the relevant cliques. The callback will add translations to the language
        specified by 'lang'

        Args:
          lang: 'fr'
          debug: True | False

        Return:
          A function Callback(id, structure), where 'structure' is an
          iterable of (is_placeholder, text) pairs. The callback raises
          exception.MismatchingPlaceholders if the translation names a
          placeholder that the original message does not have.
        '''
        def Callback(id, structure):
            if id not in self.cliques_:
                if debug:
                    print("Ignoring translation #%s" % id)
                return

            if debug:
                print("Adding translation #%s" % id)

            # We fetch placeholder information from the original message (the
            # XTB file only contains placeholder names).
            original_msg = self.BestClique(id).GetMessage()

            translation = tclib.Translation(id=id)
            for is_ph, text in structure:
                if not is_ph:
                    translation.AppendText(text)
                    continue
                for ph in original_msg.GetPlaceholders():
                    if ph.GetPresentation() == text:
                        translation.AppendPlaceholder(tclib.Placeholder(
                            ph.GetPresentation(), ph.GetOriginal(),
                            ph.GetExample()))
                        break
                else:
                    # The loop finished without finding a placeholder with
                    # this presentation in the original message.
                    raise exception.MismatchingPlaceholders(
                        'Translation for message ID %s had <ph name="%s"/>, no match\n'
                        'in original message' % (id, text))
            self.FindCliqueAndAddTranslation(translation, lang)
        return Callback
|
|
|
|
|
|
class CustomType(object):
    '''A base class you should implement if you wish to specify a custom type
    for a message clique (i.e. custom validation and optional modification of
    translations).'''

    def Validate(self, message):
        '''Returns true if the message (a tclib.Message object) is valid,
        otherwise false.

        Must be overridden; this base implementation raises
        NotImplementedError.
        '''
        raise NotImplementedError()

    def ValidateAndModify(self, lang, translation):
        '''Returns true if the translation (a tclib.Translation object) is valid,
        otherwise false. The language is also passed in. This method may modify
        the translation that is passed in, if it so wishes.

        Must be overridden; this base implementation raises
        NotImplementedError.
        '''
        raise NotImplementedError()

    def ModifyTextPart(self, lang, text):
        '''If you call ModifyEachTextPart, it will turn around and call this method
        for each text part of the translation. You should return the modified
        version of the text, or just the original text to not change anything.

        Must be overridden if ModifyEachTextPart is used; this base
        implementation raises NotImplementedError.
        '''
        raise NotImplementedError()

    def ModifyEachTextPart(self, lang, translation):
        '''Call this to easily modify one or more of the textual parts of a
        translation. It will call ModifyTextPart for each part of the
        translation.

        The list returned by translation.GetContent() is modified in place;
        parts that are not strings are left untouched.
        '''
        # types.StringTypes only exists on Python 2; fall back to str so the
        # same code also runs on Python 3.
        string_types = getattr(types, 'StringTypes', (str,))
        contents = translation.GetContent()
        for ix, part in enumerate(contents):
            if isinstance(part, string_types):
                contents[ix] = self.ModifyTextPart(lang, part)
|
|
|
|
|
|
class OneOffCustomType(CustomType):
    '''A very simple custom type that performs the validation expressed by
    the input expression on all languages including the source language.
    The expression can access the variables 'lang', 'msg' and 'text()' where
    'lang' is the language of 'msg', 'msg' is the message or translation being
    validated and 'text()' returns the real contents of 'msg' (for shorthand).
    '''

    def __init__(self, expression):
        # The Python expression (a string) evaluated for every validation.
        self.expr = expression

    def Validate(self, message):
        '''Validates the source message by evaluating the expression with
        'lang' set to MessageClique.source_language.
        '''
        source_lang = MessageClique.source_language
        return self.ValidateAndModify(source_lang, message)

    def ValidateAndModify(self, lang, msg):
        '''Evaluates the expression against 'msg' and returns its result.

        The expression sees exactly three names: 'lang', 'msg' and 'text'
        (a zero-argument function returning msg.GetRealContent()).
        '''
        def text():
            return msg.GetRealContent()

        # NOTE(review): self.expr is passed to eval() as-is, so it must only
        # ever come from a trusted source.
        scope = {
            'lang': lang,
            'text': text,
            'msg': msg,
        }
        return eval(self.expr, {}, scope)
|
|
|
|
|
|
class MessageClique(object):
    '''A message along with all of its translations. Also code to bring
    translations together with their original message.'''

    # change this to the language code of Messages you add to cliques_.
    # TODO(joi) Actually change this based on the <grit> node's source language
    source_language = 'en'

    # A constant translation we use when asked for a translation into the
    # special language constants.CONSTANT_LANGUAGE.
    CONSTANT_TRANSLATION = tclib.Translation(text='TTTTTT')

    # A pattern to match messages that are empty or whitespace only.
    # (The backslash is escaped so the literal has no invalid escape
    # sequence; the pattern itself is unchanged.)
    WHITESPACE_MESSAGE = lazy_re.compile(u'^\\s*$')

    def __init__(self, uber_clique, message, translateable=True,
                 custom_type=None):
        '''Create a new clique initialized with just a message.

        Note that messages with a body comprised only of whitespace will implicitly
        be marked non-translatable.

        Args:
          uber_clique: Our uber-clique (collection of cliques)
          message: tclib.Message()
          translateable: True | False
          custom_type: instance of clique.CustomType interface

        Throws:
          grit.exception.InvalidMessage if custom_type is given and rejects
          the message (see SetCustomType).
        '''
        # Our parent
        self.uber_clique = uber_clique
        # If not translateable, we only store the original message.
        self.translateable = translateable

        # We implicitly mark messages that have a whitespace-only body as
        # non-translateable.
        if MessageClique.WHITESPACE_MESSAGE.match(message.GetRealContent()):
            self.translateable = False

        # A mapping of language identifiers to tclib.BaseMessage and its
        # subclasses (i.e. tclib.Message and tclib.Translation).
        self.clique = {MessageClique.source_language: message}
        # A list of the "shortcut groups" this clique is
        # part of. Within any given shortcut group, no shortcut key (e.g. &J)
        # must appear more than once in each language for all cliques that
        # belong to the group.
        self.shortcut_groups = []
        # An instance of the CustomType interface, or None. If this is set, it will
        # be used to validate the original message and translations thereof, and
        # will also get a chance to modify translations of the message.
        self.SetCustomType(custom_type)

    def GetMessage(self):
        '''Retrieves the tclib.Message that is the source for this clique.'''
        return self.clique[MessageClique.source_language]

    def GetId(self):
        '''Retrieves the message ID of the messages in this clique.'''
        return self.GetMessage().GetId()

    def IsTranslateable(self):
        '''Returns whether this clique accepts translations. This is False
        if it was created non-translateable or has a whitespace-only
        source message.
        '''
        return self.translateable

    def AddToShortcutGroup(self, group):
        '''Appends 'group' to the list of shortcut groups of this clique.'''
        self.shortcut_groups.append(group)

    def SetCustomType(self, custom_type):
        '''Makes this clique use custom_type for validating messages and
        translations, and optionally modifying translations.

        Throws:
          grit.exception.InvalidMessage if custom_type is set and its
          Validate() rejects the source message. Note that custom_type has
          already been stored on the clique at that point.
        '''
        self.custom_type = custom_type
        if custom_type and not custom_type.Validate(self.GetMessage()):
            raise exception.InvalidMessage(self.GetMessage().GetRealContent())

    def MessageForLanguage(self, lang, pseudo_if_no_match=True,
                           fallback_to_english=False):
        '''Returns the message/translation for the specified language, providing
        a pseudotranslation if there is no available translation and a pseudo-
        translation is requested.

        The translation of any message whatsoever in the special language
        'x_constant' is the message "TTTTTT".

        Args:
          lang: 'en'
          pseudo_if_no_match: True
          fallback_to_english: False

        Return:
          tclib.BaseMessage
        '''
        if not self.translateable:
            return self.GetMessage()

        if lang == constants.CONSTANT_LANGUAGE:
            return self.CONSTANT_TRANSLATION

        # self.clique is keyed by language, so look it up directly.
        if lang in self.clique:
            return self.clique[lang]

        if lang == constants.FAKE_BIDI:
            return pseudo_rtl.PseudoRTLMessage(self.GetMessage())

        if fallback_to_english:
            self.uber_clique._AddMissingTranslation(lang, self, is_error=False)
            return self.GetMessage()

        # If we're not supposed to generate pseudotranslations, we add an error
        # report to a list of errors, then fail at a higher level, so that we
        # get a list of all messages that are missing translations.
        if not pseudo_if_no_match:
            self.uber_clique._AddMissingTranslation(lang, self, is_error=True)

        return pseudo.PseudoMessage(self.GetMessage())

    def AllMessagesThatMatch(self, lang_re, include_pseudo=True):
        '''Returns a map of all messages that match 'lang', including the pseudo
        translation if requested.

        NOTE(review): for a non-translateable clique this returns a
        one-element *list* holding the source message, not a map. This is
        kept as-is because callers may rely on it -- confirm before
        changing.

        Args:
          lang_re: re.compile('fr|en')
          include_pseudo: True

        Return:
          { 'en' : tclib.Message,
            'fr' : tclib.Translation,
            pseudo.PSEUDO_LANG : tclib.Translation }
        '''
        if not self.translateable:
            return [self.GetMessage()]

        matches = {}
        for msglang in self.clique:
            if lang_re.match(msglang):
                matches[msglang] = self.clique[msglang]

        if include_pseudo:
            matches[pseudo.PSEUDO_LANG] = pseudo.PseudoMessage(self.GetMessage())

        return matches

    def AddTranslation(self, translation, language):
        '''Add a translation to this clique. The translation must have the same
        ID as the message that is the source for this clique.

        If this clique is not translateable, the function just returns.

        Args:
          translation: tclib.Translation()
          language: 'en'

        Throws:
          grit.exception.InvalidTranslation if the translation you're trying to add
          doesn't have the same message ID as the source message of this clique.
          AssertionError if the clique already has a message for 'language',
          or if the translation does not have the same number of
          placeholders as the source message.
        '''
        if not self.translateable:
            return
        if translation.GetId() != self.GetId():
            raise exception.InvalidTranslation(
                'Msg ID %s, transl ID %s' % (self.GetId(), translation.GetId()))

        assert language not in self.clique

        # Because two messages can differ in the original content of their
        # placeholders yet share the same ID (because they are otherwise the
        # same), the translation we are getting may have different original
        # content for placeholders than our message, yet it is still the right
        # translation for our message (because it is for the same ID). We must
        # therefore fetch the original content of placeholders from our original
        # English message.
        #
        # See grit.clique_unittest.MessageCliqueUnittest.testSemiIdenticalCliques
        # for a concrete explanation of why this is necessary.

        original = self.MessageForLanguage(self.source_language, False)
        if len(original.GetPlaceholders()) != len(translation.GetPlaceholders()):
            mismatch = ("ERROR: '%s' translation of message id %s does not match" %
                        (language, translation.GetId()))
            print(mismatch)
            # Raised explicitly (not 'assert False') so that the check is
            # not stripped when Python runs with -O.
            raise AssertionError(mismatch)

        transl_msg = tclib.Translation(id=self.GetId(),
                                       text=translation.GetPresentableContent(),
                                       placeholders=original.GetPlaceholders())

        # A failed custom validation is only reported; the translation is
        # still stored below.
        if (self.custom_type and
                not self.custom_type.ValidateAndModify(language, transl_msg)):
            print("WARNING: %s translation failed validation: %s" % (
                language, transl_msg.GetId()))

        self.clique[language] = transl_msg
|
|
|