naiveproxy/base/strings/pattern.cc
2018-01-28 13:32:06 -05:00

170 lines
5.1 KiB
C++

// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/strings/pattern.h"
#include "base/third_party/icu/icu_utf.h"
namespace base {
namespace {
static bool IsWildcard(base_icu::UChar32 character) {
return character == '*' || character == '?';
}
// Move the strings pointers to the point where they start to differ.
template <typename CHAR, typename NEXT>
static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,
const CHAR** string, const CHAR* string_end,
NEXT next) {
const CHAR* escape = nullptr;
while (*pattern != pattern_end && *string != string_end) {
if (!escape && IsWildcard(**pattern)) {
// We don't want to match wildcard here, except if it's escaped.
return;
}
// Check if the escapement char is found. If so, skip it and move to the
// next character.
if (!escape && **pattern == '\\') {
escape = *pattern;
next(pattern, pattern_end);
continue;
}
// Check if the chars match, if so, increment the ptrs.
const CHAR* pattern_next = *pattern;
const CHAR* string_next = *string;
base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
if (pattern_char == next(&string_next, string_end) &&
pattern_char != CBU_SENTINEL) {
*pattern = pattern_next;
*string = string_next;
} else {
// Uh oh, it did not match, we are done. If the last char was an
// escapement, that means that it was an error to advance the ptr here,
// let's put it back where it was. This also mean that the MatchPattern
// function will return false because if we can't match an escape char
// here, then no one will.
if (escape) {
*pattern = escape;
}
return;
}
escape = nullptr;
}
}
template <typename CHAR, typename NEXT>
static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
while (*pattern != end) {
if (!IsWildcard(**pattern))
return;
next(pattern, end);
}
}
template <typename CHAR, typename NEXT>
static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
const CHAR* pattern, const CHAR* pattern_end,
int depth,
NEXT next) {
const int kMaxDepth = 16;
if (depth > kMaxDepth)
return false;
// Eat all the matching chars.
EatSameChars(&pattern, pattern_end, &eval, eval_end, next);
// If the string is empty, then the pattern must be empty too, or contains
// only wildcards.
if (eval == eval_end) {
EatWildcard(&pattern, pattern_end, next);
return pattern == pattern_end;
}
// Pattern is empty but not string, this is not a match.
if (pattern == pattern_end)
return false;
// If this is a question mark, then we need to compare the rest with
// the current string or the string with one character eaten.
const CHAR* next_pattern = pattern;
next(&next_pattern, pattern_end);
if (pattern[0] == '?') {
if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
depth + 1, next))
return true;
const CHAR* next_eval = eval;
next(&next_eval, eval_end);
if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,
depth + 1, next))
return true;
}
// This is a *, try to match all the possible substrings with the remainder
// of the pattern.
if (pattern[0] == '*') {
// Collapse duplicate wild cards (********** into *) so that the
// method does not recurse unnecessarily. http://crbug.com/52839
EatWildcard(&next_pattern, pattern_end, next);
while (eval != eval_end) {
if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
depth + 1, next))
return true;
eval++;
}
// We reached the end of the string, let see if the pattern contains only
// wildcards.
if (eval == eval_end) {
EatWildcard(&pattern, pattern_end, next);
if (pattern != pattern_end)
return false;
return true;
}
}
return false;
}
struct NextCharUTF8 {
base_icu::UChar32 operator()(const char** p, const char* end) {
base_icu::UChar32 c;
int offset = 0;
CBU8_NEXT(*p, offset, end - *p, c);
*p += offset;
return c;
}
};
struct NextCharUTF16 {
base_icu::UChar32 operator()(const char16** p, const char16* end) {
base_icu::UChar32 c;
int offset = 0;
CBU16_NEXT(*p, offset, end - *p, c);
*p += offset;
return c;
}
};
} // namespace
bool MatchPattern(const StringPiece& eval, const StringPiece& pattern) {
return MatchPatternT(eval.data(), eval.data() + eval.size(),
pattern.data(), pattern.data() + pattern.size(),
0, NextCharUTF8());
}
bool MatchPattern(const StringPiece16& eval, const StringPiece16& pattern) {
return MatchPatternT(eval.data(), eval.data() + eval.size(),
pattern.data(), pattern.data() + pattern.size(),
0, NextCharUTF16());
}
} // namespace base