// Copyright (C) 2011 The Libphonenumber Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Author: George Yakovlev
//         Philippe Liard

#include "phonenumbers/regexp_adapter.h"

#include <cstdio>
#include <string>
#include <vector>

#include <gtest/gtest.h>

#include "phonenumbers/base/memory/scoped_ptr.h"
#include "phonenumbers/stl_util.h"
#include "phonenumbers/stringutil.h"

#ifdef I18N_PHONENUMBERS_USE_RE2
#include "phonenumbers/regexp_adapter_re2.h"
#else
#include "phonenumbers/regexp_adapter_icu.h"
#endif  // I18N_PHONENUMBERS_USE_RE2

namespace i18n {
namespace phonenumbers {

using std::vector;

// Structure that contains the attributes used to test an implementation of the
// regexp adapter.
42struct RegExpTestContext { 43 explicit RegExpTestContext(const string& name, 44 const AbstractRegExpFactory* factory) 45 : name(name), 46 factory(factory), 47 digits(factory->CreateRegExp("\\d+")), 48 parentheses_digits(factory->CreateRegExp("\\((\\d+)\\)")), 49 single_digit(factory->CreateRegExp("\\d")), 50 two_digit_groups(factory->CreateRegExp("(\\d+)-(\\d+)")), 51 six_digit_groups(factory->CreateRegExp( 52 "(\\d+)-(\\d+)-(\\d+)-(\\d+)-(\\d+)-(\\d+)")) {} 53 54 const string name; 55 const scoped_ptr<const AbstractRegExpFactory> factory; 56 const scoped_ptr<const RegExp> digits; 57 const scoped_ptr<const RegExp> parentheses_digits; 58 const scoped_ptr<const RegExp> single_digit; 59 const scoped_ptr<const RegExp> two_digit_groups; 60 const scoped_ptr<const RegExp> six_digit_groups; 61}; 62 63class RegExpAdapterTest : public testing::Test { 64 protected: 65 RegExpAdapterTest() { 66#ifdef I18N_PHONENUMBERS_USE_RE2 67 contexts_.push_back( 68 new RegExpTestContext("RE2", new RE2RegExpFactory())); 69#else 70 contexts_.push_back( 71 new RegExpTestContext("ICU Regex", new ICURegExpFactory())); 72#endif // I18N_PHONENUMBERS_USE_RE2 73 } 74 75 ~RegExpAdapterTest() { gtl::STLDeleteElements(&contexts_); } 76 77 static string ErrorMessage(const RegExpTestContext& context) { 78 return StrCat("Test failed with ", context.name, " implementation."); 79 } 80 81 typedef vector<const RegExpTestContext*>::const_iterator TestContextIterator; 82 vector<const RegExpTestContext*> contexts_; 83}; 84 85TEST_F(RegExpAdapterTest, TestConsumeNoMatch) { 86 for (vector<const RegExpTestContext*>::const_iterator it = contexts_.begin(); 87 it != contexts_.end(); 88 ++it) { 89 const RegExpTestContext& context = **it; 90 const scoped_ptr<RegExpInput> input( 91 context.factory->CreateInput("+1-123-456-789")); 92 93 // When 'true' is passed to Consume(), the match occurs from the beginning 94 // of the input. 
95 ASSERT_FALSE(context.digits->Consume( 96 input.get(), true, NULL, NULL, NULL, NULL, NULL, NULL)) 97 << ErrorMessage(context); 98 ASSERT_EQ("+1-123-456-789", input->ToString()) << ErrorMessage(context); 99 100 string res1; 101 ASSERT_FALSE(context.parentheses_digits->Consume( 102 input.get(), true, &res1, NULL, NULL, NULL, NULL, NULL)) 103 << ErrorMessage(context); 104 ASSERT_EQ("+1-123-456-789", input->ToString()) << ErrorMessage(context); 105 ASSERT_EQ("", res1) << ErrorMessage(context); 106 } 107} 108 109 110TEST_F(RegExpAdapterTest, TestConsumeWithNull) { 111 for (TestContextIterator it = contexts_.begin(); it != contexts_.end(); 112 ++it) { 113 const RegExpTestContext& context = **it; 114 const AbstractRegExpFactory& factory = *context.factory; 115 const scoped_ptr<RegExpInput> input(factory.CreateInput("+123")); 116 const scoped_ptr<const RegExp> plus_sign(factory.CreateRegExp("(\\+)")); 117 118 ASSERT_TRUE(plus_sign->Consume(input.get(), true, NULL, NULL, NULL, NULL, 119 NULL, NULL)) 120 << ErrorMessage(context); 121 ASSERT_EQ("123", input->ToString()) << ErrorMessage(context); 122 } 123} 124 125TEST_F(RegExpAdapterTest, TestConsumeRetainsMatches) { 126 for (TestContextIterator it = contexts_.begin(); it != contexts_.end(); 127 ++it) { 128 const RegExpTestContext& context = **it; 129 const scoped_ptr<RegExpInput> input( 130 context.factory->CreateInput("1-123-456-789")); 131 132 string res1, res2; 133 ASSERT_TRUE(context.two_digit_groups->Consume( 134 input.get(), true, &res1, &res2, NULL, NULL, NULL, NULL)) 135 << ErrorMessage(context); 136 ASSERT_EQ("-456-789", input->ToString()) << ErrorMessage(context); 137 ASSERT_EQ("1", res1) << ErrorMessage(context); 138 ASSERT_EQ("123", res2) << ErrorMessage(context); 139 } 140} 141 142TEST_F(RegExpAdapterTest, TestFindAndConsume) { 143 for (TestContextIterator it = contexts_.begin(); it != contexts_.end(); 144 ++it) { 145 const RegExpTestContext& context = **it; 146 const scoped_ptr<RegExpInput> input( 147 
context.factory->CreateInput("+1-123-456-789")); 148 const scoped_ptr<RegExpInput> input_with_six_digit_groups( 149 context.factory->CreateInput("111-222-333-444-555-666")); 150 151 // When 'false' is passed to Consume(), the match can occur from any place 152 // in the input. 153 ASSERT_TRUE(context.digits->Consume(input.get(), false, NULL, NULL, NULL, 154 NULL, NULL, NULL)) 155 << ErrorMessage(context); 156 ASSERT_EQ("-123-456-789", input->ToString()) << ErrorMessage(context); 157 158 ASSERT_TRUE(context.digits->Consume(input.get(), false, NULL, NULL, NULL, 159 NULL, NULL, NULL)) 160 << ErrorMessage(context); 161 ASSERT_EQ("-456-789", input->ToString()) << ErrorMessage(context); 162 163 ASSERT_FALSE(context.parentheses_digits->Consume( 164 input.get(), false, NULL, NULL, NULL, NULL, NULL, NULL)) 165 << ErrorMessage(context); 166 ASSERT_EQ("-456-789", input->ToString()) << ErrorMessage(context); 167 168 string res1, res2; 169 ASSERT_TRUE(context.two_digit_groups->Consume( 170 input.get(), false, &res1, &res2, NULL, NULL, NULL, NULL)) 171 << ErrorMessage(context); 172 printf("previous input: %s", input.get()->ToString().c_str()); 173 ASSERT_EQ("", input->ToString()) << ErrorMessage(context); 174 ASSERT_EQ("456", res1) << ErrorMessage(context); 175 ASSERT_EQ("789", res2) << ErrorMessage(context); 176 177 // Testing maximum no of substrings that can be matched presently, six. 
178 string mat1, mat2, res3, res4, res5, res6; 179 ASSERT_TRUE(context.six_digit_groups->Consume( 180 input_with_six_digit_groups.get(), false, &mat1, &mat2, &res3, &res4, 181 &res5, &res6)) 182 << ErrorMessage(context); 183 printf("Present input: %s", 184 input_with_six_digit_groups.get()->ToString().c_str()); 185 ASSERT_EQ("", input_with_six_digit_groups->ToString()) 186 << ErrorMessage(context); 187 ASSERT_EQ("111", mat1) << ErrorMessage(context); 188 ASSERT_EQ("222", mat2) << ErrorMessage(context); 189 ASSERT_EQ("333", res3) << ErrorMessage(context); 190 ASSERT_EQ("444", res4) << ErrorMessage(context); 191 ASSERT_EQ("555", res5) << ErrorMessage(context); 192 ASSERT_EQ("666", res6) << ErrorMessage(context); 193 } 194} 195 196TEST_F(RegExpAdapterTest, TestPartialMatch) { 197 for (TestContextIterator it = contexts_.begin(); it != contexts_.end(); 198 ++it) { 199 const RegExpTestContext& context = **it; 200 const AbstractRegExpFactory& factory = *context.factory; 201 const scoped_ptr<const RegExp> reg_exp(factory.CreateRegExp("([\\da-z]+)")); 202 string matched; 203 204 EXPECT_TRUE(reg_exp->PartialMatch("12345af", &matched)) 205 << ErrorMessage(context); 206 EXPECT_EQ("12345af", matched) << ErrorMessage(context); 207 208 EXPECT_TRUE(reg_exp->PartialMatch("12345af", NULL)) 209 << ErrorMessage(context); 210 211 EXPECT_TRUE(reg_exp->PartialMatch("[12]", &matched)) 212 << ErrorMessage(context); 213 EXPECT_EQ("12", matched) << ErrorMessage(context); 214 215 matched.clear(); 216 EXPECT_FALSE(reg_exp->PartialMatch("[]", &matched)) 217 << ErrorMessage(context); 218 EXPECT_EQ("", matched) << ErrorMessage(context); 219 } 220} 221 222TEST_F(RegExpAdapterTest, TestFullMatch) { 223 for (TestContextIterator it = contexts_.begin(); it != contexts_.end(); 224 ++it) { 225 const RegExpTestContext& context = **it; 226 const AbstractRegExpFactory& factory = *context.factory; 227 const scoped_ptr<const RegExp> reg_exp(factory.CreateRegExp("([\\da-z]+)")); 228 string matched; 229 230 
EXPECT_TRUE(reg_exp->FullMatch("12345af", &matched)) 231 << ErrorMessage(context); 232 EXPECT_EQ("12345af", matched) << ErrorMessage(context); 233 234 EXPECT_TRUE(reg_exp->FullMatch("12345af", NULL)) << ErrorMessage(context); 235 236 matched.clear(); 237 EXPECT_FALSE(reg_exp->FullMatch("[12]", &matched)) << ErrorMessage(context); 238 EXPECT_EQ("", matched) << ErrorMessage(context); 239 240 matched.clear(); 241 EXPECT_FALSE(reg_exp->FullMatch("[]", &matched)) << ErrorMessage(context); 242 EXPECT_EQ("", matched) << ErrorMessage(context); 243 } 244} 245 246TEST_F(RegExpAdapterTest, TestReplace) { 247 for (vector<const RegExpTestContext*>::const_iterator it = contexts_.begin(); 248 it != contexts_.end(); 249 ++it) { 250 const RegExpTestContext& context = **it; 251 string input("123-4567 "); 252 253 ASSERT_TRUE(context.single_digit->Replace(&input, "+")) 254 << ErrorMessage(context); 255 ASSERT_EQ("+23-4567 ", input) << ErrorMessage(context); 256 257 ASSERT_TRUE(context.single_digit->Replace(&input, "+")) 258 << ErrorMessage(context); 259 ASSERT_EQ("++3-4567 ", input) << ErrorMessage(context); 260 261 const scoped_ptr<const RegExp> single_letter( 262 context.factory->CreateRegExp("[a-z]")); 263 ASSERT_FALSE(single_letter->Replace(&input, "+")) << ErrorMessage(context); 264 ASSERT_EQ("++3-4567 ", input) << ErrorMessage(context); 265 } 266} 267 268TEST_F(RegExpAdapterTest, TestReplaceWithGroup) { 269 for (TestContextIterator it = contexts_.begin(); it != contexts_.end(); 270 ++it) { 271 const RegExpTestContext& context = **it; 272 273 // Make sure referencing groups in the regexp in the replacement string 274 // works. $[0-9] notation is used. 
275 string input = "123-4567 abc"; 276 ASSERT_TRUE(context.two_digit_groups->Replace(&input, "$2")) 277 << ErrorMessage(context); 278 ASSERT_EQ("4567 abc", input) << ErrorMessage(context); 279 280 input = "123-4567"; 281 ASSERT_TRUE(context.two_digit_groups->Replace(&input, "$1")) 282 << ErrorMessage(context); 283 ASSERT_EQ("123", input) << ErrorMessage(context); 284 285 input = "123-4567"; 286 ASSERT_TRUE(context.two_digit_groups->Replace(&input, "$2")) 287 << ErrorMessage(context); 288 ASSERT_EQ("4567", input) << ErrorMessage(context); 289 290 input = "123-4567"; 291 ASSERT_TRUE(context.two_digit_groups->Replace(&input, "$1 $2")) 292 << ErrorMessage(context); 293 ASSERT_EQ("123 4567", input) << ErrorMessage(context); 294 } 295} 296 297TEST_F(RegExpAdapterTest, TestReplaceWithDollarSign) { 298 for (TestContextIterator it = contexts_.begin(); it != contexts_.end(); 299 ++it) { 300 const RegExpTestContext& context = **it; 301 302 // Make sure '$' can be used in the replacement string when escaped. 
303 string input = "123-4567"; 304 ASSERT_TRUE(context.two_digit_groups->Replace(&input, "\\$1 \\$2")) 305 << ErrorMessage(context); 306 307 ASSERT_EQ("$1 $2", input) << ErrorMessage(context); 308 } 309} 310 311TEST_F(RegExpAdapterTest, TestGlobalReplace) { 312 for (TestContextIterator it = contexts_.begin(); it != contexts_.end(); 313 ++it) { 314 const RegExpTestContext& context = **it; 315 316 string input("123-4567 "); 317 318 ASSERT_TRUE(context.single_digit->GlobalReplace(&input, "*")) 319 << ErrorMessage(context); 320 ASSERT_EQ("***-**** ", input) << ErrorMessage(context); 321 322 ASSERT_FALSE(context.single_digit->GlobalReplace(&input, "*")) 323 << ErrorMessage(context); 324 ASSERT_EQ("***-**** ", input) << ErrorMessage(context); 325 } 326} 327 328TEST_F(RegExpAdapterTest, TestUtf8) { 329 for (TestContextIterator it = contexts_.begin(); it != contexts_.end(); 330 ++it) { 331 const RegExpTestContext& context = **it; 332 const AbstractRegExpFactory& factory = *context.factory; 333 334 const scoped_ptr<const RegExp> reg_exp(factory.CreateRegExp( 335 "\xE2\x84\xA1\xE2\x8A\x8F([\xCE\xB1-\xCF\x89]*)\xE2\x8A\x90" 336 /* "℡⊏([α-ω]*)⊐" */)); 337 string matched; 338 339 EXPECT_FALSE(reg_exp->Match( 340 "\xE2\x84\xA1\xE2\x8A\x8F" "123\xE2\x8A\x90" /* "℡⊏123⊐" */, true, 341 &matched)) << ErrorMessage(context); 342 EXPECT_TRUE(reg_exp->Match( 343 "\xE2\x84\xA1\xE2\x8A\x8F\xCE\xB1\xCE\xB2\xE2\x8A\x90" 344 /* "℡⊏αβ⊐" */, true, &matched)) << ErrorMessage(context); 345 346 EXPECT_EQ("\xCE\xB1\xCE\xB2" /* "αβ" */, matched) << ErrorMessage(context); 347 } 348} 349 350} // namespace phonenumbers 351} // namespace i18n 352