1// Copyright (C) 2011 The Libphonenumber Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// Author: George Yakovlev
16//         Philippe Liard
17
18#include "phonenumbers/regexp_adapter.h"
19
20#include <string>
21#include <vector>
22
23#include <gtest/gtest.h>
24
25#include "phonenumbers/base/memory/scoped_ptr.h"
26#include "phonenumbers/stl_util.h"
27#include "phonenumbers/stringutil.h"
28
29#ifdef I18N_PHONENUMBERS_USE_RE2
30#include "phonenumbers/regexp_adapter_re2.h"
31#else
32#include "phonenumbers/regexp_adapter_icu.h"
33#endif  // I18N_PHONENUMBERS_USE_RE2
34
35namespace i18n {
36namespace phonenumbers {
37
38using std::vector;
39
40// Structure that contains the attributes used to test an implementation of the
41// regexp adapter.
42struct RegExpTestContext {
43  explicit RegExpTestContext(const string& name,
44                             const AbstractRegExpFactory* factory)
45      : name(name),
46        factory(factory),
47        digits(factory->CreateRegExp("\\d+")),
48        parentheses_digits(factory->CreateRegExp("\\((\\d+)\\)")),
49        single_digit(factory->CreateRegExp("\\d")),
50        two_digit_groups(factory->CreateRegExp("(\\d+)-(\\d+)")),
51        six_digit_groups(factory->CreateRegExp(
52            "(\\d+)-(\\d+)-(\\d+)-(\\d+)-(\\d+)-(\\d+)")) {}
53
54  const string name;
55  const scoped_ptr<const AbstractRegExpFactory> factory;
56  const scoped_ptr<const RegExp> digits;
57  const scoped_ptr<const RegExp> parentheses_digits;
58  const scoped_ptr<const RegExp> single_digit;
59  const scoped_ptr<const RegExp> two_digit_groups;
60  const scoped_ptr<const RegExp> six_digit_groups;
61};
62
63class RegExpAdapterTest : public testing::Test {
64 protected:
65  RegExpAdapterTest() {
66#ifdef I18N_PHONENUMBERS_USE_RE2
67    contexts_.push_back(
68        new RegExpTestContext("RE2", new RE2RegExpFactory()));
69#else
70    contexts_.push_back(
71        new RegExpTestContext("ICU Regex", new ICURegExpFactory()));
72#endif  // I18N_PHONENUMBERS_USE_RE2
73  }
74
75  ~RegExpAdapterTest() { gtl::STLDeleteElements(&contexts_); }
76
77  static string ErrorMessage(const RegExpTestContext& context) {
78    return StrCat("Test failed with ", context.name, " implementation.");
79  }
80
81  typedef vector<const RegExpTestContext*>::const_iterator TestContextIterator;
82  vector<const RegExpTestContext*> contexts_;
83};
84
85TEST_F(RegExpAdapterTest, TestConsumeNoMatch) {
86  for (vector<const RegExpTestContext*>::const_iterator it = contexts_.begin();
87       it != contexts_.end();
88       ++it) {
89    const RegExpTestContext& context = **it;
90    const scoped_ptr<RegExpInput> input(
91        context.factory->CreateInput("+1-123-456-789"));
92
93    // When 'true' is passed to Consume(), the match occurs from the beginning
94    // of the input.
95    ASSERT_FALSE(context.digits->Consume(
96         input.get(), true, NULL, NULL, NULL, NULL, NULL, NULL))
97         << ErrorMessage(context);
98    ASSERT_EQ("+1-123-456-789", input->ToString()) << ErrorMessage(context);
99
100    string res1;
101    ASSERT_FALSE(context.parentheses_digits->Consume(
102        input.get(), true, &res1, NULL, NULL, NULL, NULL, NULL))
103        << ErrorMessage(context);
104    ASSERT_EQ("+1-123-456-789", input->ToString()) << ErrorMessage(context);
105    ASSERT_EQ("", res1) << ErrorMessage(context);
106  }
107}
108
109
110TEST_F(RegExpAdapterTest, TestConsumeWithNull) {
111  for (TestContextIterator it = contexts_.begin(); it != contexts_.end();
112       ++it) {
113    const RegExpTestContext& context = **it;
114    const AbstractRegExpFactory& factory = *context.factory;
115    const scoped_ptr<RegExpInput> input(factory.CreateInput("+123"));
116    const scoped_ptr<const RegExp> plus_sign(factory.CreateRegExp("(\\+)"));
117
118    ASSERT_TRUE(plus_sign->Consume(input.get(), true, NULL, NULL, NULL, NULL,
119                                   NULL, NULL))
120        << ErrorMessage(context);
121    ASSERT_EQ("123", input->ToString()) << ErrorMessage(context);
122  }
123}
124
125TEST_F(RegExpAdapterTest, TestConsumeRetainsMatches) {
126  for (TestContextIterator it = contexts_.begin(); it != contexts_.end();
127       ++it) {
128    const RegExpTestContext& context = **it;
129    const scoped_ptr<RegExpInput> input(
130        context.factory->CreateInput("1-123-456-789"));
131
132    string res1, res2;
133    ASSERT_TRUE(context.two_digit_groups->Consume(
134        input.get(), true, &res1, &res2, NULL, NULL, NULL, NULL))
135        << ErrorMessage(context);
136    ASSERT_EQ("-456-789", input->ToString()) << ErrorMessage(context);
137    ASSERT_EQ("1", res1) << ErrorMessage(context);
138    ASSERT_EQ("123", res2) << ErrorMessage(context);
139  }
140}
141
142TEST_F(RegExpAdapterTest, TestFindAndConsume) {
143  for (TestContextIterator it = contexts_.begin(); it != contexts_.end();
144       ++it) {
145    const RegExpTestContext& context = **it;
146    const scoped_ptr<RegExpInput> input(
147        context.factory->CreateInput("+1-123-456-789"));
148    const scoped_ptr<RegExpInput> input_with_six_digit_groups(
149        context.factory->CreateInput("111-222-333-444-555-666"));
150
151    // When 'false' is passed to Consume(), the match can occur from any place
152    // in the input.
153    ASSERT_TRUE(context.digits->Consume(input.get(), false, NULL, NULL, NULL,
154                                        NULL, NULL, NULL))
155        << ErrorMessage(context);
156    ASSERT_EQ("-123-456-789", input->ToString()) << ErrorMessage(context);
157
158    ASSERT_TRUE(context.digits->Consume(input.get(), false, NULL, NULL, NULL,
159                                        NULL, NULL, NULL))
160        << ErrorMessage(context);
161    ASSERT_EQ("-456-789", input->ToString()) << ErrorMessage(context);
162
163    ASSERT_FALSE(context.parentheses_digits->Consume(
164        input.get(), false, NULL, NULL, NULL, NULL, NULL, NULL))
165        << ErrorMessage(context);
166    ASSERT_EQ("-456-789", input->ToString()) << ErrorMessage(context);
167
168    string res1, res2;
169    ASSERT_TRUE(context.two_digit_groups->Consume(
170        input.get(), false, &res1, &res2, NULL, NULL, NULL, NULL))
171        << ErrorMessage(context);
172    printf("previous input: %s", input.get()->ToString().c_str());
173    ASSERT_EQ("", input->ToString()) << ErrorMessage(context);
174    ASSERT_EQ("456", res1) << ErrorMessage(context);
175    ASSERT_EQ("789", res2) << ErrorMessage(context);
176
177    // Testing maximum no of substrings that can be matched presently, six.
178    string mat1, mat2, res3, res4, res5, res6;
179    ASSERT_TRUE(context.six_digit_groups->Consume(
180        input_with_six_digit_groups.get(), false, &mat1, &mat2, &res3, &res4,
181        &res5, &res6))
182        << ErrorMessage(context);
183    printf("Present input: %s",
184           input_with_six_digit_groups.get()->ToString().c_str());
185    ASSERT_EQ("", input_with_six_digit_groups->ToString())
186        << ErrorMessage(context);
187    ASSERT_EQ("111", mat1) << ErrorMessage(context);
188    ASSERT_EQ("222", mat2) << ErrorMessage(context);
189    ASSERT_EQ("333", res3) << ErrorMessage(context);
190    ASSERT_EQ("444", res4) << ErrorMessage(context);
191    ASSERT_EQ("555", res5) << ErrorMessage(context);
192    ASSERT_EQ("666", res6) << ErrorMessage(context);
193  }
194}
195
196TEST_F(RegExpAdapterTest, TestPartialMatch) {
197  for (TestContextIterator it = contexts_.begin(); it != contexts_.end();
198       ++it) {
199    const RegExpTestContext& context = **it;
200    const AbstractRegExpFactory& factory = *context.factory;
201    const scoped_ptr<const RegExp> reg_exp(factory.CreateRegExp("([\\da-z]+)"));
202    string matched;
203
204    EXPECT_TRUE(reg_exp->PartialMatch("12345af", &matched))
205        << ErrorMessage(context);
206    EXPECT_EQ("12345af", matched) << ErrorMessage(context);
207
208    EXPECT_TRUE(reg_exp->PartialMatch("12345af", NULL))
209        << ErrorMessage(context);
210
211    EXPECT_TRUE(reg_exp->PartialMatch("[12]", &matched))
212        << ErrorMessage(context);
213    EXPECT_EQ("12", matched) << ErrorMessage(context);
214
215    matched.clear();
216    EXPECT_FALSE(reg_exp->PartialMatch("[]", &matched))
217        << ErrorMessage(context);
218    EXPECT_EQ("", matched) << ErrorMessage(context);
219  }
220}
221
222TEST_F(RegExpAdapterTest, TestFullMatch) {
223  for (TestContextIterator it = contexts_.begin(); it != contexts_.end();
224       ++it) {
225    const RegExpTestContext& context = **it;
226    const AbstractRegExpFactory& factory = *context.factory;
227    const scoped_ptr<const RegExp> reg_exp(factory.CreateRegExp("([\\da-z]+)"));
228    string matched;
229
230    EXPECT_TRUE(reg_exp->FullMatch("12345af", &matched))
231        << ErrorMessage(context);
232    EXPECT_EQ("12345af", matched) << ErrorMessage(context);
233
234    EXPECT_TRUE(reg_exp->FullMatch("12345af", NULL)) << ErrorMessage(context);
235
236    matched.clear();
237    EXPECT_FALSE(reg_exp->FullMatch("[12]", &matched)) << ErrorMessage(context);
238    EXPECT_EQ("", matched) << ErrorMessage(context);
239
240    matched.clear();
241    EXPECT_FALSE(reg_exp->FullMatch("[]", &matched)) << ErrorMessage(context);
242    EXPECT_EQ("", matched) << ErrorMessage(context);
243  }
244}
245
246TEST_F(RegExpAdapterTest, TestReplace) {
247  for (vector<const RegExpTestContext*>::const_iterator it = contexts_.begin();
248       it != contexts_.end();
249       ++it) {
250    const RegExpTestContext& context = **it;
251    string input("123-4567 ");
252
253    ASSERT_TRUE(context.single_digit->Replace(&input, "+"))
254        << ErrorMessage(context);
255    ASSERT_EQ("+23-4567 ", input) << ErrorMessage(context);
256
257    ASSERT_TRUE(context.single_digit->Replace(&input, "+"))
258        << ErrorMessage(context);
259    ASSERT_EQ("++3-4567 ", input) << ErrorMessage(context);
260
261    const scoped_ptr<const RegExp> single_letter(
262        context.factory->CreateRegExp("[a-z]"));
263    ASSERT_FALSE(single_letter->Replace(&input, "+")) << ErrorMessage(context);
264    ASSERT_EQ("++3-4567 ", input) << ErrorMessage(context);
265  }
266}
267
268TEST_F(RegExpAdapterTest, TestReplaceWithGroup) {
269  for (TestContextIterator it = contexts_.begin(); it != contexts_.end();
270       ++it) {
271    const RegExpTestContext& context = **it;
272
273    // Make sure referencing groups in the regexp in the replacement string
274    // works. $[0-9] notation is used.
275    string input = "123-4567 abc";
276    ASSERT_TRUE(context.two_digit_groups->Replace(&input, "$2"))
277        << ErrorMessage(context);
278    ASSERT_EQ("4567 abc", input) << ErrorMessage(context);
279
280    input = "123-4567";
281    ASSERT_TRUE(context.two_digit_groups->Replace(&input, "$1"))
282        << ErrorMessage(context);
283    ASSERT_EQ("123", input) << ErrorMessage(context);
284
285    input = "123-4567";
286    ASSERT_TRUE(context.two_digit_groups->Replace(&input, "$2"))
287        << ErrorMessage(context);
288    ASSERT_EQ("4567", input) << ErrorMessage(context);
289
290    input = "123-4567";
291    ASSERT_TRUE(context.two_digit_groups->Replace(&input, "$1 $2"))
292        << ErrorMessage(context);
293    ASSERT_EQ("123 4567", input) << ErrorMessage(context);
294  }
295}
296
297TEST_F(RegExpAdapterTest, TestReplaceWithDollarSign) {
298  for (TestContextIterator it = contexts_.begin(); it != contexts_.end();
299       ++it) {
300    const RegExpTestContext& context = **it;
301
302    // Make sure '$' can be used in the replacement string when escaped.
303    string input = "123-4567";
304    ASSERT_TRUE(context.two_digit_groups->Replace(&input, "\\$1 \\$2"))
305        << ErrorMessage(context);
306
307    ASSERT_EQ("$1 $2", input) << ErrorMessage(context);
308  }
309}
310
311TEST_F(RegExpAdapterTest, TestGlobalReplace) {
312  for (TestContextIterator it = contexts_.begin(); it != contexts_.end();
313       ++it) {
314    const RegExpTestContext& context = **it;
315
316    string input("123-4567 ");
317
318    ASSERT_TRUE(context.single_digit->GlobalReplace(&input, "*"))
319        << ErrorMessage(context);
320    ASSERT_EQ("***-**** ", input) << ErrorMessage(context);
321
322    ASSERT_FALSE(context.single_digit->GlobalReplace(&input, "*"))
323        << ErrorMessage(context);
324    ASSERT_EQ("***-**** ", input) << ErrorMessage(context);
325  }
326}
327
328TEST_F(RegExpAdapterTest, TestUtf8) {
329  for (TestContextIterator it = contexts_.begin(); it != contexts_.end();
330       ++it) {
331    const RegExpTestContext& context = **it;
332    const AbstractRegExpFactory& factory = *context.factory;
333
334    const scoped_ptr<const RegExp> reg_exp(factory.CreateRegExp(
335        "\xE2\x84\xA1\xE2\x8A\x8F([\xCE\xB1-\xCF\x89]*)\xE2\x8A\x90"
336        /* "℡⊏([α-ω]*)⊐" */));
337    string matched;
338
339    EXPECT_FALSE(reg_exp->Match(
340        "\xE2\x84\xA1\xE2\x8A\x8F" "123\xE2\x8A\x90" /* "℡⊏123⊐" */, true,
341        &matched)) << ErrorMessage(context);
342    EXPECT_TRUE(reg_exp->Match(
343        "\xE2\x84\xA1\xE2\x8A\x8F\xCE\xB1\xCE\xB2\xE2\x8A\x90"
344        /* "℡⊏αβ⊐" */, true, &matched)) << ErrorMessage(context);
345
346    EXPECT_EQ("\xCE\xB1\xCE\xB2" /* "αβ" */, matched) << ErrorMessage(context);
347  }
348}
349
350}  // namespace phonenumbers
351}  // namespace i18n
352