Skip to content

Commit

Permalink
Refactor regexes and mappings class and constants.
Browse files Browse the repository at this point in the history
  • Loading branch information
silvio2402 committed Jan 16, 2025
1 parent 0bd1332 commit bb32d1f
Show file tree
Hide file tree
Showing 9 changed files with 662 additions and 556 deletions.
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,7 @@ set (
"src/phonenumbers/phonenumber.pb.cc" # Generated by Protocol Buffers.
"src/phonenumbers/phonenumberutil.cc"
"src/phonenumbers/regex_based_matcher.cc"
"src/phonenumbers/regexpsandmappings.cc"
"src/phonenumbers/regexp_cache.cc"
"src/phonenumbers/shortnumberinfo.cc"
"src/phonenumbers/string_byte_sink.cc"
Expand Down
83 changes: 83 additions & 0 deletions cpp/src/phonenumbers/constants.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// Copyright (C) 2025 The Libphonenumber Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <stddef.h>

#ifndef I18N_PHONENUMBERS_CONSTANTS_H_
#define I18N_PHONENUMBERS_CONSTANTS_H_

namespace i18n {
namespace phonenumbers {

// Holder for the string and numeric constants (mostly regular-expression
// fragments) shared by the phone number classes. Every member is private, so
// only the friend classes declared below can read them.
class Constants {
friend class PhoneNumberMatcherRegExps;
friend class PhoneNumberRegExpsAndMappings;
friend class PhoneNumberUtil;

private:
// The kPlusSign signifies the international prefix.
static constexpr char kPlusSign[] = "+";

// The literal star character.
static constexpr char kStarSign[] = "*";

// The literal ";ext=". NOTE(review): named after RFC 3966, presumably the
// extension parameter prefix of a tel: URI - confirm against the callers.
static constexpr char kRfc3966ExtnPrefix[] = ";ext=";
// Regular expression matching at most one of the characters "-", ".", "(" or
// ")".
static constexpr char kRfc3966VisualSeparator[] = "[\\-\\.\\(\\)]?";

// Regular expression property class for Unicode decimal digits (category Nd).
static constexpr char kDigits[] = "\\p{Nd}";

// We accept alpha characters in phone numbers, ASCII only. kValidAlpha stores
// lower-case only, since it is meant for regular expressions that are
// case-insensitive; kValidAlphaInclUppercase spells out both cases.
static constexpr char kValidAlpha[] = "a-z";
static constexpr char kValidAlphaInclUppercase[] = "A-Za-z";

// Zero or more of: space, no-break space (U+00A0, UTF-8 bytes C2 A0), tab or
// comma.
static constexpr char kPossibleSeparatorsBetweenNumberAndExtLabel[] =
"[ \xC2\xA0\\t,]*";

// Optional colon, full stop (.) or full-width full stop (U+FF0E, UTF-8 bytes
// EF BC 8E), followed by zero or more spaces, no-break spaces (U+00A0), tabs,
// commas or hyphens.
static constexpr char kPossibleCharsAfterExtLabel[] =
"[:\\.\xEF\xBC\x8E]?[ \xC2\xA0\\t,-]*";

// Regular expression matching an optional "#".
static constexpr char kOptionalExtSuffix[] = "#?";

// The minimum length of the national significant number. No maximum is
// defined in this class.
static constexpr size_t kMinLengthForNsn = 2;

static constexpr char kPlusChars[] = "+\xEF\xBC\x8B"; /* "+" followed by U+FF0B (full-width plus sign) */

// Regular expression of acceptable punctuation found in phone numbers, used
// to find numbers in text and to decide what is a viable phone number. This
// excludes diallable characters.
// This consists of dash characters, white space characters, full stops,
// slashes, square brackets, parentheses and tildes. It also includes the
// letter 'x' as that is found as a placeholder for carrier information in
// some phone numbers. Full-width variants are also present. To find out the
// unicode code-point of the characters below in vim, highlight the character
// and type 'ga'. Note that the - is used to express ranges of full-width
// punctuation below, as well as being present in the expression itself. In
// emacs, you can use M-x unicode-what to query information about the unicode
// character.
static constexpr char kValidPunctuation[] =
// Decoded, in order: "-", "x", the range U+2010-U+2015 (hyphen through
// horizontal bar), U+2212 (minus sign), U+30FC (katakana prolonged sound
// mark), the range U+FF0D-U+FF0F (full-width hyphen-minus through full-width
// solidus), " ", U+00A0 (no-break space), U+00AD (soft hyphen), U+200B (zero
// width space), U+2060 (word joiner), U+3000 (ideographic space), "(", ")",
// U+FF08 and U+FF09 (full-width parentheses), U+FF3B and U+FF3D (full-width
// square brackets), ".", escaped "[" and "]", "/", "~", U+2053 (swung dash)
// and U+223C (tilde operator).
"-x\xE2\x80\x90-\xE2\x80\x95\xE2\x88\x92\xE3\x83\xBC\xEF\xBC\x8D-\xEF\xBC"
"\x8F \xC2\xA0\xC2\xAD\xE2\x80\x8B\xE2\x81\xA0\xE3\x80\x80()\xEF\xBC\x88"
"\xEF\xBC\x89\xEF\xBC\xBB\xEF\xBC\xBD.\\[\\]/~\xE2\x81\x93\xE2\x88\xBC";

// Regular expression that captures, in group 1, all text preceding a
// backslash or a forward slash that is followed by optional spaces and an
// "x".
static constexpr char kCaptureUpToSecondNumberStart[] = "(.*)[\\\\/] *x";
};

} // namespace phonenumbers
} // namespace i18n

#endif // I18N_PHONENUMBERS_CONSTANTS_H_
10 changes: 6 additions & 4 deletions cpp/src/phonenumbers/phonenumbermatcher.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,21 @@

#include <ctype.h>
#include <stddef.h>
#include <unicode/uchar.h>

#include <limits>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include <unicode/uchar.h>

#include "phonenumbers/alternate_format.h"
#include "phonenumbers/base/logging.h"
#include "phonenumbers/base/memory/scoped_ptr.h"
#include "phonenumbers/base/memory/singleton.h"
#include "phonenumbers/callback.h"
#include "phonenumbers/constants.h"
#include "phonenumbers/default_logger.h"
#include "phonenumbers/encoding_utils.h"
#include "phonenumbers/normalize_utf8.h"
Expand Down Expand Up @@ -283,10 +285,10 @@ class PhoneNumberMatcherRegExps : public Singleton<PhoneNumberMatcherRegExps> {
digit_block_limit_(PhoneNumberUtil::kMaxLengthForNsn +
PhoneNumberUtil::kMaxLengthCountryCode),
block_limit_(Limit(0, digit_block_limit_)),
punctuation_(StrCat("[", PhoneNumberUtil::kValidPunctuation, "]",
punctuation_(StrCat("[", Constants::kValidPunctuation, "]",
punctuation_limit_)),
digit_sequence_(StrCat("\\p{Nd}", Limit(1, digit_block_limit_))),
lead_class_chars_(StrCat(opening_parens_, PhoneNumberUtil::kPlusChars)),
lead_class_chars_(StrCat(opening_parens_, Constants::kPlusChars)),
lead_class_(StrCat("[", lead_class_chars_, "]")),
regexp_factory_for_pattern_(new ICURegExpFactory()),
#ifdef I18N_PHONENUMBERS_USE_RE2
Expand Down Expand Up @@ -314,7 +316,7 @@ class PhoneNumberMatcherRegExps : public Singleton<PhoneNumberMatcherRegExps> {
inner_matches_(new std::vector<const RegExp*>()),
capture_up_to_second_number_start_pattern_(
regexp_factory_->CreateRegExp(
PhoneNumberUtil::kCaptureUpToSecondNumberStart)),
Constants::kCaptureUpToSecondNumberStart)),
capturing_ascii_digits_pattern_(
regexp_factory_->CreateRegExp("(\\d+)")),
lead_class_pattern_(regexp_factory_->CreateRegExp(lead_class_)),
Expand Down
Loading

0 comments on commit bb32d1f

Please sign in to comment.