"Regular Expression Matching" is a hard problem on LeetCode (problem #10):
Given an input string s and a pattern p, implement regular expression matching with support for '.' and '*' where:
- '.' Matches any single character.
- '*' Matches zero or more of the preceding element.
The matching should cover the entire input string (not partial).
The following solution of mine passes all the tests. I would very much appreciate comments as to how I can improve on the coding style and idiomatic use of C++.
namespace regex_matching {
class PatternChar {
public:
PatternChar(char c): c_(c) {}
// The second char must be '*'
// To make the later code simpler, replace a* with A, replace .* with $.
PatternChar(char c1, char) {
if (c1 == '.') { c_ = '$'; return; }
c_ = 'A' + (c1 - 'a');
}
bool IsLetterOrDot() const {
if (c_ == '.') { return true; }
if (c_ >= 'a' && c_ <= 'z') { return true; }
return false;
}
bool IsStarred() const {
return !IsLetterOrDot();
}
bool Match(char c) const {
if (c_ == '.' || c_ == '$') { return true; }
return (c == c_ || c == 'a' + (c_ - 'A'));
}
private:
char c_;
friend ostream &operator<<(ostream &os, const PatternChar &pc);
};
ostream &operator<<(ostream &os, const PatternChar &pc) {
return (os << pc.c_);
}
class Pattern {
public:
Pattern(const string &p) {
for (int i = 0; i < p.size(); ++i) {
if (i < p.size() - 1 && p[i+1] == '*') {
pcs_.emplace_back(PatternChar(p[i], p[i+1]));
++i;
}
else {
pcs_.emplace_back(PatternChar(p[i]));
}
}
}
Pattern(const Pattern &p, int i): pcs_(p.pcs_.begin(), p.pcs_.begin() + i + 1) {}
size_t Length() const { return pcs_.size(); }
const PatternChar &operator[](int i) const { return pcs_[i]; }
const vector<PatternChar> &pattern_chars() const { return pcs_; }
private:
vector<PatternChar> pcs_;
friend ostream &operator<<(ostream &os, const Pattern &p);
};
ostream &operator<<(ostream &os, const Pattern &p) {
for (const auto &pc: p.pattern_chars()) { os << pc; }
return os;
}
class Matching {
public:
Matching(const string &s, const string &p, bool verbose = false):
s_(s), p_(p), verbose_(verbose),
matchable_(s_.size(), vector<bool>(p_.Length(), false)) {
FillFirstColumn();
FillFirstRow();
}
/*
matchable[i][j] can be true in one of the following ways:
(a) p[j] is letter or dot:
matchable[i-1][j-1] && (p[j] matches s[i])
(b) p[j] is capital ch or $:
(b.1) ch is not needed to match s[i], so matchable[i][j-1]
(b.2) ch was not needed to match s[i-1], but is needed to match s[i],
so matchable[i-1][j-1] and (ch matches s[i])
(b.3) ch was needed to match s[i-1], and is still needed to match s[i],
so matchable[i-1][j] and (ch matches s[i])
*/
bool Match() {
if (verbose_) { cout << "Pattern: " << p_ << endl; }
for (int i = 1; i < s_.size(); ++i) {
for (int j = 1; j < p_.Length(); ++j) {
if (p_[j].IsLetterOrDot()) { // Case (a) in the description
if (matchable_[i-1][j-1] && p_[j].Match(s_[i])) {
if (verbose_) {
cout << "Matched " << string(s_.begin(), s_.begin() + i + 1)
<< " with " << Pattern(p_, j) << " by case (a)" << endl; }
matchable_[i][j] = true;
}
continue;
}
// We are in Case (b) in the description, i.e. pcs[j] is capital ch or $
// (b.1) pcs[j] is not needed
if (matchable_[i][j-1]) {
if (verbose_) {
cout << "Matched " << string(s_.begin(), s_.begin() + i + 1)
<< " with " << Pattern(p_, j) << " by case (b.1)" << endl;
}
matchable_[i][j] = true;
continue;
}
// (b.2) pcs[j] was not needed to match s[i-1], but is needed to match s[i]
if (matchable_[i-1][j-1] && p_[j].Match(s_[i])) {
if (verbose_) {
cout << "Matched " << string(s_.begin(), s_.begin() + i + 1)
<< " with " << Pattern(p_, j) << " by case (b.2)" << endl;
}
matchable_[i][j] = true;
continue;
}
// (b.3) pcs[j] was needed to match s[i-1], and is still needed to match s[i]
if (matchable_[i-1][j] && p_[j].Match(s_[i])) {
if (verbose_) {
cout << "Matched " << string(s_.begin(), s_.begin() + i + 1)
<< " with " << Pattern(p_, j) << " by case (b.3)" << endl;
}
matchable_[i][j] = true;
continue;
}
}
}
return matchable_.back().back();
}
private:
Pattern p_;
const string &s_;
bool verbose_;
vector<vector<bool>> matchable_; // matchable_[i][j] == (p[0..j] matches s[0..i])
// Fill matchable_[i][0]
void FillFirstColumn() {
for (int i = 0; i < s_.size(); ++i) {
if (!p_[0].Match(s_[i])) { break; }
if (i != 0 && !p_[0].IsStarred()) { break; }
if (verbose_) {
cout << "Prefilling matchable_[" << i << "][" << 0 << "]" << endl;
}
matchable_[i][0] = true;
}
}
// Fill matchable_[0][j]
void FillFirstRow() {
bool match_began = false;
bool letter_or_dot_flag = false;
for (int j = 0; j < p_.Length(); ++j) {
if (p_[j].Match(s_[0])) { match_began = true; }
if (p_[j].IsLetterOrDot()) {
if (letter_or_dot_flag || !p_[j].Match(s_[0])) break;
letter_or_dot_flag = true;
if (verbose_) {
cout << "Prefilling matchable_[" << 0 << "][" << j << "]" << endl;
}
matchable_[0][j] = true;
continue;
}
if (!match_began) continue;
if (verbose_) {
cout << "Prefilling matchable_[" << 0 << "][" << j << "]" << endl;
}
matchable_[0][j] = true;
}
}
};
} // namespace regex_matching
// This class is the artifact of LeetCode
class Solution {
public:
bool isMatch(string s, string p) {
return regex_matching::Matching(s, p).Match();
}
};
PS Please note that this is my first attempt at following the Google C++ Style Guide.
PPS I know the following question is not in the scope of this stack exchange, but I would still appreciate some input. I read here that Google loves asking hard dynamic programming questions. The above code is probably a solution to just such a question. How it is possible to write a code of this size in an interview?
PatternandPatternChartake a little less than half of the solution size for no real logic). \$\endgroup\$