2
\$\begingroup\$

"Regular Expression Matching" is a hard problem on LeetCode (problem #10):

Given an input string s and a pattern p, implement regular expression matching with support for '.' and '*' where:

  • '.' Matches any single character.
  • '*' Matches zero or more of the preceding element.

The matching should cover the entire input string (not partial).

The following solution of mine passes all the tests. I would very much appreciate comments as to how I can improve on the coding style and idiomatic use of C++.

namespace regex_matching {

  class PatternChar {
    public:
      PatternChar(char c): c_(c) {}

      // The second char must be '*'
      // To make the later code simpler, replace a* with A, replace .* with $.
      PatternChar(char c1, char) {
        if (c1 == '.') { c_ = '$'; return; }
        c_ = 'A' + (c1 - 'a');
      }
      
      bool IsLetterOrDot() const {
        if (c_ == '.') { return true; }
        if (c_ >= 'a' && c_ <= 'z') { return true; }
        return false;
      }
    
      bool IsStarred() const {
        return !IsLetterOrDot();
      }
    
      bool Match(char c) const {
        if (c_ == '.' || c_ == '$') { return true; }
        return (c == c_ || c == 'a' + (c_ - 'A'));
      }
      
    private:
      char c_;
      
      friend ostream &operator<<(ostream &os, const PatternChar &pc);
  };
  
  ostream &operator<<(ostream &os, const PatternChar &pc) {
    return (os << pc.c_);
  }
  
  class Pattern {
    public:
      Pattern(const string &p) {
        for (int i = 0; i < p.size(); ++i) {
          if (i < p.size() - 1 && p[i+1] == '*') {
            pcs_.emplace_back(PatternChar(p[i], p[i+1]));     
            ++i;
          }
          else {
            pcs_.emplace_back(PatternChar(p[i])); 
          }
        }
      }
    
      Pattern(const Pattern &p, int i): pcs_(p.pcs_.begin(), p.pcs_.begin() + i + 1) {}
    
      size_t Length() const { return pcs_.size(); }
      const PatternChar &operator[](int i) const { return pcs_[i]; }

      const vector<PatternChar> &pattern_chars() const { return pcs_; }
    
    private:
      vector<PatternChar> pcs_;
    
      friend ostream &operator<<(ostream &os, const Pattern &p);
  };
  
  ostream &operator<<(ostream &os, const Pattern &p) {
    for (const auto &pc: p.pattern_chars()) { os << pc; }
    return os;
  }
  
  class Matching {
    public:
      Matching(const string &s, const string &p, bool verbose = false):
          s_(s), p_(p), verbose_(verbose), 
          matchable_(s_.size(), vector<bool>(p_.Length(), false)) {
        FillFirstColumn();
        FillFirstRow();
      }
      
      /*
      matchable[i][j] can be true in one of the following ways:
       (a) p[j] is letter or dot:
           matchable[i-1][j-1] && (p[j] matches s[i])
       (b) p[j] is capital ch or $:
           (b.1) ch is not needed to match s[i], so matchable[i][j-1]
           (b.2) ch was not needed to match s[i-1], but is needed to match s[i], 
                 so matchable[i-1][j-1] and (ch matches s[i])
           (b.3) ch was needed to match s[i-1], and is still needed to match s[i], 
                 so matchable[i-1][j] and (ch matches s[i])
      */
      bool Match() {
        if (verbose_) { cout << "Pattern: " << p_ << endl; }

        for (int i = 1; i < s_.size(); ++i) {
          for (int j = 1; j < p_.Length(); ++j) {

            if (p_[j].IsLetterOrDot()) { // Case (a) in the description
              if (matchable_[i-1][j-1] && p_[j].Match(s_[i])) {
                if (verbose_) { 
                  cout << "Matched " << string(s_.begin(), s_.begin() + i + 1) 
                       << " with " << Pattern(p_, j) << " by case (a)" << endl; }
                matchable_[i][j] = true;
              }
              continue;
            }

            // We are in Case (b) in the description, i.e. pcs[j] is capital ch or $

            //   (b.1) pcs[j] is not needed
            if (matchable_[i][j-1]) {
              if (verbose_) { 
                cout << "Matched " << string(s_.begin(), s_.begin() + i + 1) 
                     << " with " << Pattern(p_, j) << " by case (b.1)" << endl; 
              }
              matchable_[i][j] = true;
              continue;
            }

            //   (b.2) pcs[j] was not needed to match s[i-1], but is needed to match s[i]
            if (matchable_[i-1][j-1] && p_[j].Match(s_[i])) {
              if (verbose_) { 
                cout << "Matched " << string(s_.begin(), s_.begin() + i + 1) 
                     << " with " << Pattern(p_, j) << " by case (b.2)" << endl; 
              }
              matchable_[i][j] = true;
              continue;
            }

            //   (b.3) pcs[j] was needed to match s[i-1], and is still needed to match s[i]
            if (matchable_[i-1][j] && p_[j].Match(s_[i])) {
              if (verbose_) { 
                cout << "Matched " << string(s_.begin(), s_.begin() + i + 1) 
                     << " with " << Pattern(p_, j) << " by case (b.3)" << endl; 
              }
              matchable_[i][j] = true;
              continue;
            }
          }
        }
        return matchable_.back().back();
      }
    
    private:
      Pattern p_;
      const string &s_;
      bool verbose_;
      vector<vector<bool>> matchable_; // matchable_[i][j] == (p[0..j] matches s[0..i])
    
      // Fill matchable_[i][0]
      void FillFirstColumn() {
        for (int i = 0; i < s_.size(); ++i) {
          if (!p_[0].Match(s_[i])) { break; }
          if (i != 0 && !p_[0].IsStarred()) { break; }
          if (verbose_) { 
            cout << "Prefilling matchable_[" << i << "][" << 0 << "]" << endl; 
          }
          matchable_[i][0] = true;
        }
      }
    
      // Fill matchable_[0][j]
      void FillFirstRow() {    
        bool match_began = false;
        bool letter_or_dot_flag = false;
        for (int j = 0; j < p_.Length(); ++j) {
          if (p_[j].Match(s_[0])) { match_began = true; }
          if (p_[j].IsLetterOrDot()) {
            if (letter_or_dot_flag || !p_[j].Match(s_[0])) break;
            letter_or_dot_flag = true;
            if (verbose_) { 
              cout << "Prefilling matchable_[" << 0 << "][" << j << "]" << endl; 
            }
            matchable_[0][j] = true;
            continue;
          }
          if (!match_began) continue;
          if (verbose_) { 
            cout << "Prefilling matchable_[" << 0 << "][" << j << "]" << endl; 
          }
          matchable_[0][j] = true;
        }
      }
  };

} // namespace regex_matching

// This class is the artifact of LeetCode
class Solution {
  public:
    bool isMatch(string s, string p) {        
      return regex_matching::Matching(s, p).Match();
    }
};

PS Please note that this is my first attempt at following the Google C++ Style Guide.

PPS I know the following question is not in the scope of this stack exchange, but I would still appreciate some input. I read here that Google loves asking hard dynamic programming questions. The above code is probably a solution to just such a question. How it is possible to write a code of this size in an interview?

\$\endgroup\$
2
  • \$\begingroup\$ "How it is possible to write a code of this size in an interview?". It might have shorter way. Challenge site doesn't especially encourage maintainable code (Pattern and PatternChar take a little less than half of the solution size for no real logic). \$\endgroup\$ Commented Feb 1, 2022 at 9:07
  • \$\begingroup\$ @Jarod42 However, when you are interviewing with Google, they do want to see that you can write maintainable code, don't they? So, how is it possible there within the tight time limits of the interview? \$\endgroup\$ Commented Feb 2, 2022 at 11:03

0

You must log in to answer this question.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.