diff options
| author | BogDan Vatra <bogdan.vatra.ford@kdab.com> | 2015-10-09 14:18:30 +0300 |
|---|---|---|
| committer | BogDan Vatra <bogdan@kdab.com> | 2015-10-09 11:22:33 +0000 |
| commit | cda46721d8f574016fc4008af3f35df74ef20435 (patch) | |
| tree | 2db423ef83873f1a346ab3da0fc33adbac123f37 /src/repparser/qregexparser.h | |
| parent | 433bcd4b7d76096a648780b2cda4677b996af839 (diff) | |
Allow apps to use the new repparser
- rename qregexparser.h -> qrepregexparser.h
- install parser.g
- add a small .prf file to allow easy usage of repparser
Change-Id: Iefbe3581f4da56c290e9ba05a8923b6a0e7917bb
Reviewed-by: Brett Stottlemyer <bstottle@ford.com>
Diffstat (limited to 'src/repparser/qregexparser.h')
| -rw-r--r-- | src/repparser/qregexparser.h | 493 |
1 files changed, 0 insertions, 493 deletions
diff --git a/src/repparser/qregexparser.h b/src/repparser/qregexparser.h deleted file mode 100644 index 4e5ab10..0000000 --- a/src/repparser/qregexparser.h +++ /dev/null @@ -1,493 +0,0 @@ -/**************************************************************************** -** Copyright (C) 2014-2015 Ford Motor Company. -** All rights reserved. -** -** Copyright (C) 2012 Digia Plc and/or its subsidiary(-ies). -** Contact: http://www.qt-project.org/legal -** -** This file is part of the QLALR module of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:LGPL$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and Digia. For licensing terms and -** conditions see http://qt.digia.com/licensing. For further information -** use the contact form at http://qt.digia.com/contact-us. -** -** GNU Lesser General Public License Usage -** Alternatively, this file may be used under the terms of the GNU Lesser -** General Public License version 2.1 as published by the Free Software -** Foundation and appearing in the file LICENSE.LGPL included in the -** packaging of this file. Please review the following information to -** ensure the GNU Lesser General Public License version 2.1 requirements -** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. -** -** In addition, as a special exception, Digia gives you certain additional -** rights. These rights are described in the Digia Qt LGPL Exception -** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 3.0 as published by the Free Software -** Foundation and appearing in the file LICENSE.GPL included in the -** packaging of this file. 
Please review the following information to -** ensure the GNU General Public License version 3.0 requirements will be -** met: http://www.gnu.org/copyleft/gpl.html. -** -** -** $QT_END_LICENSE$ -** -****************************************************************************/ - -#ifndef QREGEXPARSER_H -#define QREGEXPARSER_H - -#include <QtCore/QSharedDataPointer> -#include <QtCore/QVarLengthArray> -#include <QtCore/QVariant> -#ifdef QT_BOOTSTRAPPED -# include <QtCore/QRegExp> -# define REGEX QRegExp -#else -# include <QtCore/QRegularExpression> -# define REGEX QRegularExpression -#endif -#include <QtCore/QMap> -#include <QFile> -#include <QTextStream> -#include <QDebug> - -struct MatchCandidate { - MatchCandidate(const QString &n, const QString &t, int i) : name(n), matchText(t), index(i) {} - QString name; - QString matchText; - int index; -}; - -QT_BEGIN_NAMESPACE - -template <typename _Parser, typename _Table> -class QRegexParser: protected _Table -{ -public: - QRegexParser(int maxMatchLen=4096); - virtual ~QRegexParser(); - - virtual bool parse(); - - virtual void reset() {} - - inline QVariant &sym(int index); - - void setBuffer(const QString &buffer); - - void setBufferFromDevice(QIODevice *device); - - void setDebug(); - - QString errorString() const - { - return m_errorString; - } - - void setErrorString(const QString &error) - { - m_errorString = QStringLiteral("%1 error at line %2: %3").arg(m_parserName).arg(QString::number(m_lineno)).arg(error); - qWarning() << m_errorString; - } - - void setParserName(const QString &name) - { - m_parserName = name; - } - - inline const QMap<QString, QString>& captured() const - { - return m_captured; - } - - inline bool isDebug() const - { - return m_debug; - } - - inline int lineNumber() const - { - return m_lineno; - } - -private: - int nextToken(); - - inline bool consumeRule(int rule) - { - return static_cast<_Parser*> (this)->consumeRule(rule); - } - - enum { DefaultStackSize = 128 }; - - struct Data: public 
QSharedData - { - Data(): stackSize (DefaultStackSize), tos (0) {} - - QVarLengthArray<int, DefaultStackSize> stateStack; - QVarLengthArray<QVariant, DefaultStackSize> parseStack; - int stackSize; - int tos; - - void reallocateStack() { - stackSize <<= 1; - stateStack.resize(stackSize); - parseStack.resize(stackSize); - } - }; - - inline QString escapeString(QString s) - { - return s.replace(QLatin1Char('\n'), QStringLiteral("\\n")).replace(QLatin1Char('\t'), QStringLiteral("\\t")); - } - - QSharedDataPointer<Data> d; - - QList<REGEX> m_regexes; -#ifndef QT_BOOTSTRAPPED - QMap<QChar, QList<int> > regexCandidates; -#endif - QList<int> m_tokens; - QString m_buffer, m_lastMatchText; - size_t m_loc, m_lastNewlinePosition; - int m_lineno; - int m_debug; - QStringList m_tokenNames; - QMap<QString, QString> m_captured; - int m_maxMatchLen; - QString m_parserName, m_errorString; - QVector<QMap<int, QString> > m_names; //storage for match names -}; - -template <typename _Parser, typename _Table> -inline QVariant &QRegexParser<_Parser, _Table>::sym(int n) -{ - return d->parseStack [d->tos + n - 1]; -} - -template <typename _Parser, typename _Table> -QRegexParser<_Parser, _Table>::~QRegexParser() -{ -} - -template <typename _Parser, typename _Table> -bool QRegexParser<_Parser, _Table>::parse() -{ - m_errorString.clear(); - reset(); - const int INITIAL_STATE = 0; - - d->tos = 0; - d->reallocateStack(); - - int act = d->stateStack[++d->tos] = INITIAL_STATE; - int token = -1; - - Q_FOREVER { - if (token == -1 && - _Table::TERMINAL_COUNT != _Table::action_index[act]) - token = nextToken(); - - act = _Table::t_action(act, token); - - if (d->stateStack[d->tos] == _Table::ACCEPT_STATE) - return true; - - else if (act > 0) { - if (++d->tos == d->stackSize) - d->reallocateStack(); - - d->parseStack[d->tos] = d->parseStack[d->tos - 1]; - d->stateStack[d->tos] = act; - token = -1; - } - - else if (act < 0) { - int r = - act - 1; - d->tos -= _Table::rhs[r]; - act = 
d->stateStack[d->tos++]; - if (!consumeRule(r)) - return false; - act = d->stateStack[d->tos] = _Table::nt_action(act, _Table::lhs[r] - _Table::TERMINAL_COUNT); - } - - else break; - } - - return false; -} - -template <typename _Parser, typename _Table> -QRegexParser<_Parser, _Table>::QRegexParser(int maxMatchLen) : d(new Data()), m_loc(0), m_lastNewlinePosition(0), m_lineno(1), m_debug(0), m_maxMatchLen(maxMatchLen) -{ - m_parserName = QLatin1String("QRegexParser"); - REGEX re(QStringLiteral("\\[([_a-zA-Z][_0-9a-zA-Z]*)(,\\s*M)?\\](.+)$")); -#ifdef QT_BOOTSTRAPPED - REGEX nameMatch(QStringLiteral("\\((\\?<(.*)>).+\\)")); - nameMatch.setMinimal(true); -#else - re.optimize(); -#endif - QMap<QString, int> token_lookup; - QMap<int, QString> names; - for (int i = 1; i < _Table::lhs[0]; i++) { - const QString text = QLatin1String(_Table::spell[i]); - names.clear(); -#ifdef QT_BOOTSTRAPPED - if (re.indexIn(text) == 0) { - const QString token = re.cap(1); - const bool multiline = re.cap(2).length() > 0; - QString pattern = re.cap(3); - //We need to identify/remove any match names in the pattern, since - //QRegExp doesn't support that feature - int pos = 0, counter = 1, loc = nameMatch.indexIn(pattern, pos); - while (loc >= 0) { - const QString res = nameMatch.cap(2); - if (!res.isEmpty()) { - names.insert(counter, res); - pattern.remove(nameMatch.cap(1)); - } - pos += loc + nameMatch.matchedLength() - nameMatch.cap(1).length(); - loc = nameMatch.indexIn(pattern, pos); - ++counter; - } - //We need to use indexIn, but that will search past the location we - //pass in. So prepend '^' and use QRegExp::CaretAtOffset. 
- if (pattern.at(0) != QChar(QLatin1Char('^'))) - pattern.prepend(QChar(QLatin1Char('^'))); -#else - QRegularExpressionMatch match = re.match(text, 0, QRegularExpression::NormalMatch, QRegularExpression::DontCheckSubjectStringMatchOption); - if (match.hasMatch()) { - const QString token = match.captured(1); - const bool multiline = match.captured(2).length() > 0; - const QString pattern = match.captured(3); -#endif - m_tokenNames.append(token); - int index = i; - if (token_lookup.contains(token)) - index = token_lookup[token]; - else - token_lookup[token] = i; -#ifdef QT_BOOTSTRAPPED - if (multiline) - qWarning() << "The multiline grammar option is ignore in force_bootstrap mode."; -#endif - REGEX pat(pattern); -#ifndef QT_BOOTSTRAPPED - if (multiline) - pat.setPatternOptions(QRegularExpression::DotMatchesEverythingOption); -#endif - if (!pat.isValid()) - qCritical() << "Pattern error for token #" << i << "for" << text << "pattern =" << pat << ":" << pat.errorString(); - else { -#ifndef QT_BOOTSTRAPPED - pat.optimize(); - int counter = 0; - Q_FOREACH (const QString &name, pat.namedCaptureGroups()) { - if (!name.isEmpty()) - names.insert(counter, name); - ++counter; - } -#endif - m_names.append(names); - m_regexes.append(pat); - if (token.startsWith(QLatin1String("ignore"))) - m_tokens.append(-1); - else - m_tokens.append(index); - } - } else { - qCritical() << "Error parsing regex at token #" << i << "for" << text << "Invalid syntax"; - } - } -} - -template <typename _Parser, typename _Table> -void QRegexParser<_Parser, _Table>::setBuffer(const QString &buffer) -{ - m_buffer = buffer; -} - -template <typename _Parser, typename _Table> -void QRegexParser<_Parser, _Table>::setBufferFromDevice(QIODevice *device) -{ - QTextStream in(device); - m_buffer = in.readAll(); -} - -template <typename _Parser, typename _Table> -void QRegexParser<_Parser, _Table>::setDebug() -{ - m_debug = true; - for (int r = 0; r < _Table::RULE_COUNT; ++r) - { - int ridx = 
_Table::rule_index[r]; - int _rhs = _Table::rhs[r]; - qDebug("%3d) %s ::=", r + 1, _Table::spell[_Table::rule_info[ridx]]); - ++ridx; - for (int i = ridx; i < ridx + _rhs; ++i) - { - int symbol = _Table::rule_info[i]; - if (symbol > 0 && symbol < _Table::lhs[0]) - qDebug(" token_%s (pattern = %s)",qPrintable(m_tokenNames[symbol-1]),qPrintable(m_regexes[symbol-1].pattern())); - else if (const char *name = _Table::spell[symbol]) - qDebug(" %s", name); - else - qDebug(" #%d", symbol); - } - qDebug(); - } -} - -template <typename _Parser, typename _Table> -int QRegexParser<_Parser, _Table>::nextToken() -{ - static const REGEX newline(QLatin1String("(\\n)")); - int token = -1; - while (token < 0) - { - if (m_loc == (size_t)(m_buffer.size())) - return _Table::EOF_SYMBOL; - - //Check m_lastMatchText for newlines and update m_lineno - //This isn't necessary, but being able to provide the line # and character # - //where the match is failing sure makes building/debugging grammars easier. -#ifdef QT_BOOTSTRAPPED - int loc = 0, pos = newline.indexIn(m_lastMatchText, loc); - while (pos >= 0) { - m_lineno++; - loc += pos + 1; - m_lastNewlinePosition += pos + 1; - pos = newline.indexIn(m_lastMatchText, loc); - } -#else //QT_BOOTSTRAPPED - QRegularExpressionMatchIterator matches = newline.globalMatch(m_lastMatchText); - while (matches.hasNext()) { - m_lineno++; - QRegularExpressionMatch match = matches.next(); - if (!matches.hasNext()) - m_lastNewlinePosition += match.capturedEnd(); - } -#endif //!QT_BOOTSTRAPPED - if (m_debug) { - qDebug(); - qDebug() << "nextToken loop, line =" << m_lineno - << "line position =" << m_loc - m_lastNewlinePosition - << "next 5 characters =" << escapeString(m_buffer.mid(m_loc, 5)); - } - int best = -1, maxLen = -1; -#ifndef QT_BOOTSTRAPPED - QRegularExpressionMatch bestRegex; -#endif - - //Find the longest match. - //If more than one are the same (longest) length, return the first one in - //the order defined. 
- QList<MatchCandidate> candidates; -#ifndef QT_BOOTSTRAPPED - { - //We used PCRE's PartialMatch to eliminate most of the regexes by the first - //character, so we keep a regexCandidates map with the list of possible regexes - //based on initial characters found so far. - const QChar nextChar = m_buffer.at(m_loc); - //Populate the list if we haven't seeen this character before - if (!regexCandidates.contains(nextChar)) { -# if (QT_VERSION >= QT_VERSION_CHECK(5, 5, 0)) - const QStringRef tmp = m_buffer.midRef(m_loc,1); -# else - const QString tmp = m_buffer.mid(m_loc,1); -# endif - int i = 0; - regexCandidates[nextChar] = QList<int>(); - Q_FOREACH (const QRegularExpression &re, m_regexes) - { - QRegularExpressionMatch match = re.match(tmp, 0, QRegularExpression::PartialPreferFirstMatch, QRegularExpression::DontCheckSubjectStringMatchOption); - //qDebug() << nextChar << tmp << match.hasMatch() << match.hasPartialMatch() << re.pattern(); - if (match.hasMatch() || match.hasPartialMatch()) - regexCandidates[nextChar] << i; - i++; - } - } - Q_FOREACH (int i, regexCandidates.value(nextChar)) - { - //Seems like I should be able to run the regex on the entire string, but performance is horrible - //unless I use a substring. 
- //QRegularExpressionMatch match = m_regexes[i].match(m_buffer, m_loc, QRegularExpression::NormalMatch, QRegularExpression::AnchoredMatchOption); -# if (QT_VERSION >= QT_VERSION_CHECK(5, 5, 0)) - QRegularExpressionMatch match = m_regexes.at(i).match(m_buffer.midRef(m_loc, m_maxMatchLen), 0, QRegularExpression::NormalMatch, QRegularExpression::AnchoredMatchOption | QRegularExpression::DontCheckSubjectStringMatchOption); -# else - QRegularExpressionMatch match = m_regexes.at(i).match(m_buffer.mid(m_loc, m_maxMatchLen), 0, QRegularExpression::NormalMatch, QRegularExpression::AnchoredMatchOption | QRegularExpression::DontCheckSubjectStringMatchOption); -# endif - if (match.hasMatch()) { - if (m_debug) - candidates << MatchCandidate(m_tokenNames[i], match.captured(), i); - if (match.capturedLength() > maxLen) { - best = i; - maxLen = match.capturedLength(); - bestRegex = match; - } - } - } - } -#else - { - int i = 0; - Q_FOREACH (const QRegExp &r, m_regexes) - { - if (r.indexIn(m_buffer, m_loc, QRegExp::CaretAtOffset) == (int)m_loc) { - if (m_debug) - candidates << MatchCandidate(m_tokenNames[i], r.cap(0), i); - if (r.matchedLength() > maxLen) { - best = i; - maxLen = r.matchedLength(); - } - } - ++i; - } - } -#endif - if (best < 0) { - setErrorString(QStringLiteral("Error generating tokens from file, next characters >%1<").arg(m_buffer.mid(m_loc, 15))); - return -1; - } else { - QMapIterator<int, QString> iter(m_names.at(best)); - if (iter.hasNext()) - m_captured.clear(); - while (iter.hasNext()) { - iter.next(); -#ifdef QT_BOOTSTRAPPED - m_captured.insert(iter.value(), m_regexes.at(best).cap(iter.key())); -#else - m_captured.insert(iter.value(), bestRegex.captured(iter.key())); -#endif - } - if (m_debug) { - qDebug() << "Match candidates:"; - Q_FOREACH (const MatchCandidate &m, candidates) { - QLatin1String result = m.index == best ? 
QLatin1String(" * ") : QLatin1String(" "); - qDebug() << qPrintable(result) << qPrintable(m.name) << qPrintable(escapeString(m.matchText)); - } - } - m_loc += maxLen; - if (m_tokens.at(best) >= 0) - token = m_tokens.at(best); -#ifdef QT_BOOTSTRAPPED - m_lastMatchText = m_regexes.at(best).cap(0); -#else - m_lastMatchText = bestRegex.captured(0); -#endif - } - } - return token; -} - -QT_END_NAMESPACE - -#endif // QREGEXPARSER_H |
