summaryrefslogtreecommitdiffstats
path: root/src/repparser/qregexparser.h
diff options
context:
space:
mode:
authorBogDan Vatra <bogdan.vatra.ford@kdab.com>2015-10-09 14:18:30 +0300
committerBogDan Vatra <bogdan@kdab.com>2015-10-09 11:22:33 +0000
commitcda46721d8f574016fc4008af3f35df74ef20435 (patch)
tree2db423ef83873f1a346ab3da0fc33adbac123f37 /src/repparser/qregexparser.h
parent433bcd4b7d76096a648780b2cda4677b996af839 (diff)
Allow apps to use the new repparser
- rename qregexparser.h -> qrepregexparser.h - install parser.g - add a small .prf file to allow easy usage of repparser Change-Id: Iefbe3581f4da56c290e9ba05a8923b6a0e7917bb Reviewed-by: Brett Stottlemyer <bstottle@ford.com>
Diffstat (limited to 'src/repparser/qregexparser.h')
-rw-r--r--src/repparser/qregexparser.h493
1 files changed, 0 insertions, 493 deletions
diff --git a/src/repparser/qregexparser.h b/src/repparser/qregexparser.h
deleted file mode 100644
index 4e5ab10..0000000
--- a/src/repparser/qregexparser.h
+++ /dev/null
@@ -1,493 +0,0 @@
-/****************************************************************************
-** Copyright (C) 2014-2015 Ford Motor Company.
-** All rights reserved.
-**
-** Copyright (C) 2012 Digia Plc and/or its subsidiary(-ies).
-** Contact: http://www.qt-project.org/legal
-**
-** This file is part of the QLALR module of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:LGPL$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and Digia. For licensing terms and
-** conditions see http://qt.digia.com/licensing. For further information
-** use the contact form at http://qt.digia.com/contact-us.
-**
-** GNU Lesser General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU Lesser
-** General Public License version 2.1 as published by the Free Software
-** Foundation and appearing in the file LICENSE.LGPL included in the
-** packaging of this file. Please review the following information to
-** ensure the GNU Lesser General Public License version 2.1 requirements
-** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
-**
-** In addition, as a special exception, Digia gives you certain additional
-** rights. These rights are described in the Digia Qt LGPL Exception
-** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 3.0 as published by the Free Software
-** Foundation and appearing in the file LICENSE.GPL included in the
-** packaging of this file. Please review the following information to
-** ensure the GNU General Public License version 3.0 requirements will be
-** met: http://www.gnu.org/copyleft/gpl.html.
-**
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
-
-#ifndef QREGEXPARSER_H
-#define QREGEXPARSER_H
-
-#include <QtCore/QSharedDataPointer>
-#include <QtCore/QVarLengthArray>
-#include <QtCore/QVariant>
-#ifdef QT_BOOTSTRAPPED
-# include <QtCore/QRegExp>
-# define REGEX QRegExp
-#else
-# include <QtCore/QRegularExpression>
-# define REGEX QRegularExpression
-#endif
-#include <QtCore/QMap>
-#include <QFile>
-#include <QTextStream>
-#include <QDebug>
-
-struct MatchCandidate { // Debug record of one regex that matched at the current position; filled by nextToken() when debugging is on.
- MatchCandidate(const QString &n, const QString &t, int i) : name(n), matchText(t), index(i) {}
- QString name; // token name, taken from the parser's token-name list
- QString matchText; // text the regex captured
- int index; // index of the regex in the parser's regex list
-};
-
-QT_BEGIN_NAMESPACE
-
-template <typename _Parser, typename _Table>
-class QRegexParser: protected _Table // Regex-based tokenizer plus table-driven parser: _Table supplies the tables, _Parser supplies consumeRule().
-{
-public:
- QRegexParser(int maxMatchLen=4096); // maxMatchLen limits the buffer slice each regex is matched against in the QRegularExpression build
- virtual ~QRegexParser();
-
- virtual bool parse(); // true when the accept state is reached, false otherwise
-
- virtual void reset() {} // hook called at the start of parse(); does nothing here
-
- inline QVariant &sym(int index); // parse-stack entry addressed relative to the current top of stack
-
- void setBuffer(const QString &buffer); // sets the text to tokenize
-
- void setBufferFromDevice(QIODevice *device); // reads the whole device through QTextStream into the buffer
-
- void setDebug(); // switches debug output on and prints the grammar rules
-
- QString errorString() const // last message built by setErrorString(); parse() clears it when it starts
- {
- return m_errorString;
- }
-
- void setErrorString(const QString &error) // stores "<parser name> error at line <line>: <error>" and also emits it through qWarning()
- {
- m_errorString = QStringLiteral("%1 error at line %2: %3").arg(m_parserName).arg(QString::number(m_lineno)).arg(error);
- qWarning() << m_errorString;
- }
-
- void setParserName(const QString &name) // name used as the prefix of error messages
- {
- m_parserName = name;
- }
-
- inline const QMap<QString, QString>& captured() const // named captures (name -> text) stored by nextToken() for the latest token that defined any
- {
- return m_captured;
- }
-
- inline bool isDebug() const
- {
- return m_debug;
- }
-
- inline int lineNumber() const // line counter maintained by nextToken(); starts at 1
- {
- return m_lineno;
- }
-
-private:
- int nextToken(); // tokenizer used by parse()
-
- inline bool consumeRule(int rule) // forwards to _Parser::consumeRule(); the static_cast requires this object to really be a _Parser
- {
- return static_cast<_Parser*> (this)->consumeRule(rule);
- }
-
- enum { DefaultStackSize = 128 };
-
- struct Data: public QSharedData // state and value stacks used by parse()
- {
- Data(): stackSize (DefaultStackSize), tos (0) {}
-
- QVarLengthArray<int, DefaultStackSize> stateStack;
- QVarLengthArray<QVariant, DefaultStackSize> parseStack;
- int stackSize;
- int tos; // index of the current top-of-stack entry
-
- void reallocateStack() { // doubles stackSize and resizes both stacks to the new value
- stackSize <<= 1;
- stateStack.resize(stackSize);
- parseStack.resize(stackSize);
- }
- };
-
- inline QString escapeString(QString s) // returns s with newline and tab replaced by the two-character sequences \n and \t (for debug output)
- {
- return s.replace(QLatin1Char('\n'), QStringLiteral("\\n")).replace(QLatin1Char('\t'), QStringLiteral("\\t"));
- }
-
- QSharedDataPointer<Data> d;
-
- QList<REGEX> m_regexes; // compiled token patterns, appended by the constructor
-#ifndef QT_BOOTSTRAPPED
- QMap<QChar, QList<int> > regexCandidates; // first character -> indexes of regexes that may match text starting with it; filled lazily by nextToken()
-#endif
- QList<int> m_tokens; // token id per regex; -1 for tokens whose name starts with "ignore"
- QString m_buffer, m_lastMatchText; // text being tokenized / text of the most recent match
- size_t m_loc, m_lastNewlinePosition; // current offset into m_buffer / offset used to compute the column shown in debug output
- int m_lineno;
- int m_debug;
- QStringList m_tokenNames;
- QMap<QString, QString> m_captured;
- int m_maxMatchLen;
- QString m_parserName, m_errorString;
- QVector<QMap<int, QString> > m_names; //storage for match names
-};
-
-template <typename _Parser, typename _Table>
-inline QVariant &QRegexParser<_Parser, _Table>::sym(int n) // Mutable reference to parse-stack entry tos + n - 1, so sym(1) is the entry at the current top-of-stack index.
-{
- return d->parseStack [d->tos + n - 1];
-}
-
-template <typename _Parser, typename _Table>
-QRegexParser<_Parser, _Table>::~QRegexParser()
-{ // empty body: no explicit cleanup is performed here
-}
-
-template <typename _Parser, typename _Table>
-bool QRegexParser<_Parser, _Table>::parse() // Runs the table-driven shift/reduce loop over tokens from nextToken(); returns true on accept, false on any failure.
-{
- m_errorString.clear();
- reset();
- const int INITIAL_STATE = 0;
-
- d->tos = 0;
- if (d->stateStack.size() < d->stackSize) d->reallocateStack(); // size the stacks once; doubling them on every parse() call grew them without bound
-
- int act = d->stateStack[++d->tos] = INITIAL_STATE;
- int token = -1; // -1 means no lookahead token is held
-
- Q_FOREVER {
- if (token == -1 && - _Table::TERMINAL_COUNT != _Table::action_index[act])
- if ((token = nextToken()) < 0) return false; // tokenizer failed and already stored the error: stop instead of reducing and tokenizing again
-
- act = _Table::t_action(act, token);
-
- if (d->stateStack[d->tos] == _Table::ACCEPT_STATE)
- return true;
-
- else if (act > 0) { // shift: push the new state and drop the lookahead
- if (++d->tos == d->stackSize)
- d->reallocateStack();
-
- d->parseStack[d->tos] = d->parseStack[d->tos - 1];
- d->stateStack[d->tos] = act;
- token = -1;
- }
-
- else if (act < 0) { // reduce by rule r
- int r = - act - 1;
- d->tos -= _Table::rhs[r]; // pop one entry per right-hand-side symbol
- act = d->stateStack[d->tos++];
- if (!consumeRule(r)) // the rule handler can abort the parse
- return false;
- act = d->stateStack[d->tos] = _Table::nt_action(act, _Table::lhs[r] - _Table::TERMINAL_COUNT);
- }
-
- else break; // act == 0: no action available, reported as failure below
- }
-
- return false;
-}
-
-template <typename _Parser, typename _Table>
-QRegexParser<_Parser, _Table>::QRegexParser(int maxMatchLen) : d(new Data()), m_loc(0), m_lastNewlinePosition(0), m_lineno(1), m_debug(0), m_maxMatchLen(maxMatchLen)
-{ // Compiles one regex per terminal whose _Table::spell[] text has the form "[name]pattern" or "[name,M]pattern" (M requests multiline matching).
- m_parserName = QLatin1String("QRegexParser");
- REGEX re(QStringLiteral("\\[([_a-zA-Z][_0-9a-zA-Z]*)(,\\s*M)?\\](.+)$")); // capture 1 = token name, 2 = optional ",M" flag, 3 = pattern
-#ifdef QT_BOOTSTRAPPED
- REGEX nameMatch(QStringLiteral("\\((\\?<(.*)>).+\\)"));
- nameMatch.setMinimal(true);
-#else
- re.optimize();
-#endif
- QMap<QString, int> token_lookup; // token name -> index of the first terminal that used it
- QMap<int, QString> names; // capture index -> capture name for the pattern being processed
- for (int i = 1; i < _Table::lhs[0]; i++) {
- const QString text = QLatin1String(_Table::spell[i]);
- names.clear();
-#ifdef QT_BOOTSTRAPPED
- if (re.indexIn(text) == 0) {
- const QString token = re.cap(1);
- const bool multiline = re.cap(2).length() > 0;
- QString pattern = re.cap(3);
- //We need to identify/remove any match names in the pattern, since
- //QRegExp doesn't support that feature
- int pos = 0, counter = 1, loc = nameMatch.indexIn(pattern, pos);
- while (loc >= 0) {
- const QString res = nameMatch.cap(2);
- if (!res.isEmpty()) {
- names.insert(counter, res);
- pattern.remove(nameMatch.cap(1));
- }
- pos = loc + nameMatch.matchedLength() - nameMatch.cap(1).length(); // indexIn() returns an absolute offset, so assign instead of accumulating
- loc = nameMatch.indexIn(pattern, pos);
- ++counter;
- }
- //We need to use indexIn, but that will search past the location we
- //pass in. So prepend '^' and use QRegExp::CaretAtOffset.
- if (pattern.at(0) != QChar(QLatin1Char('^')))
- pattern.prepend(QChar(QLatin1Char('^')));
-#else
- QRegularExpressionMatch match = re.match(text, 0, QRegularExpression::NormalMatch, QRegularExpression::DontCheckSubjectStringMatchOption);
- if (match.hasMatch()) {
- const QString token = match.captured(1);
- const bool multiline = match.captured(2).length() > 0;
- const QString pattern = match.captured(3);
-#endif
- int index = i;
- if (token_lookup.contains(token)) // a repeated token name reuses the index of its first occurrence
- index = token_lookup[token];
- else
- token_lookup[token] = i;
-#ifdef QT_BOOTSTRAPPED
- if (multiline)
- qWarning() << "The multiline grammar option is ignore in force_bootstrap mode.";
-#endif
- REGEX pat(pattern);
-#ifndef QT_BOOTSTRAPPED
- if (multiline)
- pat.setPatternOptions(QRegularExpression::DotMatchesEverythingOption);
-#endif
- if (!pat.isValid())
- qCritical() << "Pattern error for token #" << i << "for" << text << "pattern =" << pat << ":" << pat.errorString();
- else {
-#ifndef QT_BOOTSTRAPPED
- pat.optimize();
- int counter = 0;
- Q_FOREACH (const QString &name, pat.namedCaptureGroups()) {
- if (!name.isEmpty())
- names.insert(counter, name);
- ++counter;
- }
-#endif
- m_tokenNames.append(token); // appended together with m_names/m_regexes/m_tokens so the four lists always share indexes
- m_names.append(names);
- m_regexes.append(pat);
- if (token.startsWith(QLatin1String("ignore"))) // "ignore..." tokens are matched but never handed to the parser
- m_tokens.append(-1);
- else
- m_tokens.append(index);
- }
- } else {
- qCritical() << "Error parsing regex at token #" << i << "for" << text << "Invalid syntax";
- }
- }
-}
-
-template <typename _Parser, typename _Table>
-void QRegexParser<_Parser, _Table>::setBuffer(const QString &buffer) // Replaces the text that nextToken() reads.
-{
- m_buffer = buffer; // NOTE(review): m_loc, m_lineno and m_lastMatchText keep their previous values here — confirm callers use a fresh parser per buffer
-}
-
-template <typename _Parser, typename _Table>
-void QRegexParser<_Parser, _Table>::setBufferFromDevice(QIODevice *device) // Loads everything QTextStream can read from device as the text to tokenize.
-{
- QTextStream in(device); // the device is used as passed in; it is neither opened nor checked here
- m_buffer = in.readAll();
-}
-
-template <typename _Parser, typename _Table>
-void QRegexParser<_Parser, _Table>::setDebug() // Turns debug tracing on and prints every grammar rule through qDebug().
-{
- m_debug = true;
- for (int r = 0; r < _Table::RULE_COUNT; ++r)
- {
- int ridx = _Table::rule_index[r];
- int _rhs = _Table::rhs[r]; // number of right-hand-side symbols printed for rule r
- qDebug("%3d) %s ::=", r + 1, _Table::spell[_Table::rule_info[ridx]]); // the entry at ridx is printed as the rule's left-hand side
- ++ridx;
- for (int i = ridx; i < ridx + _rhs; ++i)
- {
- int symbol = _Table::rule_info[i];
- if (symbol > 0 && symbol < _Table::lhs[0]) // same index range the constructor compiles regexes for
- qDebug(" token_%s (pattern = %s)",qPrintable(m_tokenNames[symbol-1]),qPrintable(m_regexes[symbol-1].pattern())); // NOTE(review): symbol-1 indexing presumes every terminal produced a list entry in the constructor — confirm
- else if (const char *name = _Table::spell[symbol])
- qDebug(" %s", name);
- else
- qDebug(" #%d", symbol);
- }
- qDebug();
- }
-}
-
-template <typename _Parser, typename _Table>
-int QRegexParser<_Parser, _Table>::nextToken() // Returns the next non-ignored token id, _Table::EOF_SYMBOL at end of buffer, or -1 (after setErrorString()) when no regex matches.
-{
- static const REGEX newline(QLatin1String("(\\n)"));
- int token = -1;
- while (token < 0) // repeats while the token just matched is an ignored one
- {
- if (m_loc == (size_t)(m_buffer.size()))
- return _Table::EOF_SYMBOL;
-
- //Count the newlines in m_lastMatchText to keep m_lineno (used in error messages) and
- //m_lastNewlinePosition (offset just past the latest newline, used for debug output) up to date.
-#ifdef QT_BOOTSTRAPPED
- int loc = 0, pos = newline.indexIn(m_lastMatchText, loc);
- while (pos >= 0) {
- m_lineno++;
- loc = pos + 1; // indexIn() returns an absolute offset; accumulating it skipped consecutive newlines
- m_lastNewlinePosition = m_loc - m_lastMatchText.length() + pos + 1; // the last match ended at m_loc, so this is an absolute buffer offset
- pos = newline.indexIn(m_lastMatchText, loc);
- }
-#else //QT_BOOTSTRAPPED
- QRegularExpressionMatchIterator matches = newline.globalMatch(m_lastMatchText);
- while (matches.hasNext()) {
- m_lineno++;
- QRegularExpressionMatch match = matches.next();
- if (!matches.hasNext())
- m_lastNewlinePosition = m_loc - m_lastMatchText.length() + match.capturedEnd(); // absolute offset just past the last newline of the previous match
- }
-#endif //!QT_BOOTSTRAPPED
- m_lastMatchText.clear(); // counted once; a call that follows a failed match must not count the same text again
- if (m_debug) {
- qDebug();
- qDebug() << "nextToken loop, line =" << m_lineno
- << "line position =" << m_loc - m_lastNewlinePosition
- << "next 5 characters =" << escapeString(m_buffer.mid(m_loc, 5));
- }
- int best = -1, maxLen = -1; // index and length of the longest match found so far
-#ifndef QT_BOOTSTRAPPED
- QRegularExpressionMatch bestRegex;
-#endif
-
- //Find the longest match.
- //If more than one are the same (longest) length, return the first one in
- //the order defined.
- QList<MatchCandidate> candidates; // only filled when debugging
-#ifndef QT_BOOTSTRAPPED
- {
- //We used PCRE's PartialMatch to eliminate most of the regexes by the first
- //character, so we keep a regexCandidates map with the list of possible regexes
- //based on initial characters found so far.
- const QChar nextChar = m_buffer.at(m_loc);
- //Populate the list if we haven't seen this character before
- if (!regexCandidates.contains(nextChar)) {
-# if (QT_VERSION >= QT_VERSION_CHECK(5, 5, 0))
- const QStringRef tmp = m_buffer.midRef(m_loc,1);
-# else
- const QString tmp = m_buffer.mid(m_loc,1);
-# endif
- int i = 0;
- regexCandidates[nextChar] = QList<int>();
- Q_FOREACH (const QRegularExpression &re, m_regexes)
- {
- QRegularExpressionMatch match = re.match(tmp, 0, QRegularExpression::PartialPreferFirstMatch, QRegularExpression::DontCheckSubjectStringMatchOption);
- //qDebug() << nextChar << tmp << match.hasMatch() << match.hasPartialMatch() << re.pattern();
- if (match.hasMatch() || match.hasPartialMatch())
- regexCandidates[nextChar] << i;
- i++;
- }
- }
- Q_FOREACH (int i, regexCandidates.value(nextChar))
- {
- //Seems like I should be able to run the regex on the entire string, but performance is horrible
- //unless I use a substring.
- //QRegularExpressionMatch match = m_regexes[i].match(m_buffer, m_loc, QRegularExpression::NormalMatch, QRegularExpression::AnchoredMatchOption);
-# if (QT_VERSION >= QT_VERSION_CHECK(5, 5, 0))
- QRegularExpressionMatch match = m_regexes.at(i).match(m_buffer.midRef(m_loc, m_maxMatchLen), 0, QRegularExpression::NormalMatch, QRegularExpression::AnchoredMatchOption | QRegularExpression::DontCheckSubjectStringMatchOption);
-# else
- QRegularExpressionMatch match = m_regexes.at(i).match(m_buffer.mid(m_loc, m_maxMatchLen), 0, QRegularExpression::NormalMatch, QRegularExpression::AnchoredMatchOption | QRegularExpression::DontCheckSubjectStringMatchOption);
-# endif
- if (match.hasMatch()) {
- if (m_debug)
- candidates << MatchCandidate(m_tokenNames[i], match.captured(), i);
- if (match.capturedLength() > maxLen) { // strict '>' keeps the earliest regex among equally long matches
- best = i;
- maxLen = match.capturedLength();
- bestRegex = match;
- }
- }
- }
- }
-#else
- {
- int i = 0;
- Q_FOREACH (const QRegExp &r, m_regexes)
- {
- if (r.indexIn(m_buffer, m_loc, QRegExp::CaretAtOffset) == (int)m_loc) {
- if (m_debug)
- candidates << MatchCandidate(m_tokenNames[i], r.cap(0), i);
- if (r.matchedLength() > maxLen) { // strict '>' keeps the earliest regex among equally long matches
- best = i;
- maxLen = r.matchedLength();
- }
- }
- ++i;
- }
- }
-#endif
- if (best < 0) { // nothing matched at m_loc: record the error and give up
- setErrorString(QStringLiteral("Error generating tokens from file, next characters >%1<").arg(m_buffer.mid(m_loc, 15)));
- return -1;
- } else {
- QMapIterator<int, QString> iter(m_names.at(best));
- if (iter.hasNext())
- m_captured.clear(); // earlier captures are kept when the winning pattern defines no named groups
- while (iter.hasNext()) {
- iter.next();
-#ifdef QT_BOOTSTRAPPED
- m_captured.insert(iter.value(), m_regexes.at(best).cap(iter.key()));
-#else
- m_captured.insert(iter.value(), bestRegex.captured(iter.key()));
-#endif
- }
- if (m_debug) {
- qDebug() << "Match candidates:";
- Q_FOREACH (const MatchCandidate &m, candidates) {
- QLatin1String result = m.index == best ? QLatin1String(" * ") : QLatin1String(" ");
- qDebug() << qPrintable(result) << qPrintable(m.name) << qPrintable(escapeString(m.matchText));
- }
- }
- m_loc += maxLen; // consume the matched text
- if (m_tokens.at(best) >= 0) // ignored tokens (-1) leave token negative, so the loop runs again
- token = m_tokens.at(best);
-#ifdef QT_BOOTSTRAPPED
- m_lastMatchText = m_regexes.at(best).cap(0);
-#else
- m_lastMatchText = bestRegex.captured(0);
-#endif
- }
- }
- return token;
-}
-
-QT_END_NAMESPACE
-
-#endif // QREGEXPARSER_H