diff --git a/.gitignore b/.gitignore index 307433c..4a785b7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +.antlr/ +build/ + # Prerequisites *.d @@ -31,8 +34,6 @@ *.out *.app -build/ - CMakeLists.txt.user CMakeCache.txt CMakeFiles diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json new file mode 100644 index 0000000..612616b --- /dev/null +++ b/.vscode/c_cpp_properties.json @@ -0,0 +1,18 @@ +{ + "configurations": [ + { + "name": "Linux", + "includePath": [ + "${workspaceFolder}/include", + "${workspaceFolder}/**" + ], + "defines": [], + "compilerPath": "/usr/lib64/ccache/clang", + "cStandard": "c11", + "cppStandard": "c++14", + "intelliSenseMode": "clang-x64", + "configurationProvider": "ms-vscode.cmake-tools" + } + ], + "version": 4 +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json index 0db5873..4544474 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,3 +1,10 @@ { - "cmake.configureOnOpen": true + "cmake.configureOnOpen": true, + "antlr4.generation": { + "mode": "external", + "language": "Cpp", + "listeners": true, + "visitors": true, + "outputDir": "../lib/generated" + } } \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 057fe38..b53171f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,11 +9,21 @@ set(CMAKE_CXX_STANDARD_REQUIRED True) set(SOURCES src/main.cpp + lib/generated/bfLexer.cpp + lib/generated/bfParser.cpp + lib/generated/bfBaseVisitor.cpp + lib/generated/bfVisitor.cpp + lib/generated/bfBaseListener.cpp + lib/generated/bfListener.cpp ) - add_executable(main.out ${SOURCES}) +target_link_libraries(main.out ${PROJECT_SOURCE_DIR}/lib/antlr4/lib/libantlr4-runtime.a) + target_include_directories(main.out PRIVATE ${PROJECT_SOURCE_DIR}/include + ${PROJECT_SOURCE_DIR}/lib/antlr4/include + ${PROJECT_SOURCE_DIR}/lib/generated + ) diff --git a/grammar/bf.g4 b/grammar/bf.g4 new file mode 100644 index 0000000..fc91c00 --- /dev/null +++ 
b/grammar/bf.g4 @@ -0,0 +1,13 @@ +grammar bf; + +program: statements; + +statements: INC | DEC | INPUT|OUTPUT; + +COMMENT: '\\\\.*?\\\\' -> skip; +INPUT: '?'; +OUTPUT: '.'; +DEC: '-'; +INC: '+'; +LEFT: '>'; +RIGHT: '<'; diff --git a/include/main.hpp b/include/main.hpp new file mode 100644 index 0000000..b4243fa --- /dev/null +++ b/include/main.hpp @@ -0,0 +1,3 @@ +#ifndef __MAIN_HPP +#define FIVE 5 +#endif \ No newline at end of file diff --git a/lib/antlr4/include/ANTLRErrorListener.h b/lib/antlr4/include/ANTLRErrorListener.h new file mode 100644 index 0000000..d6efad1 --- /dev/null +++ b/lib/antlr4/include/ANTLRErrorListener.h @@ -0,0 +1,167 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "RecognitionException.h" + +namespace antlrcpp { + class BitSet; +} + +namespace antlr4 { + + /// How to emit recognition errors (an interface in Java). + class ANTLR4CPP_PUBLIC ANTLRErrorListener { + public: + virtual ~ANTLRErrorListener(); + + /// + /// Upon syntax error, notify any interested parties. This is not how to + /// recover from errors or compute error messages. + /// specifies how to recover from syntax errors and how to compute error + /// messages. This listener's job is simply to emit a computed message, + /// though it has enough information to create its own message in many cases. + ///

+ /// The exception {@code e} is non-null for all syntax errors except + /// when we discover mismatched token errors that we can recover from + /// in-line, without returning from the surrounding rule (via the single + /// token insertion and deletion mechanism). + ///

+ /// + /// What parser got the error. From this + /// object, you can access the context as well + /// as the input stream. + /// + /// The offending token in the input token + /// stream, unless recognizer is a lexer (then it's null). If + /// no viable alternative error, {@code e} has token at which we + /// started production for the decision. + /// + /// The line number in the input where the error occurred. + /// + /// The character position within that line where the error occurred. + /// + /// The message to emit. + /// + /// The exception generated by the parser that led to + /// the reporting of an error. It is null in the case where + /// the parser was able to recover in line without exiting the + /// surrounding rule. + virtual void syntaxError(Recognizer *recognizer, Token *offendingSymbol, size_t line, + size_t charPositionInLine, const std::string &msg, std::exception_ptr e) = 0; + + /** + * This method is called by the parser when a full-context prediction + * results in an ambiguity. + * + *

Each full-context prediction which does not result in a syntax error + * will call either {@link #reportContextSensitivity} or + * {@link #reportAmbiguity}.

+ * + *

When {@code ambigAlts} is not null, it contains the set of potentially + * viable alternatives identified by the prediction algorithm. When + * {@code ambigAlts} is null, use {@link ATNConfigSet#getAlts} to obtain the + * represented alternatives from the {@code configs} argument.

+ * + *

When {@code exact} is {@code true}, all of the potentially + * viable alternatives are truly viable, i.e. this is reporting an exact + * ambiguity. When {@code exact} is {@code false}, at least two of + * the potentially viable alternatives are viable for the current input, but + * the prediction algorithm terminated as soon as it determined that at + * least the minimum potentially viable alternative is truly + * viable.

+ * + *

When the {@link PredictionMode#LL_EXACT_AMBIG_DETECTION} prediction + * mode is used, the parser is required to identify exact ambiguities so + * {@code exact} will always be {@code true}.

+ * + *

This method is not used by lexers.

+ * + * @param recognizer the parser instance + * @param dfa the DFA for the current decision + * @param startIndex the input index where the decision started + * @param stopIndex the input index where the ambiguity was identified + * @param exact {@code true} if the ambiguity is exactly known, otherwise + * {@code false}. This is always {@code true} when + * {@link PredictionMode#LL_EXACT_AMBIG_DETECTION} is used. + * @param ambigAlts the potentially ambiguous alternatives, or {@code null} + * to indicate that the potentially ambiguous alternatives are the complete + * set of represented alternatives in {@code configs} + * @param configs the ATN configuration set where the ambiguity was + * identified + */ + virtual void reportAmbiguity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, bool exact, + const antlrcpp::BitSet &ambigAlts, atn::ATNConfigSet *configs) = 0; + + /** + * This method is called when an SLL conflict occurs and the parser is about + * to use the full context information to make an LL decision. + * + *

If one or more configurations in {@code configs} contains a semantic + * predicate, the predicates are evaluated before this method is called. The + * subset of alternatives which are still viable after predicates are + * evaluated is reported in {@code conflictingAlts}.

+ * + *

This method is not used by lexers.

+ * + * @param recognizer the parser instance + * @param dfa the DFA for the current decision + * @param startIndex the input index where the decision started + * @param stopIndex the input index where the SLL conflict occurred + * @param conflictingAlts The specific conflicting alternatives. If this is + * {@code null}, the conflicting alternatives are all alternatives + * represented in {@code configs}. At the moment, conflictingAlts is non-null + * (for the reference implementation, but Sam's optimized version can see this + * as null). + * @param configs the ATN configuration set where the SLL conflict was + * detected + */ + virtual void reportAttemptingFullContext(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, + const antlrcpp::BitSet &conflictingAlts, atn::ATNConfigSet *configs) = 0; + + /** + * This method is called by the parser when a full-context prediction has a + * unique result. + * + *

Each full-context prediction which does not result in a syntax error + * will call either {@link #reportContextSensitivity} or + * {@link #reportAmbiguity}.

+ * + *

For prediction implementations that only evaluate full-context + * predictions when an SLL conflict is found (including the default + * {@link ParserATNSimulator} implementation), this method reports cases + * where SLL conflicts were resolved to unique full-context predictions, + * i.e. the decision was context-sensitive. This report does not necessarily + * indicate a problem, and it may appear even in completely unambiguous + * grammars.

+ * + *

{@code configs} may have more than one represented alternative if the + * full-context prediction algorithm does not evaluate predicates before + * beginning the full-context prediction. In all cases, the final prediction + * is passed as the {@code prediction} argument.

+ * + *

Note that the definition of "context sensitivity" in this method + * differs from the concept in {@link DecisionInfo#contextSensitivities}. + * This method reports all instances where an SLL conflict occurred but LL + * parsing produced a unique result, whether or not that unique result + * matches the minimum alternative in the SLL conflicting set.

+ * + *

This method is not used by lexers.

+ * + * @param recognizer the parser instance + * @param dfa the DFA for the current decision + * @param startIndex the input index where the decision started + * @param stopIndex the input index where the context sensitivity was + * finally determined + * @param prediction the unambiguous result of the full-context prediction + * @param configs the ATN configuration set where the unambiguous prediction + * was determined + */ + virtual void reportContextSensitivity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, + size_t prediction, atn::ATNConfigSet *configs) = 0; + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/ANTLRErrorStrategy.h b/lib/antlr4/include/ANTLRErrorStrategy.h new file mode 100644 index 0000000..a3eecd1 --- /dev/null +++ b/lib/antlr4/include/ANTLRErrorStrategy.h @@ -0,0 +1,121 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Token.h" + +namespace antlr4 { + + /// + /// The interface for defining strategies to deal with syntax errors encountered + /// during a parse by ANTLR-generated parsers. We distinguish between three + /// different kinds of errors: + /// + /// + /// + /// Implementations of this interface report syntax errors by calling + /// . + ///

+ /// TODO: what to do about lexers + ///

+ class ANTLR4CPP_PUBLIC ANTLRErrorStrategy { + public: + + /// + /// Reset the error handler state for the specified {@code recognizer}. + /// the parser instance + virtual ~ANTLRErrorStrategy(); + + virtual void reset(Parser *recognizer) = 0; + + /** + * This method is called when an unexpected symbol is encountered during an + * inline match operation, such as {@link Parser#match}. If the error + * strategy successfully recovers from the match failure, this method + * returns the {@link Token} instance which should be treated as the + * successful result of the match. + * + *

This method handles the consumption of any tokens - the caller should + * not call {@link Parser#consume} after a successful recovery.

+ * + *

Note that the calling code will not report an error if this method + * returns successfully. The error strategy implementation is responsible + * for calling {@link Parser#notifyErrorListeners} as appropriate.

+ * + * @param recognizer the parser instance + * @throws RecognitionException if the error strategy was not able to + * recover from the unexpected input symbol + */ + virtual Token* recoverInline(Parser *recognizer) = 0; + + /// + /// This method is called to recover from exception {@code e}. This method is + /// called after {@link #reportError} by the default exception handler + /// generated for a rule method. + /// + /// + /// the parser instance + /// the recognition exception to recover from + /// if the error strategy could not recover from + /// the recognition exception + virtual void recover(Parser *recognizer, std::exception_ptr e) = 0; + + /// + /// This method provides the error handler with an opportunity to handle + /// syntactic or semantic errors in the input stream before they result in a + /// {@link RecognitionException}. + ///

+ /// The generated code currently contains calls to {@link #sync} after + /// entering the decision state of a closure block ({@code (...)*} or + /// {@code (...)+}). + ///

+ /// For an implementation based on Jim Idle's "magic sync" mechanism, see + /// {@link DefaultErrorStrategy#sync}. + ///

+ /// + /// the parser instance + /// if an error is detected by the error + /// strategy but cannot be automatically recovered at the current state in + /// the parsing process + virtual void sync(Parser *recognizer) = 0; + + /// + /// Tests whether or not {@code recognizer} is in the process of recovering + /// from an error. In error recovery mode, adds + /// symbols to the parse tree by calling + /// {@link Parser#createErrorNode(ParserRuleContext, Token)} then + /// {@link ParserRuleContext#addErrorNode(ErrorNode)} instead of + /// {@link Parser#createTerminalNode(ParserRuleContext, Token)}. + /// + /// the parser instance + /// {@code true} if the parser is currently recovering from a parse + /// error, otherwise {@code false} + virtual bool inErrorRecoveryMode(Parser *recognizer) = 0; + + /// + /// This method is called by when the parser successfully matches an input + /// symbol. + /// + /// the parser instance + virtual void reportMatch(Parser *recognizer) = 0; + + /// + /// Report any kind of . This method is called by + /// the default exception handler generated for a rule method. + /// + /// the parser instance + /// the recognition exception to report + virtual void reportError(Parser *recognizer, const RecognitionException &e) = 0; + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/ANTLRFileStream.h b/lib/antlr4/include/ANTLRFileStream.h new file mode 100644 index 0000000..10c8550 --- /dev/null +++ b/lib/antlr4/include/ANTLRFileStream.h @@ -0,0 +1,27 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "ANTLRInputStream.h" + +namespace antlr4 { + + /// This is an ANTLRInputStream that is loaded from a file all at once + /// when you construct the object (or call load()). + // TODO: this class needs testing. 
+ class ANTLR4CPP_PUBLIC ANTLRFileStream : public ANTLRInputStream { + protected: + std::string _fileName; // UTF-8 encoded file name. + + public: + // Assumes a file name encoded in UTF-8 and file content in the same encoding (with or w/o BOM). + ANTLRFileStream(const std::string &fileName); + + virtual void loadFromFile(const std::string &fileName); + virtual std::string getSourceName() const override; + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/ANTLRInputStream.h b/lib/antlr4/include/ANTLRInputStream.h new file mode 100644 index 0000000..e985050 --- /dev/null +++ b/lib/antlr4/include/ANTLRInputStream.h @@ -0,0 +1,69 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "CharStream.h" + +namespace antlr4 { + + // Vacuum all input from a stream and then treat it + // like a string. Can also pass in a string or char[] to use. + // Input is expected to be encoded in UTF-8 and converted to UTF-32 internally. + class ANTLR4CPP_PUBLIC ANTLRInputStream : public CharStream { + protected: + /// The data being scanned. + // UTF-32 + UTF32String _data; + + /// 0..n-1 index into string of next char + size_t p; + + public: + /// What is name or source of this char stream? + std::string name; + + ANTLRInputStream(const std::string &input = ""); + ANTLRInputStream(const char data_[], size_t numberOfActualCharsInArray); + ANTLRInputStream(std::istream &stream); + + virtual void load(const std::string &input); + virtual void load(std::istream &stream); + + /// Reset the stream so that it's in the same state it was + /// when the object was created *except* the data array is not + /// touched. 
+ virtual void reset(); + virtual void consume() override; + virtual size_t LA(ssize_t i) override; + virtual size_t LT(ssize_t i); + + /// + /// Return the current input symbol index 0..n where n indicates the + /// last symbol has been read. The index is the index of char to + /// be returned from LA(1). + /// + virtual size_t index() override; + virtual size_t size() override; + + /// + /// mark/release do nothing; we have entire buffer + virtual ssize_t mark() override; + virtual void release(ssize_t marker) override; + + /// + /// consume() ahead until p==index; can't just set p=index as we must + /// update line and charPositionInLine. If we seek backwards, just set p + /// + virtual void seek(size_t index) override; + virtual std::string getText(const misc::Interval &interval) override; + virtual std::string getSourceName() const override; + virtual std::string toString() const override; + + private: + void InitializeInstanceFields(); + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/BailErrorStrategy.h b/lib/antlr4/include/BailErrorStrategy.h new file mode 100644 index 0000000..2a8c36f --- /dev/null +++ b/lib/antlr4/include/BailErrorStrategy.h @@ -0,0 +1,59 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "DefaultErrorStrategy.h" + +namespace antlr4 { + + /** + * This implementation of {@link ANTLRErrorStrategy} responds to syntax errors + * by immediately canceling the parse operation with a + * {@link ParseCancellationException}. The implementation ensures that the + * {@link ParserRuleContext#exception} field is set for all parse tree nodes + * that were not completed prior to encountering the error. + * + *

+ * This error strategy is useful in the following scenarios.

+ * + * + * + *

+ * {@code myparser.setErrorHandler(new BailErrorStrategy());}

+ * + * @see Parser#setErrorHandler(ANTLRErrorStrategy) + */ + class ANTLR4CPP_PUBLIC BailErrorStrategy : public DefaultErrorStrategy { + /// + /// Instead of recovering from exception {@code e}, re-throw it wrapped + /// in a so it is not caught by the + /// rule function catches. Use to get the + /// original . + /// + public: + virtual void recover(Parser *recognizer, std::exception_ptr e) override; + + /// Make sure we don't attempt to recover inline; if the parser + /// successfully recovers, it won't throw an exception. + virtual Token* recoverInline(Parser *recognizer) override; + + /// + /// Make sure we don't attempt to recover from problems in subrules. + virtual void sync(Parser *recognizer) override; + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/BaseErrorListener.h b/lib/antlr4/include/BaseErrorListener.h new file mode 100644 index 0000000..aad2e5d --- /dev/null +++ b/lib/antlr4/include/BaseErrorListener.h @@ -0,0 +1,36 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "ANTLRErrorListener.h" + +namespace antlrcpp { + class BitSet; +} + +namespace antlr4 { + + /** + * Provides an empty default implementation of {@link ANTLRErrorListener}. The + * default implementation of each method does nothing, but can be overridden as + * necessary. 
+ */ + class ANTLR4CPP_PUBLIC BaseErrorListener : public ANTLRErrorListener { + + virtual void syntaxError(Recognizer *recognizer, Token * offendingSymbol, size_t line, size_t charPositionInLine, + const std::string &msg, std::exception_ptr e) override; + + virtual void reportAmbiguity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, bool exact, + const antlrcpp::BitSet &ambigAlts, atn::ATNConfigSet *configs) override; + + virtual void reportAttemptingFullContext(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, + const antlrcpp::BitSet &conflictingAlts, atn::ATNConfigSet *configs) override; + + virtual void reportContextSensitivity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, + size_t prediction, atn::ATNConfigSet *configs) override; + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/BufferedTokenStream.h b/lib/antlr4/include/BufferedTokenStream.h new file mode 100644 index 0000000..fab74d2 --- /dev/null +++ b/lib/antlr4/include/BufferedTokenStream.h @@ -0,0 +1,200 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "TokenStream.h" + +namespace antlr4 { + + /** + * This implementation of {@link TokenStream} loads tokens from a + * {@link TokenSource} on-demand, and places the tokens in a buffer to provide + * access to any previous token by index. + * + *

+ * This token stream ignores the value of {@link Token#getChannel}. If your + * parser requires the token stream filter tokens to only those on a particular + * channel, such as {@link Token#DEFAULT_CHANNEL} or + * {@link Token#HIDDEN_CHANNEL}, use a filtering token stream such as + * {@link CommonTokenStream}.

+ */ + class ANTLR4CPP_PUBLIC BufferedTokenStream : public TokenStream { + public: + BufferedTokenStream(TokenSource *tokenSource); + BufferedTokenStream(const BufferedTokenStream& other) = delete; + + BufferedTokenStream& operator = (const BufferedTokenStream& other) = delete; + + virtual TokenSource* getTokenSource() const override; + virtual size_t index() override; + virtual ssize_t mark() override; + + virtual void release(ssize_t marker) override; + virtual void reset(); + virtual void seek(size_t index) override; + + virtual size_t size() override; + virtual void consume() override; + + virtual Token* get(size_t i) const override; + + /// Get all tokens from start..stop inclusively. + virtual std::vector get(size_t start, size_t stop); + + virtual size_t LA(ssize_t i) override; + virtual Token* LT(ssize_t k) override; + + /// Reset this token stream by setting its token source. + virtual void setTokenSource(TokenSource *tokenSource); + virtual std::vector getTokens(); + virtual std::vector getTokens(size_t start, size_t stop); + + /// + /// Given a start and stop index, return a List of all tokens in + /// the token type BitSet. Return null if no tokens were found. This + /// method looks at both on and off channel tokens. + /// + virtual std::vector getTokens(size_t start, size_t stop, const std::vector &types); + virtual std::vector getTokens(size_t start, size_t stop, size_t ttype); + + /// Collect all tokens on specified channel to the right of + /// the current token up until we see a token on DEFAULT_TOKEN_CHANNEL or + /// EOF. If channel is -1, find any non default channel token. + virtual std::vector getHiddenTokensToRight(size_t tokenIndex, ssize_t channel); + + /// + /// Collect all hidden tokens (any off-default channel) to the right of + /// the current token up until we see a token on DEFAULT_TOKEN_CHANNEL + /// or EOF. 
+ /// + virtual std::vector getHiddenTokensToRight(size_t tokenIndex); + + /// + /// Collect all tokens on specified channel to the left of + /// the current token up until we see a token on DEFAULT_TOKEN_CHANNEL. + /// If channel is -1, find any non default channel token. + /// + virtual std::vector getHiddenTokensToLeft(size_t tokenIndex, ssize_t channel); + + /// + /// Collect all hidden tokens (any off-default channel) to the left of + /// the current token up until we see a token on DEFAULT_TOKEN_CHANNEL. + /// + virtual std::vector getHiddenTokensToLeft(size_t tokenIndex); + + virtual std::string getSourceName() const override; + virtual std::string getText() override; + virtual std::string getText(const misc::Interval &interval) override; + virtual std::string getText(RuleContext *ctx) override; + virtual std::string getText(Token *start, Token *stop) override; + + /// Get all tokens from lexer until EOF. + virtual void fill(); + + protected: + /** + * The {@link TokenSource} from which tokens for this stream are fetched. + */ + TokenSource *_tokenSource; + + /** + * A collection of all tokens fetched from the token source. The list is + * considered a complete view of the input once {@link #fetchedEOF} is set + * to {@code true}. + */ + std::vector> _tokens; + + /** + * The index into {@link #tokens} of the current token (next token to + * {@link #consume}). {@link #tokens}{@code [}{@link #p}{@code ]} should be + * {@link #LT LT(1)}. + * + *

This field is set to -1 when the stream is first constructed or when + * {@link #setTokenSource} is called, indicating that the first token has + * not yet been fetched from the token source. For additional information, + * see the documentation of {@link IntStream} for a description of + * Initializing Methods.

+ */ + // ml: since -1 requires to make this member signed for just this single aspect we use a member _needSetup instead. + // Use bool isInitialized() to find out if this stream has started reading. + size_t _p; + + /** + * Indicates whether the {@link Token#EOF} token has been fetched from + * {@link #tokenSource} and added to {@link #tokens}. This field improves + * performance for the following cases: + * + *
    + *
  • {@link #consume}: The lookahead check in {@link #consume} to prevent + * consuming the EOF symbol is optimized by checking the values of + * {@link #fetchedEOF} and {@link #p} instead of calling {@link #LA}.
  • + *
  • {@link #fetch}: The check to prevent adding multiple EOF symbols into + * {@link #tokens} is trivial with this field.
  • + *
      + */ + bool _fetchedEOF; + + /// + /// Make sure index {@code i} in tokens has a token. + /// + /// {@code true} if a token is located at index {@code i}, otherwise + /// {@code false}. + /// + virtual bool sync(size_t i); + + /// + /// Add {@code n} elements to buffer. + /// + /// The actual number of elements added to the buffer. + virtual size_t fetch(size_t n); + + virtual Token* LB(size_t k); + + /// Allowed derived classes to modify the behavior of operations which change + /// the current stream position by adjusting the target token index of a seek + /// operation. The default implementation simply returns {@code i}. If an + /// exception is thrown in this method, the current stream index should not be + /// changed. + ///

      + /// For example, overrides this method to ensure that + /// the seek target is always an on-channel token. + /// + /// The target token index. + /// The adjusted target token index. + virtual ssize_t adjustSeekIndex(size_t i); + void lazyInit(); + virtual void setup(); + + /** + * Given a starting index, return the index of the next token on channel. + * Return {@code i} if {@code tokens[i]} is on channel. Return the index of + * the EOF token if there are no tokens on channel between {@code i} and + * EOF. + */ + virtual ssize_t nextTokenOnChannel(size_t i, size_t channel); + + /** + * Given a starting index, return the index of the previous token on + * channel. Return {@code i} if {@code tokens[i]} is on channel. Return -1 + * if there are no tokens on channel between {@code i} and 0. + * + *

      + * If {@code i} specifies an index at or after the EOF token, the EOF token + * index is returned. This is due to the fact that the EOF token is treated + * as though it were on every channel.

      + */ + virtual ssize_t previousTokenOnChannel(size_t i, size_t channel); + + virtual std::vector filterForChannel(size_t from, size_t to, ssize_t channel); + + bool isInitialized() const; + + private: + bool _needSetup; + void InitializeInstanceFields(); + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/CharStream.h b/lib/antlr4/include/CharStream.h new file mode 100644 index 0000000..5f2a340 --- /dev/null +++ b/lib/antlr4/include/CharStream.h @@ -0,0 +1,37 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "IntStream.h" +#include "misc/Interval.h" + +namespace antlr4 { + + /// A source of characters for an ANTLR lexer. + class ANTLR4CPP_PUBLIC CharStream : public IntStream { + public: + virtual ~CharStream(); + + /// This method returns the text for a range of characters within this input + /// stream. This method is guaranteed to not throw an exception if the + /// specified interval lies entirely within a marked range. For more + /// information about marked ranges, see IntStream::mark. + /// + /// an interval within the stream + /// the text of the specified interval + /// + /// if {@code interval} is {@code null} + /// if {@code interval.a < 0}, or if + /// {@code interval.b < interval.a - 1}, or if {@code interval.b} lies at or + /// past the end of the stream + /// if the stream does not support + /// getting the text of the specified interval + virtual std::string getText(const misc::Interval &interval) = 0; + + virtual std::string toString() const = 0; + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/CommonToken.h b/lib/antlr4/include/CommonToken.h new file mode 100644 index 0000000..fdaab14 --- /dev/null +++ b/lib/antlr4/include/CommonToken.h @@ -0,0 +1,158 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "WritableToken.h" + +namespace antlr4 { + + class ANTLR4CPP_PUBLIC CommonToken : public WritableToken { + protected: + /** + * An empty {@link Pair} which is used as the default value of + * {@link #source} for tokens that do not have a source. + */ + static const std::pair EMPTY_SOURCE; + + /** + * This is the backing field for {@link #getType} and {@link #setType}. + */ + size_t _type; + + /** + * This is the backing field for {@link #getLine} and {@link #setLine}. + */ + size_t _line; + + /** + * This is the backing field for {@link #getCharPositionInLine} and + * {@link #setCharPositionInLine}. + */ + size_t _charPositionInLine; // set to invalid position + + /** + * This is the backing field for {@link #getChannel} and + * {@link #setChannel}. + */ + size_t _channel; + + /** + * This is the backing field for {@link #getTokenSource} and + * {@link #getInputStream}. + * + *

      + * These properties share a field to reduce the memory footprint of + * {@link CommonToken}. Tokens created by a {@link CommonTokenFactory} from + * the same source and input stream share a reference to the same + * {@link Pair} containing these values.

      + */ + + std::pair _source; // ml: pure references, usually from statically allocated classes. + + /** + * This is the backing field for {@link #getText} when the token text is + * explicitly set in the constructor or via {@link #setText}. + * + * @see #getText() + */ + std::string _text; + + /** + * This is the backing field for {@link #getTokenIndex} and + * {@link #setTokenIndex}. + */ + size_t _index; + + /** + * This is the backing field for {@link #getStartIndex} and + * {@link #setStartIndex}. + */ + size_t _start; + + /** + * This is the backing field for {@link #getStopIndex} and + * {@link #setStopIndex}. + */ + size_t _stop; + + public: + /** + * Constructs a new {@link CommonToken} with the specified token type. + * + * @param type The token type. + */ + CommonToken(size_t type); + CommonToken(std::pair source, size_t type, size_t channel, size_t start, size_t stop); + + /** + * Constructs a new {@link CommonToken} with the specified token type and + * text. + * + * @param type The token type. + * @param text The text of the token. + */ + CommonToken(size_t type, const std::string &text); + + /** + * Constructs a new {@link CommonToken} as a copy of another {@link Token}. + * + *

      + * If {@code oldToken} is also a {@link CommonToken} instance, the newly + * constructed token will share a reference to the {@link #text} field and + * the {@link Pair} stored in {@link #source}. Otherwise, {@link #text} will + * be assigned the result of calling {@link #getText}, and {@link #source} + * will be constructed from the result of {@link Token#getTokenSource} and + * {@link Token#getInputStream}.

      + * + * @param oldToken The token to copy. + */ + CommonToken(Token *oldToken); + + virtual size_t getType() const override; + + /** + * Explicitly set the text for this token. If {code text} is not + * {@code null}, then {@link #getText} will return this value rather than + * extracting the text from the input. + * + * @param text The explicit text of the token, or {@code null} if the text + * should be obtained from the input along with the start and stop indexes + * of the token. + */ + virtual void setText(const std::string &text) override; + virtual std::string getText() const override; + + virtual void setLine(size_t line) override; + virtual size_t getLine() const override; + + virtual size_t getCharPositionInLine() const override; + virtual void setCharPositionInLine(size_t charPositionInLine) override; + + virtual size_t getChannel() const override; + virtual void setChannel(size_t channel) override; + + virtual void setType(size_t type) override; + + virtual size_t getStartIndex() const override; + virtual void setStartIndex(size_t start); + + virtual size_t getStopIndex() const override; + virtual void setStopIndex(size_t stop); + + virtual size_t getTokenIndex() const override; + virtual void setTokenIndex(size_t index) override; + + virtual TokenSource *getTokenSource() const override; + virtual CharStream *getInputStream() const override; + + virtual std::string toString() const override; + + virtual std::string toString(Recognizer *r) const; + private: + void InitializeInstanceFields(); + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/CommonTokenFactory.h b/lib/antlr4/include/CommonTokenFactory.h new file mode 100644 index 0000000..096f93b --- /dev/null +++ b/lib/antlr4/include/CommonTokenFactory.h @@ -0,0 +1,74 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "TokenFactory.h" + +namespace antlr4 { + + /** + * This default implementation of {@link TokenFactory} creates + * {@link CommonToken} objects. + */ + class ANTLR4CPP_PUBLIC CommonTokenFactory : public TokenFactory { + public: + /** + * The default {@link CommonTokenFactory} instance. + * + *

      + * This token factory does not explicitly copy token text when constructing + * tokens.

      + */ + static const Ref> DEFAULT; + + protected: + /** + * Indicates whether {@link CommonToken#setText} should be called after + * constructing tokens to explicitly set the text. This is useful for cases + * where the input stream might not be able to provide arbitrary substrings + * of text from the input after the lexer creates a token (e.g. the + * implementation of {@link CharStream#getText} in + * {@link UnbufferedCharStream} throws an + * {@link UnsupportedOperationException}). Explicitly setting the token text + * allows {@link Token#getText} to be called at any time regardless of the + * input stream implementation. + * + *

      + * The default value is {@code false} to avoid the performance and memory + * overhead of copying text for every token unless explicitly requested.

      + */ + const bool copyText; + + public: + /** + * Constructs a {@link CommonTokenFactory} with the specified value for + * {@link #copyText}. + * + *

      + * When {@code copyText} is {@code false}, the {@link #DEFAULT} instance + * should be used instead of constructing a new instance.

      + * + * @param copyText The value for {@link #copyText}. + */ + CommonTokenFactory(bool copyText); + + /** + * Constructs a {@link CommonTokenFactory} with {@link #copyText} set to + * {@code false}. + * + *

      + * The {@link #DEFAULT} instance should be used instead of calling this + * directly.

      + */ + CommonTokenFactory(); + + virtual std::unique_ptr create(std::pair source, size_t type, + const std::string &text, size_t channel, size_t start, size_t stop, size_t line, size_t charPositionInLine) override; + + virtual std::unique_ptr create(size_t type, const std::string &text) override; + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/CommonTokenStream.h b/lib/antlr4/include/CommonTokenStream.h new file mode 100644 index 0000000..628a986 --- /dev/null +++ b/lib/antlr4/include/CommonTokenStream.h @@ -0,0 +1,79 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "BufferedTokenStream.h" + +namespace antlr4 { + + /** + * This class extends {@link BufferedTokenStream} with functionality to filter + * token streams to tokens on a particular channel (tokens where + * {@link Token#getChannel} returns a particular value). + * + *

      + * This token stream provides access to all tokens by index or when calling + * methods like {@link #getText}. The channel filtering is only used for code + * accessing tokens via the lookahead methods {@link #LA}, {@link #LT}, and + * {@link #LB}.

      + * + *

      + * By default, tokens are placed on the default channel + * ({@link Token#DEFAULT_CHANNEL}), but may be reassigned by using the + * {@code ->channel(HIDDEN)} lexer command, or by using an embedded action to + * call {@link Lexer#setChannel}. + *

      + * + *

      + * Note: lexer rules which use the {@code ->skip} lexer command or call + * {@link Lexer#skip} do not produce tokens at all, so input text matched by + * such a rule will not be available as part of the token stream, regardless of + * channel.

      + */ + class ANTLR4CPP_PUBLIC CommonTokenStream : public BufferedTokenStream { + public: + /** + * Constructs a new {@link CommonTokenStream} using the specified token + * source and the default token channel ({@link Token#DEFAULT_CHANNEL}). + * + * @param tokenSource The token source. + */ + CommonTokenStream(TokenSource *tokenSource); + + /** + * Constructs a new {@link CommonTokenStream} using the specified token + * source and filtering tokens to the specified channel. Only tokens whose + * {@link Token#getChannel} matches {@code channel} or have the + * {@link Token#getType} equal to {@link Token#EOF} will be returned by the + * token stream lookahead methods. + * + * @param tokenSource The token source. + * @param channel The channel to use for filtering tokens. + */ + CommonTokenStream(TokenSource *tokenSource, size_t channel); + + virtual Token* LT(ssize_t k) override; + + /// Count EOF just once. + virtual int getNumberOfOnChannelTokens(); + + protected: + /** + * Specifies the channel to use for filtering tokens. + * + *

      + * The default value is {@link Token#DEFAULT_CHANNEL}, which matches the + * default channel assigned to tokens created by the lexer.

      + */ + size_t channel; + + virtual ssize_t adjustSeekIndex(size_t i) override; + + virtual Token* LB(size_t k) override; + + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/ConsoleErrorListener.h b/lib/antlr4/include/ConsoleErrorListener.h new file mode 100644 index 0000000..65c6f8c --- /dev/null +++ b/lib/antlr4/include/ConsoleErrorListener.h @@ -0,0 +1,35 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "BaseErrorListener.h" + +namespace antlr4 { + + class ANTLR4CPP_PUBLIC ConsoleErrorListener : public BaseErrorListener { + public: + /** + * Provides a default instance of {@link ConsoleErrorListener}. + */ + static ConsoleErrorListener INSTANCE; + + /** + * {@inheritDoc} + * + *

      + * This implementation prints messages to {@link System#err} containing the + * values of {@code line}, {@code charPositionInLine}, and {@code msg} using + * the following format.

      + * + *
      +     * line line:charPositionInLine msg
      +     * 
      + */ + virtual void syntaxError(Recognizer *recognizer, Token * offendingSymbol, size_t line, size_t charPositionInLine, + const std::string &msg, std::exception_ptr e) override; + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/DefaultErrorStrategy.h b/lib/antlr4/include/DefaultErrorStrategy.h new file mode 100644 index 0000000..47dabb8 --- /dev/null +++ b/lib/antlr4/include/DefaultErrorStrategy.h @@ -0,0 +1,466 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "ANTLRErrorStrategy.h" +#include "misc/IntervalSet.h" + +namespace antlr4 { + + /** + * This is the default implementation of {@link ANTLRErrorStrategy} used for + * error reporting and recovery in ANTLR parsers. + */ + class ANTLR4CPP_PUBLIC DefaultErrorStrategy : public ANTLRErrorStrategy { + public: + DefaultErrorStrategy(); + DefaultErrorStrategy(DefaultErrorStrategy const& other) = delete; + virtual ~DefaultErrorStrategy(); + + DefaultErrorStrategy& operator = (DefaultErrorStrategy const& other) = delete; + + protected: + /** + * Indicates whether the error strategy is currently "recovering from an + * error". This is used to suppress reporting multiple error messages while + * attempting to recover from a detected syntax error. + * + * @see #inErrorRecoveryMode + */ + bool errorRecoveryMode; + + /** The index into the input stream where the last error occurred. + * This is used to prevent infinite loops where an error is found + * but no token is consumed during recovery...another error is found, + * ad nauseum. This is a failsafe mechanism to guarantee that at least + * one token/tree node is consumed for two errors. + */ + int lastErrorIndex; + + misc::IntervalSet lastErrorStates; + + /// + /// {@inheritDoc} + ///

      + /// The default implementation simply calls {@link #endErrorCondition} to + /// ensure that the handler is not in error recovery mode. + ///

      + public: + virtual void reset(Parser *recognizer) override; + + /// + /// This method is called to enter error recovery mode when a recognition + /// exception is reported. + /// + /// the parser instance + protected: + virtual void beginErrorCondition(Parser *recognizer); + + /// + /// {@inheritDoc} + /// + public: + virtual bool inErrorRecoveryMode(Parser *recognizer) override; + + /// + /// This method is called to leave error recovery mode after recovering from + /// a recognition exception. + /// + /// + protected: + virtual void endErrorCondition(Parser *recognizer); + + /// + /// {@inheritDoc} + ///

      + /// The default implementation simply calls {@link #endErrorCondition}. + ///

      + public: + virtual void reportMatch(Parser *recognizer) override; + + /// {@inheritDoc} + ///

      + /// The default implementation returns immediately if the handler is already + /// in error recovery mode. Otherwise, it calls {@link #beginErrorCondition} + /// and dispatches the reporting task based on the runtime type of {@code e} + /// according to the following table. + /// + ///

        + ///
      • {@link NoViableAltException}: Dispatches the call to + /// {@link #reportNoViableAlternative}
      • + ///
      • {@link InputMismatchException}: Dispatches the call to + /// {@link #reportInputMismatch}
      • + ///
      • {@link FailedPredicateException}: Dispatches the call to + /// {@link #reportFailedPredicate}
      • + ///
      • All other types: calls {@link Parser#notifyErrorListeners} to report + /// the exception
      • + ///
      + virtual void reportError(Parser *recognizer, const RecognitionException &e) override; + + /// + /// {@inheritDoc} + ///

      + /// The default implementation resynchronizes the parser by consuming tokens + /// until we find one in the resynchronization set--loosely the set of tokens + /// that can follow the current rule. + ///

      + virtual void recover(Parser *recognizer, std::exception_ptr e) override; + + /** + * The default implementation of {@link ANTLRErrorStrategy#sync} makes sure + * that the current lookahead symbol is consistent with what we're expecting + * at this point in the ATN. You can call this anytime but ANTLR only + * generates code to check before subrules/loops and each iteration. + * + *

      Implements Jim Idle's magic sync mechanism in closures and optional + * subrules. E.g.,

      + * + *
      +     * a : sync ( stuff sync )* ;
      +     * sync : {consume to what can follow sync} ;
      +     * 
      + * + * At the start of a sub rule upon error, {@link #sync} performs single + * token deletion, if possible. If it can't do that, it bails on the current + * rule and uses the default error recovery, which consumes until the + * resynchronization set of the current rule. + * + *

      If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block + * with an empty alternative), then the expected set includes what follows + * the subrule.

      + * + *

      During loop iteration, it consumes until it sees a token that can start a + * sub rule or what follows loop. Yes, that is pretty aggressive. We opt to + * stay in the loop as long as possible.

      + * + *

      ORIGINS

      + * + *

      Previous versions of ANTLR did a poor job of their recovery within loops. + * A single mismatched token or missing token would force the parser to bail + * out of the entire rule surrounding the loop. So, for rule

      + * + *
      +     * classDef : 'class' ID '{' member* '}'
      +     * 
      + * + * input with an extra token between members would force the parser to + * consume until it found the next class definition rather than the next + * member definition of the current class. + * + *

      This functionality costs a little bit of effort because the parser has to + * compare token sets at the start of the loop and at each iteration. If for + * some reason speed is suffering for you, you can turn off this + * functionality by simply overriding this method as a blank { }.

      + */ + virtual void sync(Parser *recognizer) override; + + /// + /// This is called by when the exception is a + /// . + /// + /// + /// the parser instance + /// the recognition exception + protected: + virtual void reportNoViableAlternative(Parser *recognizer, const NoViableAltException &e); + + /// + /// This is called by when the exception is an + /// . + /// + /// + /// the parser instance + /// the recognition exception + virtual void reportInputMismatch(Parser *recognizer, const InputMismatchException &e); + + /// + /// This is called by when the exception is a + /// . + /// + /// + /// the parser instance + /// the recognition exception + virtual void reportFailedPredicate(Parser *recognizer, const FailedPredicateException &e); + + /** + * This method is called to report a syntax error which requires the removal + * of a token from the input stream. At the time this method is called, the + * erroneous symbol is current {@code LT(1)} symbol and has not yet been + * removed from the input stream. When this method returns, + * {@code recognizer} is in error recovery mode. + * + *

      This method is called when {@link #singleTokenDeletion} identifies + * single-token deletion as a viable recovery strategy for a mismatched + * input error.

      + * + *

      The default implementation simply returns if the handler is already in + * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to + * enter error recovery mode, followed by calling + * {@link Parser#notifyErrorListeners}.

      + * + * @param recognizer the parser instance + */ + virtual void reportUnwantedToken(Parser *recognizer); + + /** + * This method is called to report a syntax error which requires the + * insertion of a missing token into the input stream. At the time this + * method is called, the missing token has not yet been inserted. When this + * method returns, {@code recognizer} is in error recovery mode. + * + *

      This method is called when {@link #singleTokenInsertion} identifies + * single-token insertion as a viable recovery strategy for a mismatched + * input error.

      + * + *

      The default implementation simply returns if the handler is already in + * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to + * enter error recovery mode, followed by calling + * {@link Parser#notifyErrorListeners}.

      + * + * @param recognizer the parser instance + */ + virtual void reportMissingToken(Parser *recognizer); + + public: + /** + * {@inheritDoc} + * + *

      The default implementation attempts to recover from the mismatched input + * by using single token insertion and deletion as described below. If the + * recovery attempt fails, this method throws an + * {@link InputMismatchException}.

      + * + *

      EXTRA TOKEN (single token deletion)

      + * + *

      {@code LA(1)} is not what we are looking for. If {@code LA(2)} has the + * right token, however, then assume {@code LA(1)} is some extra spurious + * token and delete it. Then consume and return the next token (which was + * the {@code LA(2)} token) as the successful result of the match operation.

      + * + *

      This recovery strategy is implemented by {@link #singleTokenDeletion}.

      + * + *

      MISSING TOKEN (single token insertion)

      + * + *

      If the current token (at {@code LA(1)}) is consistent with what could come + * after the expected {@code LA(1)} token, then assume the token is missing + * and use the parser's {@link TokenFactory} to create it on the fly. The + * "insertion" is performed by returning the created token as the successful + * result of the match operation.

      + * + *

      This recovery strategy is implemented by {@link #singleTokenInsertion}.

      + * + *

      EXAMPLE

      + * + *

      For example, input {@code i=(3;} is clearly missing the {@code ')'}. When + * the parser returns from the nested call to {@code expr}, it will have the + * call chain:

      + * + *
      +     * stat → expr → atom
      +     * 
      + * + * and it will be trying to match the {@code ')'} at this point in the + * derivation: + * + *
      +     * => ID '=' '(' INT ')' ('+' atom)* ';'
      +     *                    ^
      +     * 
      + * + * The attempt to match {@code ')'} will fail when it sees {@code ';'} and + * call {@link #recoverInline}. To recover, it sees that {@code LA(1)==';'} + * is in the set of tokens that can follow the {@code ')'} token reference + * in rule {@code atom}. It can assume that you forgot the {@code ')'}. + */ + virtual Token* recoverInline(Parser *recognizer) override; + + /// + /// This method implements the single-token insertion inline error recovery + /// strategy. It is called by if the single-token + /// deletion strategy fails to recover from the mismatched input. If this + /// method returns {@code true}, {@code recognizer} will be in error recovery + /// mode. + ///

      + /// This method determines whether or not single-token insertion is viable by + /// checking if the {@code LA(1)} input symbol could be successfully matched + /// if it were instead the {@code LA(2)} symbol. If this method returns + /// {@code true}, the caller is responsible for creating and inserting a + /// token with the correct type to produce this behavior. + ///

      + /// the parser instance + /// {@code true} if single-token insertion is a viable recovery + /// strategy for the current mismatched input, otherwise {@code false} + protected: + virtual bool singleTokenInsertion(Parser *recognizer); + + /// + /// This method implements the single-token deletion inline error recovery + /// strategy. It is called by to attempt to recover + /// from mismatched input. If this method returns null, the parser and error + /// handler state will not have changed. If this method returns non-null, + /// {@code recognizer} will not be in error recovery mode since the + /// returned token was a successful match. + ///

      + /// If the single-token deletion is successful, this method calls + /// {@link #reportUnwantedToken} to report the error, followed by + /// {@link Parser#consume} to actually "delete" the extraneous token. Then, + /// before returning {@link #reportMatch} is called to signal a successful + /// match. + ///

      + /// the parser instance + /// the successfully matched instance if single-token + /// deletion successfully recovers from the mismatched input, otherwise + /// {@code null} + virtual Token* singleTokenDeletion(Parser *recognizer); + + /// + /// Conjure up a missing token during error recovery. + /// + /// The recognizer attempts to recover from single missing + /// symbols. But, actions might refer to that missing symbol. + /// For example, x=ID {f($x);}. The action clearly assumes + /// that there has been an identifier matched previously and that + /// $x points at that token. If that token is missing, but + /// the next token in the stream is what we want we assume that + /// this token is missing and we keep going. Because we + /// have to return some token to replace the missing token, + /// we have to conjure one up. This method gives the user control + /// over the tokens returned for missing tokens. Mostly, + /// you will want to create something special for identifier + /// tokens. For literals such as '{' and ',', the default + /// action in the parser or tree parser works. It simply creates + /// a CommonToken of the appropriate type. The text will be the token. + /// If you change what tokens must be created by the lexer, + /// override this method to create the appropriate tokens. + /// + virtual Token* getMissingSymbol(Parser *recognizer); + + virtual misc::IntervalSet getExpectedTokens(Parser *recognizer); + + /// + /// How should a token be displayed in an error message? The default + /// is to display just the text, but during development you might + /// want to have a lot of information spit out. Override in that case + /// to use t.toString() (which, for CommonToken, dumps everything about + /// the token). This is better than forcing you to override a method in + /// your token objects because you don't have to go modify your lexer + /// so that it creates a new class. 
+ /// + virtual std::string getTokenErrorDisplay(Token *t); + + virtual std::string getSymbolText(Token *symbol); + + virtual size_t getSymbolType(Token *symbol); + + virtual std::string escapeWSAndQuote(const std::string &s) const; + + /* Compute the error recovery set for the current rule. During + * rule invocation, the parser pushes the set of tokens that can + * follow that rule reference on the stack; this amounts to + * computing FIRST of what follows the rule reference in the + * enclosing rule. See LinearApproximator.FIRST(). + * This local follow set only includes tokens + * from within the rule; i.e., the FIRST computation done by + * ANTLR stops at the end of a rule. + * + * EXAMPLE + * + * When you find a "no viable alt exception", the input is not + * consistent with any of the alternatives for rule r. The best + * thing to do is to consume tokens until you see something that + * can legally follow a call to r *or* any rule that called r. + * You don't want the exact set of viable next tokens because the + * input might just be missing a token--you might consume the + * rest of the input looking for one of the missing tokens. + * + * Consider grammar: + * + * a : '[' b ']' + * | '(' b ')' + * ; + * b : c '^' INT ; + * c : ID + * | INT + * ; + * + * At each rule invocation, the set of tokens that could follow + * that rule is pushed on a stack. Here are the various + * context-sensitive follow sets: + * + * FOLLOW(b1_in_a) = FIRST(']') = ']' + * FOLLOW(b2_in_a) = FIRST(')') = ')' + * FOLLOW(c_in_b) = FIRST('^') = '^' + * + * Upon erroneous input "[]", the call chain is + * + * a -> b -> c + * + * and, hence, the follow context stack is: + * + * depth follow set start of rule execution + * 0 a (from main()) + * 1 ']' b + * 2 '^' c + * + * Notice that ')' is not included, because b would have to have + * been called from a different context in rule a for ')' to be + * included. 
+ * + * For error recovery, we cannot consider FOLLOW(c) + * (context-sensitive or otherwise). We need the combined set of + * all context-sensitive FOLLOW sets--the set of all tokens that + * could follow any reference in the call chain. We need to + * resync to one of those tokens. Note that FOLLOW(c)='^' and if + * we resync'd to that token, we'd consume until EOF. We need to + * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. + * In this case, for input "[]", LA(1) is ']' and in the set, so we would + * not consume anything. After printing an error, rule c would + * return normally. Rule b would not find the required '^' though. + * At this point, it gets a mismatched token error and throws an + * exception (since LA(1) is not in the viable following token + * set). The rule exception handler tries to recover, but finds + * the same recovery set and doesn't consume anything. Rule b + * exits normally returning to rule a. Now it finds the ']' (and + * with the successful match exits errorRecovery mode). + * + * So, you can see that the parser walks up the call chain looking + * for the token that was a member of the recovery set. + * + * Errors are not generated in errorRecovery mode. + * + * ANTLR's error recovery mechanism is based upon original ideas: + * + * "Algorithms + Data Structures = Programs" by Niklaus Wirth + * + * and + * + * "A note on error recovery in recursive descent parsers": + * http://portal.acm.org/citation.cfm?id=947902.947905 + * + * Later, Josef Grosch had some good ideas: + * + * "Efficient and Comfortable Error Recovery in Recursive Descent + * Parsers": + * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip + * + * Like Grosch I implement context-sensitive FOLLOW sets that are combined + * at run-time upon error to avoid overhead during parsing. + */ + virtual misc::IntervalSet getErrorRecoverySet(Parser *recognizer); + + /// + /// Consume tokens until one matches the given token set. 
+ virtual void consumeUntil(Parser *recognizer, const misc::IntervalSet &set); + + private: + std::vector> _errorSymbols; // Temporarily created token. + void InitializeInstanceFields(); + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/DiagnosticErrorListener.h b/lib/antlr4/include/DiagnosticErrorListener.h new file mode 100644 index 0000000..8419fdc --- /dev/null +++ b/lib/antlr4/include/DiagnosticErrorListener.h @@ -0,0 +1,80 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "BaseErrorListener.h" + +namespace antlr4 { + + /// + /// This implementation of can be used to identify + /// certain potential correctness and performance problems in grammars. "Reports" + /// are made by calling with the appropriate + /// message. + /// + ///
        + ///
      • Ambiguities: These are cases where more than one path through the + /// grammar can match the input.
      • + ///
      • Weak context sensitivity: These are cases where full-context + /// prediction resolved an SLL conflict to a unique alternative which equaled the + /// minimum alternative of the SLL conflict.
      • + ///
      • Strong (forced) context sensitivity: These are cases where the + /// full-context prediction resolved an SLL conflict to a unique alternative, + /// and the minimum alternative of the SLL conflict was found to not be + /// a truly viable alternative. Two-stage parsing cannot be used for inputs where + /// this situation occurs.
      • + ///
      + /// + /// @author Sam Harwell + ///
      + class ANTLR4CPP_PUBLIC DiagnosticErrorListener : public BaseErrorListener { + /// + /// When {@code true}, only exactly known ambiguities are reported. + /// + protected: + const bool exactOnly; + + /// + /// Initializes a new instance of which only + /// reports exact ambiguities. + /// + public: + DiagnosticErrorListener(); + + /// + /// Initializes a new instance of , specifying + /// whether all ambiguities or only exact ambiguities are reported. + /// + /// {@code true} to report only exact ambiguities, otherwise + /// {@code false} to report all ambiguities. + DiagnosticErrorListener(bool exactOnly); + + virtual void reportAmbiguity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, bool exact, + const antlrcpp::BitSet &ambigAlts, atn::ATNConfigSet *configs) override; + + virtual void reportAttemptingFullContext(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, + const antlrcpp::BitSet &conflictingAlts, atn::ATNConfigSet *configs) override; + + virtual void reportContextSensitivity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, + size_t prediction, atn::ATNConfigSet *configs) override; + + protected: + virtual std::string getDecisionDescription(Parser *recognizer, const dfa::DFA &dfa); + + /// + /// Computes the set of conflicting or ambiguous alternatives from a + /// configuration set, if that information was not already provided by the + /// parser. + /// + /// The set of conflicting or ambiguous alternatives, as + /// reported by the parser. + /// The conflicting or ambiguous configuration set. + /// Returns {@code reportedAlts} if it is not {@code null}, otherwise + /// returns the set of alternatives represented in {@code configs}. 
+ virtual antlrcpp::BitSet getConflictingAlts(const antlrcpp::BitSet &reportedAlts, atn::ATNConfigSet *configs); + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/Exceptions.h b/lib/antlr4/include/Exceptions.h new file mode 100644 index 0000000..d57b26a --- /dev/null +++ b/lib/antlr4/include/Exceptions.h @@ -0,0 +1,99 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { + + // An exception hierarchy modelled loosely after java.lang.* exceptions. + class ANTLR4CPP_PUBLIC RuntimeException : public std::exception { + private: + std::string _message; + public: + RuntimeException(const std::string &msg = ""); + + virtual const char* what() const NOEXCEPT override; + }; + + class ANTLR4CPP_PUBLIC IllegalStateException : public RuntimeException { + public: + IllegalStateException(const std::string &msg = "") : RuntimeException(msg) {} + IllegalStateException(IllegalStateException const&) = default; + ~IllegalStateException(); + IllegalStateException& operator=(IllegalStateException const&) = default; + }; + + class ANTLR4CPP_PUBLIC IllegalArgumentException : public RuntimeException { + public: + IllegalArgumentException(IllegalArgumentException const&) = default; + IllegalArgumentException(const std::string &msg = "") : RuntimeException(msg) {} + ~IllegalArgumentException(); + IllegalArgumentException& operator=(IllegalArgumentException const&) = default; + }; + + class ANTLR4CPP_PUBLIC NullPointerException : public RuntimeException { + public: + NullPointerException(const std::string &msg = "") : RuntimeException(msg) {} + NullPointerException(NullPointerException const&) = default; + ~NullPointerException(); + NullPointerException& operator=(NullPointerException const&) = default; + }; + + class ANTLR4CPP_PUBLIC IndexOutOfBoundsException : 
public RuntimeException { + public: + IndexOutOfBoundsException(const std::string &msg = "") : RuntimeException(msg) {} + IndexOutOfBoundsException(IndexOutOfBoundsException const&) = default; + ~IndexOutOfBoundsException(); + IndexOutOfBoundsException& operator=(IndexOutOfBoundsException const&) = default; + }; + + class ANTLR4CPP_PUBLIC UnsupportedOperationException : public RuntimeException { + public: + UnsupportedOperationException(const std::string &msg = "") : RuntimeException(msg) {} + UnsupportedOperationException(UnsupportedOperationException const&) = default; + ~UnsupportedOperationException(); + UnsupportedOperationException& operator=(UnsupportedOperationException const&) = default; + + }; + + class ANTLR4CPP_PUBLIC EmptyStackException : public RuntimeException { + public: + EmptyStackException(const std::string &msg = "") : RuntimeException(msg) {} + EmptyStackException(EmptyStackException const&) = default; + ~EmptyStackException(); + EmptyStackException& operator=(EmptyStackException const&) = default; + }; + + // IOException is not a runtime exception (in the java hierarchy). + // Hence we have to duplicate the RuntimeException implementation. 
+ class ANTLR4CPP_PUBLIC IOException : public std::exception { + private: + std::string _message; + + public: + IOException(const std::string &msg = ""); + + virtual const char* what() const NOEXCEPT override; + }; + + class ANTLR4CPP_PUBLIC CancellationException : public IllegalStateException { + public: + CancellationException(const std::string &msg = "") : IllegalStateException(msg) {} + CancellationException(CancellationException const&) = default; + ~CancellationException(); + CancellationException& operator=(CancellationException const&) = default; + }; + + class ANTLR4CPP_PUBLIC ParseCancellationException : public CancellationException { + public: + ParseCancellationException(const std::string &msg = "") : CancellationException(msg) {} + ParseCancellationException(ParseCancellationException const&) = default; + ~ParseCancellationException(); + ParseCancellationException& operator=(ParseCancellationException const&) = default; + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/FailedPredicateException.h b/lib/antlr4/include/FailedPredicateException.h new file mode 100644 index 0000000..16e37f7 --- /dev/null +++ b/lib/antlr4/include/FailedPredicateException.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "RecognitionException.h" + +namespace antlr4 { + + /// A semantic predicate failed during validation. Validation of predicates + /// occurs when normally parsing the alternative just like matching a token. + /// Disambiguating predicate evaluation occurs when we test a predicate during + /// prediction. 
+ class ANTLR4CPP_PUBLIC FailedPredicateException : public RecognitionException { + public: + FailedPredicateException(Parser *recognizer); + FailedPredicateException(Parser *recognizer, const std::string &predicate); + FailedPredicateException(Parser *recognizer, const std::string &predicate, const std::string &message); + + virtual size_t getRuleIndex(); + virtual size_t getPredIndex(); + virtual std::string getPredicate(); + + private: + size_t _ruleIndex; + size_t _predicateIndex; + std::string _predicate; + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/InputMismatchException.h b/lib/antlr4/include/InputMismatchException.h new file mode 100644 index 0000000..051a2a4 --- /dev/null +++ b/lib/antlr4/include/InputMismatchException.h @@ -0,0 +1,24 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "RecognitionException.h" + +namespace antlr4 { + + /// + /// This signifies any kind of mismatched input exceptions such as + /// when the current input does not match the expected token. + /// + class ANTLR4CPP_PUBLIC InputMismatchException : public RecognitionException { + public: + InputMismatchException(Parser *recognizer); + InputMismatchException(InputMismatchException const&) = default; + ~InputMismatchException(); + InputMismatchException& operator=(InputMismatchException const&) = default; + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/IntStream.h b/lib/antlr4/include/IntStream.h new file mode 100644 index 0000000..9932a97 --- /dev/null +++ b/lib/antlr4/include/IntStream.h @@ -0,0 +1,218 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { + + /// + /// A simple stream of symbols whose values are represented as integers. This + /// interface provides marked ranges with support for a minimum level + /// of buffering necessary to implement arbitrary lookahead during prediction. + /// For more information on marked ranges, see . + ///

      + /// Initializing Methods: Some methods in this interface have + /// unspecified behavior if no call to an initializing method has occurred after + /// the stream was constructed. The following is a list of initializing methods: + /// + ///

        + ///
      • + ///
      • + ///
      • + ///
      + ///
      + class ANTLR4CPP_PUBLIC IntStream { + public: + static const size_t EOF = static_cast(-1); // std::numeric_limits::max(); doesn't work in VS 2013 + + /// The value returned by when the end of the stream is + /// reached. + /// No explicit EOF definition. We got EOF on all platforms. + //static const size_t _EOF = std::ios::eofbit; + + /// + /// The value returned by when the actual name of the + /// underlying source is not known. + /// + static const std::string UNKNOWN_SOURCE_NAME; + + virtual ~IntStream(); + + /// + /// Consumes the current symbol in the stream. This method has the following + /// effects: + /// + ///
        + ///
      • Forward movement: The value of {@code index()} + /// before calling this method is less than the value of {@code index()} + /// after calling this method.
      • + ///
      • Ordered lookahead: The value of {@code LA(1)} before + /// calling this method becomes the value of {@code LA(-1)} after calling + /// this method.
      • + ///
      + /// + /// Note that calling this method does not guarantee that {@code index()} is + /// incremented by exactly 1, as that would preclude the ability to implement + /// filtering streams (e.g. which distinguishes + /// between "on-channel" and "off-channel" tokens). + ///
      + /// if an attempt is made to consume the + /// end of the stream (i.e. if {@code LA(1)==EOF} before calling + /// {@code consume}). + virtual void consume() = 0; + + /// + /// Gets the value of the symbol at offset {@code i} from the current + /// position. When {@code i==1}, this method returns the value of the current + /// symbol in the stream (which is the next symbol to be consumed). When + /// {@code i==-1}, this method returns the value of the previously read + /// symbol in the stream. It is not valid to call this method with + /// {@code i==0}, but the specific behavior is unspecified because this + /// method is frequently called from performance-critical code. + ///

      + /// This method is guaranteed to succeed if any of the following are true: + /// + ///

        + ///
      • {@code i>0}
      • + ///
      • {@code i==-1} and {@code index()} returns a value greater + /// than the value of {@code index()} after the stream was constructed + /// and {@code LA(1)} was called in that order. Specifying the current + /// {@code index()} relative to the index after the stream was created + /// allows for filtering implementations that do not return every symbol + /// from the underlying source. Specifying the call to {@code LA(1)} + /// allows for lazily initialized streams.
      • + ///
      • {@code LA(i)} refers to a symbol consumed within a marked region + /// that has not yet been released.
      • + ///
      + /// + /// If {@code i} represents a position at or beyond the end of the stream, + /// this method returns {@code EOF}. + ///

      + /// The return value is unspecified if {@code i<0} and fewer than {@code -i} + /// calls to {@code consume()} have occurred from the beginning of + /// the stream before calling this method. + ///

      + /// if the stream does not support + /// retrieving the value of the specified symbol + virtual size_t LA(ssize_t i) = 0; + + /// + /// A mark provides a guarantee that {@code seek()} operations will be + /// valid over a "marked range" extending from the index where {@code mark()} + /// was called to the current {@code index()}. This allows the use of + /// streaming input sources by specifying the minimum buffering requirements + /// to support arbitrary lookahead during prediction. + ///

      + /// The returned mark is an opaque handle (type {@code ssize_t}) which is passed + /// to {@code release()} when the guarantees provided by the marked + /// range are no longer necessary. When calls to + /// {@code mark()}/{@code release()} are nested, the marks must be released + /// in reverse order of which they were obtained. Since marked regions are + /// used during performance-critical sections of prediction, the specific + /// behavior of invalid usage is unspecified (i.e. a mark is not released, or + /// a mark is released twice, or marks are not released in reverse order from + /// which they were created). + ///

      + /// The behavior of this method is unspecified if no call to an + /// initializing method has occurred after this stream was + /// constructed. + ///

      + /// This method does not change the current position in the input stream. + ///

      + /// The following example shows the use of {@code mark()}, + /// {@code release(mark)}, {@code index()}, and + /// {@code seek(index)} as part of an operation to safely work within a + /// marked region, then restore the stream position to its original value and + /// release the mark. + ///

      +    /// IntStream stream = ...;
      +    /// int index = -1;
      +    /// int mark = stream.mark();
      +    /// try {
      +    ///   index = stream.index();
      +    ///   // perform work here...
      +    /// } finally {
      +    ///   if (index != -1) {
      +    ///     stream.seek(index);
      +    ///   }
      +    ///   stream.release(mark);
      +    /// }
      +    /// 
      + ///
      + /// An opaque marker which should be passed to + /// {@code release()} when the marked range is no longer required. + virtual ssize_t mark() = 0; + + /// + /// This method releases a marked range created by a call to + /// {@code mark()}. Calls to {@code release()} must appear in the + /// reverse order of the corresponding calls to {@code mark()}. If a mark is + /// released twice, or if marks are not released in reverse order of the + /// corresponding calls to {@code mark()}, the behavior is unspecified. + ///

      + /// For more information and an example, see {@code mark()}. + ///

      + /// @param marker A marker returned by a call to {@code mark()}. + /// + virtual void release(ssize_t marker) = 0; + + /// + /// Return the index into the stream of the input symbol referred to by + /// {@code LA(1)}. + ///

      + /// The behavior of this method is unspecified if no call to an + /// initializing method has occurred after this stream was + /// constructed. + ///

      + virtual size_t index() = 0; + + /// + /// Set the input cursor to the position indicated by {@code index}. If the + /// specified index lies past the end of the stream, the operation behaves as + /// though {@code index} was the index of the EOF symbol. After this method + /// returns without throwing an exception, at least one of the following + /// will be true. + /// + ///
        + ///
      • {@code index()} will return the index of the first symbol + /// appearing at or after the specified {@code index}. Specifically, + /// implementations which filter their sources should automatically + /// adjust {@code index} forward the minimum amount required for the + /// operation to target a non-ignored symbol.
      • + ///
      • {@code LA(1)} returns {@code EOF}
      • + ///
      + /// + /// This operation is guaranteed to not throw an exception if {@code index} + /// lies within a marked region. For more information on marked regions, see + /// {@code mark()}. The behavior of this method is unspecified if no call to + /// an initializing method has occurred after this stream + /// was constructed. + ///
      + /// The absolute index to seek to. + /// + /// if {@code index} is less than 0 + /// if the stream does not support + /// seeking to the specified index + virtual void seek(size_t index) = 0; + + /// + /// Returns the total number of symbols in the stream, including a single EOF + /// symbol. + /// + /// if the size of the stream is + /// unknown. + virtual size_t size() = 0; + + /// + /// Gets the name of the underlying symbol source. This method returns a + /// non-null, non-empty string. If such a name is not known, this method + /// returns . + /// + virtual std::string getSourceName() const = 0; + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/InterpreterRuleContext.h b/lib/antlr4/include/InterpreterRuleContext.h new file mode 100644 index 0000000..cb6973e --- /dev/null +++ b/lib/antlr4/include/InterpreterRuleContext.h @@ -0,0 +1,45 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "ParserRuleContext.h" + +namespace antlr4 { + + /** + * This class extends {@link ParserRuleContext} by allowing the value of + * {@link #getRuleIndex} to be explicitly set for the context. + * + *

      + * {@link ParserRuleContext} does not include field storage for the rule index + * since the context classes created by the code generator override the + * {@link #getRuleIndex} method to return the correct value for that context. + * Since the parser interpreter does not use the context classes generated for a + * parser, this class (with slightly more memory overhead per node) is used to + * provide equivalent functionality.

      + */ + class ANTLR4CPP_PUBLIC InterpreterRuleContext : public ParserRuleContext { + public: + InterpreterRuleContext(); + + /** + * Constructs a new {@link InterpreterRuleContext} with the specified + * parent, invoking state, and rule index. + * + * @param parent The parent context. + * @param invokingStateNumber The invoking state number. + * @param ruleIndex The rule index for the current context. + */ + InterpreterRuleContext(ParserRuleContext *parent, size_t invokingStateNumber, size_t ruleIndex); + + virtual size_t getRuleIndex() const override; + + protected: + /** This is the backing field for {@link #getRuleIndex}. */ + const size_t _ruleIndex = INVALID_INDEX; +}; + +} // namespace antlr4 diff --git a/lib/antlr4/include/Lexer.h b/lib/antlr4/include/Lexer.h new file mode 100644 index 0000000..f722f7f --- /dev/null +++ b/lib/antlr4/include/Lexer.h @@ -0,0 +1,196 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Recognizer.h" +#include "TokenSource.h" +#include "CharStream.h" +#include "Token.h" + +namespace antlr4 { + + /// A lexer is recognizer that draws input symbols from a character stream. + /// lexer grammars result in a subclass of this object. A Lexer object + /// uses simplified match() and error recovery mechanisms in the interest + /// of speed. + class ANTLR4CPP_PUBLIC Lexer : public Recognizer, public TokenSource { + public: + static const size_t DEFAULT_MODE = 0; + static const size_t MORE = static_cast(-2); + static const size_t SKIP = static_cast(-3); + + static const size_t DEFAULT_TOKEN_CHANNEL = Token::DEFAULT_CHANNEL; + static const size_t HIDDEN = Token::HIDDEN_CHANNEL; + static const size_t MIN_CHAR_VALUE = 0; + static const size_t MAX_CHAR_VALUE = 0x10FFFF; + + CharStream *_input; // Pure reference, usually from statically allocated instance. 
+ + protected: + /// How to create token objects. + Ref> _factory; + + public: + /// The goal of all lexer rules/methods is to create a token object. + /// This is an instance variable as multiple rules may collaborate to + /// create a single token. nextToken will return this object after + /// matching lexer rule(s). If you subclass to allow multiple token + /// emissions, then set this to the last token to be matched or + /// something nonnull so that the auto token emit mechanism will not + /// emit another token. + + // Life cycle of a token is this: + // Created by emit() (via the token factory) or by action code, holding ownership of it. + // Ownership is handed over to the token stream when calling nextToken(). + std::unique_ptr token; + + /// + /// What character index in the stream did the current token start at? + /// Needed, for example, to get the text for current token. Set at + /// the start of nextToken. + /// + size_t tokenStartCharIndex; + + /// + /// The line on which the first character of the token resides + size_t tokenStartLine; + + /// The character position of first character within the line. + size_t tokenStartCharPositionInLine; + + /// Once we see EOF on char stream, next token will be EOF. + /// If you have DONE : EOF ; then you see DONE EOF. + bool hitEOF; + + /// The channel number for the current token. + size_t channel; + + /// The token type for the current token. + size_t type; + + // Use the vector as a stack. + std::vector modeStack; + size_t mode; + + Lexer(); + Lexer(CharStream *input); + virtual ~Lexer() {} + + virtual void reset(); + + /// Return a token from this source; i.e., match a token on the char stream. + virtual std::unique_ptr nextToken() override; + + /// Instruct the lexer to skip creating a token for current lexer rule + /// and look for another token. nextToken() knows to keep looking when + /// a lexer rule finishes with token set to SKIP_TOKEN. 
Recall that + /// if token == null at end of any token rule, it creates one for you + /// and emits it. + virtual void skip(); + virtual void more(); + virtual void setMode(size_t m); + virtual void pushMode(size_t m); + virtual size_t popMode(); + + template + void setTokenFactory(TokenFactory *factory) { + this->_factory = factory; + } + + virtual Ref> getTokenFactory() override; + + /// Set the char stream and reset the lexer + virtual void setInputStream(IntStream *input) override; + + virtual std::string getSourceName() override; + + virtual CharStream* getInputStream() override; + + /// By default does not support multiple emits per nextToken invocation + /// for efficiency reasons. Subclasses can override this method, nextToken, + /// and getToken (to push tokens into a list and pull from that list + /// rather than a single variable as this implementation does). + virtual void emit(std::unique_ptr newToken); + + /// The standard method called to automatically emit a token at the + /// outermost lexical rule. The token object should point into the + /// char buffer start..stop. If there is a text override in 'text', + /// use that to set the token's text. Override this method to emit + /// custom Token objects or provide a new factory. + virtual Token* emit(); + + virtual Token* emitEOF(); + + virtual size_t getLine() const override; + + virtual size_t getCharPositionInLine() override; + + virtual void setLine(size_t line); + + virtual void setCharPositionInLine(size_t charPositionInLine); + + /// What is the index of the current character of lookahead? + virtual size_t getCharIndex(); + + /// Return the text matched so far for the current token or any + /// text override. + virtual std::string getText(); + + /// Set the complete text of this token; it wipes any previous + /// changes to the text. + virtual void setText(const std::string &text); + + /// Override if emitting multiple tokens. 
+ virtual std::unique_ptr getToken(); + + virtual void setToken(std::unique_ptr newToken); + + virtual void setType(size_t ttype); + + virtual size_t getType(); + + virtual void setChannel(size_t newChannel); + + virtual size_t getChannel(); + + virtual const std::vector& getChannelNames() const = 0; + + virtual const std::vector& getModeNames() const = 0; + + /// Return a list of all Token objects in input char stream. + /// Forces load of all tokens. Does not include EOF token. + virtual std::vector> getAllTokens(); + + virtual void recover(const LexerNoViableAltException &e); + + virtual void notifyListeners(const LexerNoViableAltException &e); + + virtual std::string getErrorDisplay(const std::string &s); + + /// Lexers can normally match any char in it's vocabulary after matching + /// a token, so do the easy thing and just kill a character and hope + /// it all works out. You can instead use the rule invocation stack + /// to do sophisticated error recovery if you are in a fragment rule. + virtual void recover(RecognitionException *re); + + /// + /// Gets the number of syntax errors reported during parsing. This value is + /// incremented each time is called. + /// + /// + virtual size_t getNumberOfSyntaxErrors(); + + protected: + /// You can set the text for the current token to override what is in + /// the input char buffer (via setText()). + std::string _text; + + private: + size_t _syntaxErrors; + void InitializeInstanceFields(); + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/LexerInterpreter.h b/lib/antlr4/include/LexerInterpreter.h new file mode 100644 index 0000000..e8707e9 --- /dev/null +++ b/lib/antlr4/include/LexerInterpreter.h @@ -0,0 +1,52 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "Lexer.h" +#include "atn/PredictionContext.h" +#include "Vocabulary.h" + +namespace antlr4 { + + class ANTLR4CPP_PUBLIC LexerInterpreter : public Lexer { + public: + // @deprecated + LexerInterpreter(const std::string &grammarFileName, const std::vector &tokenNames, + const std::vector &ruleNames, const std::vector &channelNames, + const std::vector &modeNames, const atn::ATN &atn, CharStream *input); + LexerInterpreter(const std::string &grammarFileName, const dfa::Vocabulary &vocabulary, + const std::vector &ruleNames, const std::vector &channelNames, + const std::vector &modeNames, const atn::ATN &atn, CharStream *input); + + ~LexerInterpreter(); + + virtual const atn::ATN& getATN() const override; + virtual std::string getGrammarFileName() const override; + virtual const std::vector& getTokenNames() const override; + virtual const std::vector& getRuleNames() const override; + virtual const std::vector& getChannelNames() const override; + virtual const std::vector& getModeNames() const override; + + virtual const dfa::Vocabulary& getVocabulary() const override; + + protected: + const std::string _grammarFileName; + const atn::ATN &_atn; + + // @deprecated + std::vector _tokenNames; + const std::vector &_ruleNames; + const std::vector &_channelNames; + const std::vector &_modeNames; + std::vector _decisionToDFA; + + atn::PredictionContextCache _sharedContextCache; + + private: + dfa::Vocabulary _vocabulary; + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/LexerNoViableAltException.h b/lib/antlr4/include/LexerNoViableAltException.h new file mode 100644 index 0000000..bc827e8 --- /dev/null +++ b/lib/antlr4/include/LexerNoViableAltException.h @@ -0,0 +1,31 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "RecognitionException.h" +#include "atn/ATNConfigSet.h" + +namespace antlr4 { + + class ANTLR4CPP_PUBLIC LexerNoViableAltException : public RecognitionException { + public: + LexerNoViableAltException(Lexer *lexer, CharStream *input, size_t startIndex, + atn::ATNConfigSet *deadEndConfigs); + + virtual size_t getStartIndex(); + virtual atn::ATNConfigSet* getDeadEndConfigs(); + virtual std::string toString(); + + private: + /// Matching attempted at what input index? + const size_t _startIndex; + + /// Which configurations did we try at input.index() that couldn't match input.LA(1)? + atn::ATNConfigSet *_deadEndConfigs; + + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/ListTokenSource.h b/lib/antlr4/include/ListTokenSource.h new file mode 100644 index 0000000..2f5f633 --- /dev/null +++ b/lib/antlr4/include/ListTokenSource.h @@ -0,0 +1,88 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "TokenSource.h" +#include "CommonTokenFactory.h" + +namespace antlr4 { + + /// Provides an implementation of as a wrapper around a list + /// of objects. + /// + /// If the final token in the list is an token, it will be used + /// as the EOF token for every call to after the end of the + /// list is reached. Otherwise, an EOF token will be created. + class ANTLR4CPP_PUBLIC ListTokenSource : public TokenSource { + protected: + // This list will be emptied token by token as we call nextToken(). + // Token streams can be used to buffer tokens for a while. + std::vector> tokens; + + private: + /// + /// The name of the input source. If this value is {@code null}, a call to + /// should return the source name used to create the + /// the next token in (or the previous token if the end of + /// the input has been reached). 
+ /// + const std::string sourceName; + + protected: + /// The index into of token to return by the next call to + /// . The end of the input is indicated by this value + /// being greater than or equal to the number of items in . + size_t i; + + private: + /// This is the backing field for and + /// . + Ref> _factory = CommonTokenFactory::DEFAULT; + + public: + /// Constructs a new instance from the specified + /// collection of objects. + /// + /// The collection of objects to provide as a + /// . + /// if {@code tokens} is {@code null} + ListTokenSource(std::vector> tokens); + ListTokenSource(const ListTokenSource& other) = delete; + + ListTokenSource& operator = (const ListTokenSource& other) = delete; + + /// + /// Constructs a new instance from the specified + /// collection of objects and source name. + /// + /// The collection of objects to provide as a + /// . + /// The name of the . If this value is + /// {@code null}, will attempt to infer the name from + /// the next (or the previous token if the end of the input has + /// been reached). + /// + /// if {@code tokens} is {@code null} + ListTokenSource(std::vector> tokens_, const std::string &sourceName_); + + virtual size_t getCharPositionInLine() override; + virtual std::unique_ptr nextToken() override; + virtual size_t getLine() const override; + virtual CharStream* getInputStream() override; + virtual std::string getSourceName() override; + + template + void setTokenFactory(TokenFactory *factory) { + this->_factory = factory; + } + + virtual Ref> getTokenFactory() override; + + private: + void InitializeInstanceFields(); + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/NoViableAltException.h b/lib/antlr4/include/NoViableAltException.h new file mode 100644 index 0000000..b15039d --- /dev/null +++ b/lib/antlr4/include/NoViableAltException.h @@ -0,0 +1,42 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "RecognitionException.h" +#include "Token.h" +#include "atn/ATNConfigSet.h" + +namespace antlr4 { + + /// Indicates that the parser could not decide which of two or more paths + /// to take based upon the remaining input. It tracks the starting token + /// of the offending input and also knows where the parser was + /// in the various paths when the error. Reported by reportNoViableAlternative() + class ANTLR4CPP_PUBLIC NoViableAltException : public RecognitionException { + public: + NoViableAltException(Parser *recognizer); // LL(1) error + NoViableAltException(Parser *recognizer, TokenStream *input,Token *startToken, + Token *offendingToken, atn::ATNConfigSet *deadEndConfigs, ParserRuleContext *ctx, bool deleteConfigs); + ~NoViableAltException(); + + virtual Token* getStartToken() const; + virtual atn::ATNConfigSet* getDeadEndConfigs() const; + + private: + /// Which configurations did we try at input.index() that couldn't match input.LT(1)? + /// Shared pointer that conditionally deletes the configurations (based on flag + /// passed during construction) + Ref _deadEndConfigs; + + /// The token object at the start index; the input stream might + /// not be buffering tokens so get a reference to it. (At the + /// time the error occurred, of course the stream needs to keep a + /// buffer all of the tokens but later we might not have access to those.) + Token *_startToken; + + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/Parser.h b/lib/antlr4/include/Parser.h new file mode 100644 index 0000000..515f835 --- /dev/null +++ b/lib/antlr4/include/Parser.h @@ -0,0 +1,467 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "Recognizer.h" +#include "tree/ParseTreeListener.h" +#include "tree/ParseTree.h" +#include "TokenStream.h" +#include "TokenSource.h" +#include "misc/Interval.h" + +namespace antlr4 { + + /// This is all the parsing support code essentially; most of it is error recovery stuff. + class ANTLR4CPP_PUBLIC Parser : public Recognizer { + public: + + class TraceListener : public tree::ParseTreeListener { + public: + TraceListener(Parser *outerInstance); + virtual ~TraceListener(); + + virtual void enterEveryRule(ParserRuleContext *ctx) override; + virtual void visitTerminal(tree::TerminalNode *node) override; + virtual void visitErrorNode(tree::ErrorNode *node) override; + virtual void exitEveryRule(ParserRuleContext *ctx) override; + + private: + Parser *const outerInstance; + }; + + class TrimToSizeListener : public tree::ParseTreeListener { + public: + static TrimToSizeListener INSTANCE; + + virtual ~TrimToSizeListener(); + + virtual void enterEveryRule(ParserRuleContext *ctx) override; + virtual void visitTerminal(tree::TerminalNode *node) override; + virtual void visitErrorNode(tree::ErrorNode *node) override; + virtual void exitEveryRule(ParserRuleContext *ctx) override; + }; + + Parser(TokenStream *input); + virtual ~Parser(); + + /// reset the parser's state + virtual void reset(); + + /// + /// Match current input symbol against {@code ttype}. If the symbol type + /// matches, and are + /// called to complete the match process. + /// + /// If the symbol type does not match, + /// is called on the current error + /// strategy to attempt recovery. If is + /// {@code true} and the token index of the symbol returned by + /// is -1, the symbol is added to + /// the parse tree by calling {@link #createErrorNode(ParserRuleContext, Token)} then + /// {@link ParserRuleContext#addErrorNode(ErrorNode)}. 
+ /// + /// the token type to match + /// the matched symbol + /// if the current input symbol did not match + /// {@code ttype} and the error strategy could not recover from the + /// mismatched symbol + virtual Token* match(size_t ttype); + + /// + /// Match current input symbol as a wildcard. If the symbol type matches + /// (i.e. has a value greater than 0), + /// and are called to complete the match process. + ///

      + /// If the symbol type does not match, + /// ANTLRErrorStrategy::recoverInline is called on the current error + /// strategy to attempt recovery. If getBuildParseTree() is + /// {@code true} and the token index of the symbol returned by + /// ANTLRErrorStrategy::recoverInline is -1, the symbol is added to + /// the parse tree by calling ParserRuleContext::addErrorNode. + ///

      + /// the matched symbol + /// if the current input symbol did not match + /// a wildcard and the error strategy could not recover from the mismatched + /// symbol + virtual Token* matchWildcard(); + + /// + /// Track the objects during the parse and hook + /// them up using the list so that it + /// forms a parse tree. The returned from the start + /// rule represents the root of the parse tree. + ///

      + /// Note that if we are not building parse trees, rule contexts only point + /// upwards. When a rule exits, it returns the context but that gets garbage + /// collected if nobody holds a reference. It points upwards but nobody + /// points at it. + ///

      + /// When we build parse trees, we are adding all of these contexts to + /// the ParserRuleContext::children list. Contexts are then not candidates + /// for garbage collection. + ///

      + virtual void setBuildParseTree(bool buildParseTrees); + + /// + /// Gets whether or not a complete parse tree will be constructed while + /// parsing. This property is {@code true} for a newly constructed parser. + /// + /// {@code true} if a complete parse tree will be constructed while + /// parsing, otherwise {@code false} + virtual bool getBuildParseTree(); + + /// + /// Trim the internal lists of the parse tree during parsing to conserve memory. + /// This property is set to {@code false} by default for a newly constructed parser. + /// + /// {@code true} to trim the capacity of the + /// list to its size after a rule is parsed. + virtual void setTrimParseTree(bool trimParseTrees); + + /// {@code true} if the list is trimmed + /// using the default during the parse process. + virtual bool getTrimParseTree(); + + virtual std::vector getParseListeners(); + + /// + /// Registers {@code listener} to receive events during the parsing process. + ///

      + /// To support output-preserving grammar transformations (including but not + /// limited to left-recursion removal, automated left-factoring, and + /// optimized code generation), calls to listener methods during the parse + /// may differ substantially from calls made by + /// used after the parse is complete. In + /// particular, rule entry and exit events may occur in a different order + /// during the parse than after the parser. In addition, calls to certain + /// rule entry methods may be omitted. + ///

      + /// With the following specific exceptions, calls to listener events are + /// deterministic, i.e. for identical input the calls to listener + /// methods will be the same. + /// + ///

        + ///
      • Alterations to the grammar used to generate code may change the + /// behavior of the listener calls.
      • + ///
      • Alterations to the command line options passed to ANTLR 4 when + /// generating the parser may change the behavior of the listener calls.
      • + ///
      • Changing the version of the ANTLR Tool used to generate the parser + /// may change the behavior of the listener calls.
      • + ///
      + ///
      + /// the listener to add + /// + /// if {@code listener} is {@code null} + virtual void addParseListener(tree::ParseTreeListener *listener); + + /// + /// Remove {@code listener} from the list of parse listeners. + ///

      + /// If {@code listener} is {@code null} or has not been added as a parse + /// listener, this method does nothing. + ///

      + /// + /// the listener to remove + virtual void removeParseListener(tree::ParseTreeListener *listener); + + /// + /// Remove all parse listeners. + /// + /// + virtual void removeParseListeners(); + + /// + /// Notify any parse listeners of an enter rule event. + /// + /// + virtual void triggerEnterRuleEvent(); + + /// + /// Notify any parse listeners of an exit rule event. + /// + /// + virtual void triggerExitRuleEvent(); + + /// + /// Gets the number of syntax errors reported during parsing. This value is + /// incremented each time is called. + /// + /// + virtual size_t getNumberOfSyntaxErrors(); + + virtual Ref> getTokenFactory() override; + + /// + /// Tell our token source and error strategy about a new way to create tokens. + template + void setTokenFactory(TokenFactory *factory) { + _input->getTokenSource()->setTokenFactory(factory); + } + + /// The ATN with bypass alternatives is expensive to create so we create it + /// lazily. The ATN is owned by us. + virtual const atn::ATN& getATNWithBypassAlts(); + + /// + /// The preferred method of getting a tree pattern. For example, here's a + /// sample use: + /// + ///
      +    /// ParseTree t = parser.expr();
      +    /// ParseTreePattern p = parser.compileParseTreePattern("+0", MyParser.RULE_expr);
      +    /// ParseTreeMatch m = p.match(t);
      +    /// String id = m.get("ID");
      +    /// 
      + ///
      + virtual tree::pattern::ParseTreePattern compileParseTreePattern(const std::string &pattern, int patternRuleIndex); + + /// + /// The same as but specify a + /// rather than trying to deduce it from this parser. + /// + virtual tree::pattern::ParseTreePattern compileParseTreePattern(const std::string &pattern, int patternRuleIndex, + Lexer *lexer); + + virtual Ref getErrorHandler(); + virtual void setErrorHandler(Ref const& handler); + + virtual IntStream* getInputStream() override; + void setInputStream(IntStream *input) override; + + virtual TokenStream* getTokenStream(); + + /// Set the token stream and reset the parser. + virtual void setTokenStream(TokenStream *input); + + /// + /// Match needs to return the current input symbol, which gets put + /// into the label for the associated token ref; e.g., x=ID. + /// + virtual Token* getCurrentToken(); + + void notifyErrorListeners(const std::string &msg); + + virtual void notifyErrorListeners(Token *offendingToken, const std::string &msg, std::exception_ptr e); + + /// Consume and return the . + ///

      + /// E.g., given the following input with {@code A} being the current + /// lookahead symbol, this function moves the cursor to {@code B} and returns + /// {@code A}. + /// + ///

      +    ///  A B
      +    ///  ^
      +    /// 
      + /// + /// If the parser is not in error recovery mode, the consumed symbol is added + /// to the parse tree using , and + /// is called on any parse listeners. + /// If the parser is in error recovery mode, the consumed symbol is + /// added to the parse tree using {@link #createErrorNode(ParserRuleContext, Token)} then + /// {@link ParserRuleContext#addErrorNode(ErrorNode)} and + /// is called on any parse + /// listeners. + virtual Token* consume(); + + /// Always called by generated parsers upon entry to a rule. Access field + /// get the current context. + virtual void enterRule(ParserRuleContext *localctx, size_t state, size_t ruleIndex); + + void exitRule(); + + virtual void enterOuterAlt(ParserRuleContext *localctx, size_t altNum); + + /** + * Get the precedence level for the top-most precedence rule. + * + * @return The precedence level for the top-most precedence rule, or -1 if + * the parser context is not nested within a precedence rule. + */ + int getPrecedence() const; + + /// @deprecated Use + /// instead. + virtual void enterRecursionRule(ParserRuleContext *localctx, size_t ruleIndex); + virtual void enterRecursionRule(ParserRuleContext *localctx, size_t state, size_t ruleIndex, int precedence); + + /** Like {@link #enterRule} but for recursive rules. + * Make the current context the child of the incoming localctx. + */ + virtual void pushNewRecursionContext(ParserRuleContext *localctx, size_t state, size_t ruleIndex); + virtual void unrollRecursionContexts(ParserRuleContext *parentctx); + virtual ParserRuleContext* getInvokingContext(size_t ruleIndex); + virtual ParserRuleContext* getContext(); + virtual void setContext(ParserRuleContext *ctx); + virtual bool precpred(RuleContext *localctx, int precedence) override; + virtual bool inContext(const std::string &context); + + /// + /// Checks whether or not {@code symbol} can follow the current state in the + /// ATN. 
The behavior of this method is equivalent to the following, but is + /// implemented such that the complete context-sensitive follow set does not + /// need to be explicitly constructed. + /// + ///
      +    /// return getExpectedTokens().contains(symbol);
      +    /// 
      + ///
      + /// the symbol type to check + /// {@code true} if {@code symbol} can follow the current state in + /// the ATN, otherwise {@code false}. + virtual bool isExpectedToken(size_t symbol); + + bool isMatchedEOF() const; + + /// + /// Computes the set of input symbols which could follow the current parser + /// state and context, as given by and , + /// respectively. + /// + /// + virtual misc::IntervalSet getExpectedTokens(); + + virtual misc::IntervalSet getExpectedTokensWithinCurrentRule(); + + /// Get a rule's index (i.e., {@code RULE_ruleName} field) or INVALID_INDEX if not found. + virtual size_t getRuleIndex(const std::string &ruleName); + + virtual ParserRuleContext* getRuleContext(); + + /// + /// Return List<String> of the rule names in your parser instance + /// leading up to a call to the current rule. You could override if + /// you want more details such as the file/line info of where + /// in the ATN a rule is invoked. + /// + /// This is very useful for error messages. + /// + virtual std::vector getRuleInvocationStack(); + + virtual std::vector getRuleInvocationStack(RuleContext *p); + + /// + /// For debugging and other purposes. + virtual std::vector getDFAStrings(); + + /// + /// For debugging and other purposes. + virtual void dumpDFA(); + + virtual std::string getSourceName(); + + atn::ParseInfo getParseInfo() const; + + /** + * @since 4.3 + */ + void setProfile(bool profile); + + /// + /// During a parse is sometimes useful to listen in on the rule entry and exit + /// events as well as token matches. This is for quick and dirty debugging. + /// + virtual void setTrace(bool trace); + + /** + * Gets whether a {@link TraceListener} is registered as a parse listener + * for the parser. + * + * @see #setTrace(boolean) + */ + bool isTrace() const; + + tree::ParseTreeTracker& getTreeTracker() { return _tracker; } + + /** How to create a token leaf node associated with a parent. 
+ * Typically, the terminal node to create is not a function of the parent + * but this method must still set the parent pointer of the terminal node + * returned. I would prefer having {@link ParserRuleContext#addAnyChild(ParseTree)} + * set the parent pointer, but the parent pointer is implementation dependent + * and currently there is no setParent() in {@link TerminalNode} (and can't + * add method in Java 1.7 without breaking backward compatibility). + * + * @since 4.7 + */ + tree::TerminalNode *createTerminalNode(Token *t); + + /** How to create an error node, given a token, associated with a parent. + * Typically, the error node to create is not a function of the parent + * but this method must still set the parent pointer of the terminal node + * returned. I would prefer having {@link ParserRuleContext#addAnyChild(ParseTree)} + * set the parent pointer, but the parent pointer is implementation dependent + * and currently there is no setParent() in {@link ErrorNode} (and can't + * add method in Java 1.7 without breaking backward compatibility). + * + * @since 4.7 + */ + tree::ErrorNode *createErrorNode(Token *t); + + protected: + /// The ParserRuleContext object for the currently executing rule. + /// This is always non-null during the parsing process. + // ml: this is one of the contexts tracked in _allocatedContexts. + ParserRuleContext *_ctx; + + /// The error handling strategy for the parser. The default is DefaultErrorStrategy. + /// See also getErrorHandler. + Ref _errHandler; + + /// + /// The input stream. + /// + /// + /// + TokenStream *_input; + + std::vector _precedenceStack; + + /// + /// Specifies whether or not the parser should construct a parse tree during + /// the parsing process. The default value is {@code true}. + /// + /// + /// + bool _buildParseTrees; + + /// The list of listeners registered to receive + /// events during the parse. 
+ /// + std::vector _parseListeners; + + /// + /// The number of syntax errors reported during parsing. This value is + /// incremented each time is called. + /// + size_t _syntaxErrors; + + /** Indicates parser has match()ed EOF token. See {@link #exitRule()}. */ + bool _matchedEOF; + + virtual void addContextToParseTree(); + + // All rule contexts created during a parse run. This is cleared when calling reset(). + tree::ParseTreeTracker _tracker; + + private: + /// This field maps from the serialized ATN string to the deserialized with + /// bypass alternatives. + /// + /// + static std::map, atn::ATN> bypassAltsAtnCache; + + /// When setTrace(true) is called, a reference to the + /// TraceListener is stored here so it can be easily removed in a + /// later call to setTrace(false). The listener itself is + /// implemented as a parser listener so this field is not directly used by + /// other parser methods. + TraceListener *_tracer; + + void InitializeInstanceFields(); + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/ParserInterpreter.h b/lib/antlr4/include/ParserInterpreter.h new file mode 100644 index 0000000..f25fc53 --- /dev/null +++ b/lib/antlr4/include/ParserInterpreter.h @@ -0,0 +1,179 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Parser.h" +#include "atn/ATN.h" +#include "support/BitSet.h" +#include "atn/PredictionContext.h" +#include "Vocabulary.h" + +namespace antlr4 { + + /// + /// A parser simulator that mimics what ANTLR's generated + /// parser code does. A ParserATNSimulator is used to make + /// predictions via adaptivePredict but this class moves a pointer through the + /// ATN to simulate parsing. ParserATNSimulator just + /// makes us efficient rather than having to backtrack, for example. 
+ /// + /// This properly creates parse trees even for left recursive rules. + /// + /// We rely on the left recursive rule invocation and special predicate + /// transitions to make left recursive rules work. + /// + /// See TestParserInterpreter for examples. + /// + class ANTLR4CPP_PUBLIC ParserInterpreter : public Parser { + public: + // @deprecated + ParserInterpreter(const std::string &grammarFileName, const std::vector& tokenNames, + const std::vector& ruleNames, const atn::ATN &atn, TokenStream *input); + ParserInterpreter(const std::string &grammarFileName, const dfa::Vocabulary &vocabulary, + const std::vector &ruleNames, const atn::ATN &atn, TokenStream *input); + ~ParserInterpreter(); + + virtual void reset() override; + + virtual const atn::ATN& getATN() const override; + + // @deprecated + virtual const std::vector& getTokenNames() const override; + + virtual const dfa::Vocabulary& getVocabulary() const override; + + virtual const std::vector& getRuleNames() const override; + virtual std::string getGrammarFileName() const override; + + /// Begin parsing at startRuleIndex + virtual ParserRuleContext* parse(size_t startRuleIndex); + + virtual void enterRecursionRule(ParserRuleContext *localctx, size_t state, size_t ruleIndex, int precedence) override; + + + /** Override this parser interpreters normal decision-making process + * at a particular decision and input token index. Instead of + * allowing the adaptive prediction mechanism to choose the + * first alternative within a block that leads to a successful parse, + * force it to take the alternative, 1..n for n alternatives. + * + * As an implementation limitation right now, you can only specify one + * override. This is sufficient to allow construction of different + * parse trees for ambiguous input. It means re-parsing the entire input + * in general because you're never sure where an ambiguous sequence would + * live in the various parse trees. 
For example, in one interpretation, + * an ambiguous input sequence would be matched completely in expression + * but in another it could match all the way back to the root. + * + * s : e '!'? ; + * e : ID + * | ID '!' + * ; + * + * Here, x! can be matched as (s (e ID) !) or (s (e ID !)). In the first + * case, the ambiguous sequence is fully contained only by the root. + * In the second case, the ambiguous sequence is fully contained within just + * e, as in: (e ID !). + * + * Rather than trying to optimize this and make + * some intelligent decisions for optimization purposes, I settled on + * just re-parsing the whole input and then using + * {@link Trees#getRootOfSubtreeEnclosingRegion} to find the minimal + * subtree that contains the ambiguous sequence. I originally tried to + * record the call stack at the point the parser detected an ambiguity but + * left recursive rules create a parse tree stack that does not reflect + * the actual call stack. That impedance mismatch was enough to make + * it challenging to restart the parser at a deeply nested rule + * invocation. + * + * Only parser interpreters can override decisions so as to avoid inserting + * override checking code in the critical ALL(*) prediction execution path. + * + * @since 4.5.1 + */ + void addDecisionOverride(int decision, int tokenIndex, int forcedAlt); + + Ref getOverrideDecisionRoot() const; + + /** Return the root of the parse, which can be useful if the parser + * bails out. You still can access the top node. Note that, + * because of the way left recursive rules add children, it's possible + * that the root will not have any children if the start rule immediately + * called a left recursive rule that fails.
+ * + * @since 4.5.1 + */ + InterpreterRuleContext* getRootContext(); + + protected: + const std::string _grammarFileName; + std::vector _tokenNames; + const atn::ATN &_atn; + + std::vector _ruleNames; + + std::vector _decisionToDFA; // not shared like it is for generated parsers + atn::PredictionContextCache _sharedContextCache; + + /** This stack corresponds to the _parentctx, _parentState pair of locals + * that would exist on call stack frames with a recursive descent parser; + * in the generated function for a left-recursive rule you'd see: + * + * private EContext e(int _p) throws RecognitionException { + * ParserRuleContext _parentctx = _ctx; // Pair.a + * int _parentState = getState(); // Pair.b + * ... + * } + * + * Those values are used to create new recursive rule invocation contexts + * associated with left operand of an alt like "expr '*' expr". + */ + std::stack> _parentContextStack; + + /** We need a map from (decision,inputIndex)->forced alt for computing ambiguous + * parse trees. For now, we allow exactly one override. + */ + int _overrideDecision = -1; + size_t _overrideDecisionInputIndex = INVALID_INDEX; + size_t _overrideDecisionAlt = INVALID_INDEX; + bool _overrideDecisionReached = false; // latch and only override once; error might trigger infinite loop + + /** What is the current context when we override a decision? This tells + * us what the root of the parse tree is when using override + * for an ambiguity/lookahead check. + */ + Ref _overrideDecisionRoot; + InterpreterRuleContext* _rootContext; + + virtual atn::ATNState *getATNState(); + virtual void visitState(atn::ATNState *p); + + /** Method visitDecisionState() is called when the interpreter reaches + * a decision state (instance of DecisionState). It gives an opportunity + * for subclasses to track interesting things. + */ + size_t visitDecisionState(atn::DecisionState *p); + + /** Provide simple "factory" for InterpreterRuleContext's. 
+ * @since 4.5.1 + */ + InterpreterRuleContext* createInterpreterRuleContext(ParserRuleContext *parent, size_t invokingStateNumber, size_t ruleIndex); + + virtual void visitRuleStopState(atn::ATNState *p); + + /** Rely on the error handler for this parser but, if no tokens are consumed + * to recover, add an error node. Otherwise, nothing is seen in the parse + * tree. + */ + void recover(RecognitionException &e); + Token* recoverInline(); + + private: + const dfa::Vocabulary &_vocabulary; + std::unique_ptr _errorToken; + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/ParserRuleContext.h b/lib/antlr4/include/ParserRuleContext.h new file mode 100644 index 0000000..e117c3b --- /dev/null +++ b/lib/antlr4/include/ParserRuleContext.h @@ -0,0 +1,147 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "RuleContext.h" +#include "support/CPPUtils.h" + +namespace antlr4 { + + /// + /// A rule invocation record for parsing. + /// + /// Contains all of the information about the current rule not stored in the + /// RuleContext. It handles parse tree children list, Any ATN state + /// tracing, and the default values available for rule invocatons: + /// start, stop, rule index, current alt number. + /// + /// Subclasses made for each rule and grammar track the parameters, + /// return values, locals, and labels specific to that rule. These + /// are the objects that are returned from rules. + /// + /// Note text is not an actual field of a rule return value; it is computed + /// from start and stop using the input stream's toString() method. I + /// could add a ctor to this so that we can pass in and store the input + /// stream, but I'm not sure we want to do that. 
It would seem to be undefined + /// to get the .text property anyway if the rule matches tokens from multiple + /// input streams. + /// + /// I do not use getters for fields of objects that are used simply to + /// group values such as this aggregate. The getters/setters are there to + /// satisfy the superclass interface. + /// + class ANTLR4CPP_PUBLIC ParserRuleContext : public RuleContext { + public: + static ParserRuleContext EMPTY; + + /// + /// For debugging/tracing purposes, we want to track all of the nodes in + /// the ATN traversed by the parser for a particular rule. + /// This list indicates the sequence of ATN nodes used to match + /// the elements of the children list. This list does not include + /// ATN nodes and other rules used to match rule invocations. It + /// traces the rule invocation node itself but nothing inside that + /// other rule's ATN submachine. + /// + /// There is NOT a one-to-one correspondence between the children and + /// states list. There are typically many nodes in the ATN traversed + /// for each element in the children list. For example, for a rule + /// invocation there is the invoking state and the following state. + /// + /// The parser setState() method updates field s and adds it to this list + /// if we are debugging/tracing. + /// + /// This does not trace states visited during prediction. + /// + // public List states; + + Token *start; + Token *stop; + + /// The exception that forced this rule to return. If the rule successfully + /// completed, this is "null exception pointer". + std::exception_ptr exception; + + ParserRuleContext(); + ParserRuleContext(ParserRuleContext *parent, size_t invokingStateNumber); + virtual ~ParserRuleContext() {} + + /** COPY a ctx (I'm deliberately not using copy constructor) to avoid + * confusion with creating node with parent. Does not copy children + * (except error leaves). 
+ */ + virtual void copyFrom(ParserRuleContext *ctx); + + + // Double dispatch methods for listeners + + virtual void enterRule(tree::ParseTreeListener *listener); + virtual void exitRule(tree::ParseTreeListener *listener); + + /** Add a token leaf node child and force its parent to be this node. */ + tree::TerminalNode* addChild(tree::TerminalNode *t); + RuleContext* addChild(RuleContext *ruleInvocation); + + /// Used by enterOuterAlt to toss out a RuleContext previously added as + /// we entered a rule. If we have # label, we will need to remove + /// generic ruleContext object. + virtual void removeLastChild(); + + virtual tree::TerminalNode* getToken(size_t ttype, std::size_t i); + + virtual std::vector getTokens(size_t ttype); + + template + T* getRuleContext(size_t i) { + if (children.empty()) { + return nullptr; + } + + size_t j = 0; // what element have we found with ctxType? + for (auto &child : children) { + if (antlrcpp::is(child)) { + if (j++ == i) { + return dynamic_cast(child); + } + } + } + return nullptr; + } + + template + std::vector getRuleContexts() { + std::vector contexts; + for (auto child : children) { + if (antlrcpp::is(child)) { + contexts.push_back(dynamic_cast(child)); + } + } + + return contexts; + } + + virtual misc::Interval getSourceInterval() override; + + /** + * Get the initial token in this context. + * Note that the range from start to stop is inclusive, so for rules that do not consume anything + * (for example, zero length or error productions) this token may exceed stop. + */ + virtual Token *getStart(); + + /** + * Get the final token in this context. + * Note that the range from start to stop is inclusive, so for rules that do not consume anything + * (for example, zero length or error productions) this token may precede start. 
+ */ + virtual Token *getStop(); + + /// + /// Used for rule context info debugging during parse-time, not so much for ATN debugging + virtual std::string toInfoString(Parser *recognizer); + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/ProxyErrorListener.h b/lib/antlr4/include/ProxyErrorListener.h new file mode 100644 index 0000000..6720f97 --- /dev/null +++ b/lib/antlr4/include/ProxyErrorListener.h @@ -0,0 +1,38 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "ANTLRErrorListener.h" +#include "Exceptions.h" + +namespace antlr4 { + + /// This implementation of ANTLRErrorListener dispatches all calls to a + /// collection of delegate listeners. This reduces the effort required to support multiple + /// listeners. + class ANTLR4CPP_PUBLIC ProxyErrorListener : public ANTLRErrorListener { + private: + std::set _delegates; // Not owned. 
+ + public: + void addErrorListener(ANTLRErrorListener *listener); + void removeErrorListener(ANTLRErrorListener *listener); + void removeErrorListeners(); + + void syntaxError(Recognizer *recognizer, Token *offendingSymbol, size_t line, size_t charPositionInLine, + const std::string &msg, std::exception_ptr e) override; + + virtual void reportAmbiguity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, bool exact, + const antlrcpp::BitSet &ambigAlts, atn::ATNConfigSet *configs) override; + + virtual void reportAttemptingFullContext(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, + const antlrcpp::BitSet &conflictingAlts, atn::ATNConfigSet *configs) override; + + virtual void reportContextSensitivity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, + size_t prediction, atn::ATNConfigSet *configs) override; + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/RecognitionException.h b/lib/antlr4/include/RecognitionException.h new file mode 100644 index 0000000..aa204f7 --- /dev/null +++ b/lib/antlr4/include/RecognitionException.h @@ -0,0 +1,98 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Exceptions.h" + +namespace antlr4 { + + /// The root of the ANTLR exception hierarchy. In general, ANTLR tracks just + /// 3 kinds of errors: prediction errors, failed predicate errors, and + /// mismatched input errors. In each case, the parser knows where it is + /// in the input, where it is in the ATN, the rule invocation stack, + /// and what kind of problem occurred. + class ANTLR4CPP_PUBLIC RecognitionException : public RuntimeException { + private: + /// The Recognizer where this exception originated. 
+ Recognizer *_recognizer; + IntStream *_input; + ParserRuleContext *_ctx; + + /// The current Token when an error occurred. Since not all streams + /// support accessing symbols by index, we have to track the Token + /// instance itself. + Token *_offendingToken; + + size_t _offendingState; + + public: + RecognitionException(Recognizer *recognizer, IntStream *input, ParserRuleContext *ctx, + Token *offendingToken = nullptr); + RecognitionException(const std::string &message, Recognizer *recognizer, IntStream *input, + ParserRuleContext *ctx, Token *offendingToken = nullptr); + RecognitionException(RecognitionException const&) = default; + ~RecognitionException(); + RecognitionException& operator=(RecognitionException const&) = default; + + /// Get the ATN state number the parser was in at the time the error + /// occurred. For NoViableAltException and + /// LexerNoViableAltException exceptions, this is the + /// DecisionState number. For others, it is the state whose outgoing + /// edge we couldn't match. + /// + /// If the state number is not known, this method returns -1. + virtual size_t getOffendingState() const; + + protected: + void setOffendingState(size_t offendingState); + + /// Gets the set of input symbols which could potentially follow the + /// previously matched symbol at the time this exception was thrown. + /// + /// If the set of expected tokens is not known and could not be computed, + /// this method returns an empty set. + /// + /// @returns The set of token types that could potentially follow the current + /// state in the ATN, or an empty set if the information is not available. + public: + virtual misc::IntervalSet getExpectedTokens() const; + + /// + /// Gets the at the time this exception was thrown. + ///

      + /// If the context is not available, this method returns {@code null}. + ///

      + /// The at the time this exception was thrown. + /// If the context is not available, this method returns {@code null}. + virtual RuleContext* getCtx() const; + + /// + /// Gets the input stream which is the symbol source for the recognizer where + /// this exception was thrown. + ///

      + /// If the input stream is not available, this method returns {@code null}. + ///

      + /// The input stream which is the symbol source for the recognizer + /// where this exception was thrown, or {@code null} if the stream is not + /// available. + virtual IntStream* getInputStream() const; + + virtual Token* getOffendingToken() const; + + /// + /// Gets the where this exception occurred. + ///

      + /// If the recognizer is not available, this method returns {@code null}. + ///

      + /// The recognizer where this exception occurred, or {@code null} if + /// the recognizer is not available. + virtual Recognizer* getRecognizer() const; + + private: + void InitializeInstanceFields(); + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/Recognizer.h b/lib/antlr4/include/Recognizer.h new file mode 100644 index 0000000..8c0bcb0 --- /dev/null +++ b/lib/antlr4/include/Recognizer.h @@ -0,0 +1,164 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "ProxyErrorListener.h" + +namespace antlr4 { + + class ANTLR4CPP_PUBLIC Recognizer { + public: + static const size_t EOF = static_cast(-1); // std::numeric_limits::max(); doesn't work in VS 2013. + + Recognizer(); + Recognizer(Recognizer const&) = delete; + virtual ~Recognizer(); + + Recognizer& operator=(Recognizer const&) = delete; + + /** Used to print out token names like ID during debugging and + * error reporting. The generated parsers implement a method + * that overrides this to point to their String[] tokenNames. + * + * @deprecated Use {@link #getVocabulary()} instead. + */ + virtual std::vector const& getTokenNames() const = 0; + virtual std::vector const& getRuleNames() const = 0; + + /** + * Get the vocabulary used by the recognizer. + * + * @return A {@link Vocabulary} instance providing information about the + * vocabulary used by the grammar. + */ + virtual dfa::Vocabulary const& getVocabulary() const; + + /// + /// Get a map from token names to token types. + ///

      + /// Used for XPath and tree pattern compilation. + ///

      + virtual std::map getTokenTypeMap(); + + /// + /// Get a map from rule names to rule indexes. + ///

      + /// Used for XPath and tree pattern compilation. + ///

      + virtual std::map getRuleIndexMap(); + + virtual size_t getTokenType(const std::string &tokenName); + + /// + /// If this recognizer was generated, it will have a serialized ATN + /// representation of the grammar. + ///

      + /// For interpreters, we don't know their serialized ATN despite having + /// created the interpreter from it. + ///

      + virtual const std::vector getSerializedATN() const { + throw "there is no serialized ATN"; + } + + /// + /// For debugging and other purposes, might want the grammar name. + /// Have ANTLR generate an implementation for this method. + /// + virtual std::string getGrammarFileName() const = 0; + + /// Get the ATN interpreter (in fact one of it's descendants) used by the recognizer for prediction. + /// @returns The ATN interpreter used by the recognizer for prediction. + template + T* getInterpreter() const { + return dynamic_cast(_interpreter); + } + + /** + * Set the ATN interpreter used by the recognizer for prediction. + * + * @param interpreter The ATN interpreter used by the recognizer for + * prediction. + */ + void setInterpreter(atn::ATNSimulator *interpreter); + + /// What is the error header, normally line/character position information? + virtual std::string getErrorHeader(RecognitionException *e); + + /** How should a token be displayed in an error message? The default + * is to display just the text, but during development you might + * want to have a lot of information spit out. Override in that case + * to use t.toString() (which, for CommonToken, dumps everything about + * the token). This is better than forcing you to override a method in + * your token objects because you don't have to go modify your lexer + * so that it creates a new Java type. + * + * @deprecated This method is not called by the ANTLR 4 Runtime. Specific + * implementations of {@link ANTLRErrorStrategy} may provide a similar + * feature when necessary. For example, see + * {@link DefaultErrorStrategy#getTokenErrorDisplay}. + */ + virtual std::string getTokenErrorDisplay(Token *t); + + /// if {@code listener} is {@code null}. 
+ virtual void addErrorListener(ANTLRErrorListener *listener); + + virtual void removeErrorListener(ANTLRErrorListener *listener); + + virtual void removeErrorListeners(); + + virtual ProxyErrorListener& getErrorListenerDispatch(); + + // subclass needs to override these if there are sempreds or actions + // that the ATN interp needs to execute + virtual bool sempred(RuleContext *localctx, size_t ruleIndex, size_t actionIndex); + + virtual bool precpred(RuleContext *localctx, int precedence); + + virtual void action(RuleContext *localctx, size_t ruleIndex, size_t actionIndex); + + virtual size_t getState() const ; + + // Get the ATN used by the recognizer for prediction. + virtual const atn::ATN& getATN() const = 0; + + /// + /// Indicate that the recognizer has changed internal state that is + /// consistent with the ATN state passed in. This way we always know + /// where we are in the ATN as the parser goes along. The rule + /// context objects form a stack that lets us see the stack of + /// invoking rules. Combine this and we have complete ATN + /// configuration information. + /// + void setState(size_t atnState); + + virtual IntStream* getInputStream() = 0; + + virtual void setInputStream(IntStream *input) = 0; + + virtual Ref> getTokenFactory() = 0; + + template + void setTokenFactory(TokenFactory *input); + + protected: + atn::ATNSimulator *_interpreter; // Set and deleted in descendants (or the profiler). + + // Mutex to manage synchronized access for multithreading. + std::mutex _mutex; + + private: + static std::map> _tokenTypeMapCache; + static std::map, std::map> _ruleIndexMapCache; + + ProxyErrorListener _proxListener; // Manages a collection of listeners. 
+ + size_t _stateNumber; + + void InitializeInstanceFields(); + + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/RuleContext.h b/lib/antlr4/include/RuleContext.h new file mode 100644 index 0000000..9ee0d2d --- /dev/null +++ b/lib/antlr4/include/RuleContext.h @@ -0,0 +1,137 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "tree/ParseTree.h" + +namespace antlr4 { + + /** A rule context is a record of a single rule invocation. + * + * We form a stack of these context objects using the parent + * pointer. A parent pointer of null indicates that the current + * context is the bottom of the stack. The ParserRuleContext subclass + * as a children list so that we can turn this data structure into a + * tree. + * + * The root node always has a null pointer and invokingState of -1. + * + * Upon entry to parsing, the first invoked rule function creates a + * context object (asubclass specialized for that rule such as + * SContext) and makes it the root of a parse tree, recorded by field + * Parser._ctx. + * + * public final SContext s() throws RecognitionException { + * SContext _localctx = new SContext(_ctx, getState()); <-- create new node + * enterRule(_localctx, 0, RULE_s); <-- push it + * ... + * exitRule(); <-- pop back to _localctx + * return _localctx; + * } + * + * A subsequent rule invocation of r from the start rule s pushes a + * new context object for r whose parent points at s and use invoking + * state is the state with r emanating as edge label. + * + * The invokingState fields from a context object to the root + * together form a stack of rule indication states where the root + * (bottom of the stack) has a -1 sentinel value. 
If we invoke start + * symbol s then call r1, which calls r2, the would look like + * this: + * + * SContext[-1] <- root node (bottom of the stack) + * R1Context[p] <- p in rule s called r1 + * R2Context[q] <- q in rule r1 called r2 + * + * So the top of the stack, _ctx, represents a call to the current + * rule and it holds the return address from another rule that invoke + * to this rule. To invoke a rule, we must always have a current context. + * + * The parent contexts are useful for computing lookahead sets and + * getting error information. + * + * These objects are used during parsing and prediction. + * For the special case of parsers, we use the subclass + * ParserRuleContext. + * + * @see ParserRuleContext + */ + class ANTLR4CPP_PUBLIC RuleContext : public tree::ParseTree { + public: + /// What state invoked the rule associated with this context? + /// The "return address" is the followState of invokingState + /// If parent is null, this should be -1 and this context object represents the start rule. + size_t invokingState; + + RuleContext(); + RuleContext(RuleContext *parent, size_t invokingState); + + virtual int depth(); + + /// A context is empty if there is no invoking state; meaning nobody called current context. + virtual bool isEmpty(); + + // satisfy the ParseTree / SyntaxTree interface + + virtual misc::Interval getSourceInterval() override; + + virtual std::string getText() override; + + virtual size_t getRuleIndex() const; + + /** For rule associated with this parse tree internal node, return + * the outer alternative number used to match the input. Default + * implementation does not compute nor store this alt num. Create + * a subclass of ParserRuleContext with backing field and set + * option contextSuperClass. + * to set it. + * + * @since 4.5.3 + */ + virtual size_t getAltNumber() const; + + /** Set the outer alternative number for this context node. 
Default + * implementation does nothing to avoid backing field overhead for + * trees that don't need it. Create + * a subclass of ParserRuleContext with backing field and set + * option contextSuperClass. + * + * @since 4.5.3 + */ + virtual void setAltNumber(size_t altNumber); + + virtual antlrcpp::Any accept(tree::ParseTreeVisitor *visitor) override; + + /// + /// Print out a whole tree, not just a node, in LISP format + /// (root child1 .. childN). Print just a node if this is a leaf. + /// We have to know the recognizer so we can get rule names. + /// + virtual std::string toStringTree(Parser *recog, bool pretty = false) override; + + /// + /// Print out a whole tree, not just a node, in LISP format + /// (root child1 .. childN). Print just a node if this is a leaf. + /// + virtual std::string toStringTree(std::vector &ruleNames, bool pretty = false); + + virtual std::string toStringTree(bool pretty = false) override; + virtual std::string toString() override; + std::string toString(Recognizer *recog); + std::string toString(const std::vector &ruleNames); + + // recog null unless ParserRuleContext, in which case we use subclass toString(...) + std::string toString(Recognizer *recog, RuleContext *stop); + + virtual std::string toString(const std::vector &ruleNames, RuleContext *stop); + + bool operator == (const RuleContext &other) { return this == &other; } // Simple address comparison. + + private: + void InitializeInstanceFields(); + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/RuleContextWithAltNum.h b/lib/antlr4/include/RuleContextWithAltNum.h new file mode 100644 index 0000000..995d9aa --- /dev/null +++ b/lib/antlr4/include/RuleContextWithAltNum.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "ParserRuleContext.h" + +namespace antlr4 { + + /// A handy class for use with + /// + /// options {contextSuperClass=org.antlr.v4.runtime.RuleContextWithAltNum;} + /// + /// that provides a backing field / impl for the outer alternative number + /// matched for an internal parse tree node. + /// + /// I'm only putting into Java runtime as I'm certain I'm the only one that + /// will really every use this. + class ANTLR4CPP_PUBLIC RuleContextWithAltNum : public ParserRuleContext { + public: + size_t altNum = 0; + + RuleContextWithAltNum(); + RuleContextWithAltNum(ParserRuleContext *parent, int invokingStateNumber); + + virtual size_t getAltNumber() const override; + virtual void setAltNumber(size_t altNum) override; + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/RuntimeMetaData.h b/lib/antlr4/include/RuntimeMetaData.h new file mode 100644 index 0000000..f178cfe --- /dev/null +++ b/lib/antlr4/include/RuntimeMetaData.h @@ -0,0 +1,155 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { + + /// + /// This class provides access to the current version of the ANTLR 4 runtime + /// library as compile-time and runtime constants, along with methods for + /// checking for matching version numbers and notifying listeners in the case + /// where a version mismatch is detected. + /// + /// + /// The runtime version information is provided by and + /// . Detailed information about these values is + /// provided in the documentation for each member. + /// + /// + /// The runtime version check is implemented by . Detailed + /// information about incorporating this call into user code, as well as its use + /// in generated code, is provided in the documentation for the method. 
+ /// + /// + /// Version strings x.y and x.y.z are considered "compatible" and no error + /// would be generated. Likewise, version strings x.y-SNAPSHOT and x.y.z are + /// considered "compatible" because the major and minor components x.y + /// are the same in each. + /// + /// + /// To trap any error messages issued by this code, use System.setErr() + /// in your main() startup code. + /// + /// + /// @since 4.3 + /// + class ANTLR4CPP_PUBLIC RuntimeMetaData { + public: + /// A compile-time constant containing the current version of the ANTLR 4 + /// runtime library. + /// + /// + /// This compile-time constant value allows generated parsers and other + /// libraries to include a literal reference to the version of the ANTLR 4 + /// runtime library the code was compiled against. At each release, we + /// change this value. + /// + /// Version numbers are assumed to have the form + /// + /// major.minor.patch.revision-suffix, + /// + /// with the individual components defined as follows. + /// + ///
        + ///
      • major is a required non-negative integer, and is equal to + /// {@code 4} for ANTLR 4.
      • + ///
      • minor is a required non-negative integer.
      • + ///
      • patch is an optional non-negative integer. When + /// patch is omitted, the {@code .} (dot) appearing before it is + /// also omitted.
      • + ///
      • revision is an optional non-negative integer, and may only + /// be included when patch is also included. When revision + /// is omitted, the {@code .} (dot) appearing before it is also omitted.
      • + ///
      • suffix is an optional string. When suffix is + /// omitted, the {@code -} (hyphen-minus) appearing before it is also + /// omitted.
      • + ///
      + static const std::string VERSION; + + /// + /// Gets the currently executing version of the ANTLR 4 runtime library. + /// + /// + /// This method provides runtime access to the field, as + /// opposed to directly referencing the field as a compile-time constant. + /// + /// The currently executing version of the ANTLR 4 library + + static std::string getRuntimeVersion(); + + /// + /// This method provides the ability to detect mismatches between the version + /// of ANTLR 4 used to generate a parser, the version of the ANTLR runtime a + /// parser was compiled against, and the version of the ANTLR runtime which + /// is currently executing. + /// + /// + /// The version check is designed to detect the following two specific + /// scenarios. + /// + ///
        + ///
      • The ANTLR Tool version used for code generation does not match the + /// currently executing runtime version.
      • + ///
      • The ANTLR Runtime version referenced at the time a parser was + /// compiled does not match the currently executing runtime version.
      • + ///
      + /// + /// + /// Starting with ANTLR 4.3, the code generator emits a call to this method + /// using two constants in each generated lexer and parser: a hard-coded + /// constant indicating the version of the tool used to generate the parser + /// and a reference to the compile-time constant . At + /// runtime, this method is called during the initialization of the generated + /// parser to detect mismatched versions, and notify the registered listeners + /// prior to creating instances of the parser. + /// + /// + /// This method does not perform any detection or filtering of semantic + /// changes between tool and runtime versions. It simply checks for a + /// version match and emits an error to stderr if a difference + /// is detected. + /// + /// + /// Note that some breaking changes between releases could result in other + /// types of runtime exceptions, such as a , prior to + /// calling this method. In these cases, the underlying version mismatch will + /// not be reported here. This method is primarily intended to + /// notify users of potential semantic changes between releases that do not + /// result in binary compatibility problems which would be detected by the + /// class loader. As with semantic changes, changes that break binary + /// compatibility between releases are mentioned in the release notes + /// accompanying the affected release. + /// + /// + /// Additional note for target developers: The version check + /// implemented by this class is designed to address specific compatibility + /// concerns that may arise during the execution of Java applications. Other + /// targets should consider the implementation of this method in the context + /// of that target's known execution environment, which may or may not + /// resemble the design provided for the Java target. + ///
      + /// The version of the tool used to generate a parser. + /// This value may be null when called from user code that was not generated + /// by, and does not reference, the ANTLR 4 Tool itself. + /// The version of the runtime the parser was + /// compiled against. This should always be passed using a direct reference + /// to . + static void checkVersion(const std::string &generatingToolVersion, const std::string &compileTimeVersion); + + /// + /// Gets the major and minor version numbers from a version string. For + /// details about the syntax of the input {@code version}. + /// E.g., from x.y.z return x.y. + /// + /// The complete version string. + /// A string of the form major.minor containing + /// only the major and minor components of the version string. + static std::string getMajorMinorVersion(const std::string &version); + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/Token.h b/lib/antlr4/include/Token.h new file mode 100644 index 0000000..a7c1594 --- /dev/null +++ b/lib/antlr4/include/Token.h @@ -0,0 +1,92 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "IntStream.h" + +namespace antlr4 { + + /// A token has properties: text, type, line, character position in the line + /// (so we can ignore tabs), token channel, index, and source from which + /// we obtained this token. + class ANTLR4CPP_PUBLIC Token { + public: + static const size_t INVALID_TYPE = 0; + + /// During lookahead operations, this "token" signifies we hit rule end ATN state + /// and did not follow it despite needing to. + static const size_t EPSILON = static_cast(-2); + static const size_t MIN_USER_TOKEN_TYPE = 1; + static const size_t EOF = IntStream::EOF; + + virtual ~Token(); + + /// All tokens go to the parser (unless skip() is called in that rule) + /// on a particular "channel". 
The parser tunes to a particular channel + /// so that whitespace etc... can go to the parser on a "hidden" channel. + static const size_t DEFAULT_CHANNEL = 0; + + /// Anything on different channel than DEFAULT_CHANNEL is not parsed + /// by parser. + static const size_t HIDDEN_CHANNEL = 1; + + /** + * This is the minimum constant value which can be assigned to a + * user-defined token channel. + * + *

      + * The non-negative numbers less than {@link #MIN_USER_CHANNEL_VALUE} are + * assigned to the predefined channels {@link #DEFAULT_CHANNEL} and + * {@link #HIDDEN_CHANNEL}.

      + * + * @see Token#getChannel() + */ + static const size_t MIN_USER_CHANNEL_VALUE = 2; + + /// Get the text of the token. + virtual std::string getText() const = 0; + + /// Get the token type of the token + virtual size_t getType() const = 0; + + /// The line number on which the 1st character of this token was matched, line=1..n + virtual size_t getLine() const = 0; + + /// The index of the first character of this token relative to the + /// beginning of the line at which it occurs, 0..n-1 + virtual size_t getCharPositionInLine() const = 0; + + /// Return the channel this token. Each token can arrive at the parser + /// on a different channel, but the parser only "tunes" to a single channel. + /// The parser ignores everything not on DEFAULT_CHANNEL. + virtual size_t getChannel() const = 0; + + /// An index from 0..n-1 of the token object in the input stream. + /// This must be valid in order to print token streams and + /// use TokenRewriteStream. + /// + /// Return INVALID_INDEX to indicate that this token was conjured up since + /// it doesn't have a valid index. + virtual size_t getTokenIndex() const = 0; + + /// The starting character index of the token + /// This method is optional; return INVALID_INDEX if not implemented. + virtual size_t getStartIndex() const = 0; + + /// The last character index of the token. + /// This method is optional; return INVALID_INDEX if not implemented. + virtual size_t getStopIndex() const = 0; + + /// Gets the which created this token. + virtual TokenSource *getTokenSource() const = 0; + + /// Gets the from which this token was derived. + virtual CharStream *getInputStream() const = 0; + + virtual std::string toString() const = 0; + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/TokenFactory.h b/lib/antlr4/include/TokenFactory.h new file mode 100644 index 0000000..e29335f --- /dev/null +++ b/lib/antlr4/include/TokenFactory.h @@ -0,0 +1,30 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { + + /// The default mechanism for creating tokens. It's used by default in Lexer and + /// the error handling strategy (to create missing tokens). Notifying the parser + /// of a new factory means that it notifies it's token source and error strategy. + template + class ANTLR4CPP_PUBLIC TokenFactory { + public: + virtual ~TokenFactory() {} + + /// This is the method used to create tokens in the lexer and in the + /// error handling strategy. If text!=null, than the start and stop positions + /// are wiped to -1 in the text override is set in the CommonToken. + virtual std::unique_ptr create(std::pair source, size_t type, const std::string &text, + size_t channel, size_t start, size_t stop, size_t line, size_t charPositionInLine) = 0; + + /// Generically useful + virtual std::unique_ptr create(size_t type, const std::string &text) = 0; + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/TokenSource.h b/lib/antlr4/include/TokenSource.h new file mode 100644 index 0000000..72981ce --- /dev/null +++ b/lib/antlr4/include/TokenSource.h @@ -0,0 +1,85 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "TokenFactory.h" + +namespace antlr4 { + + /// + /// A source of tokens must provide a sequence of tokens via + /// and also must reveal it's source of characters; 's text is + /// computed from a ; it only store indices into the char + /// stream. + ///

      + /// Errors from the lexer are never passed to the parser. Either you want to keep + /// going or you do not upon token recognition error. If you do not want to + /// continue lexing then you do not want to continue parsing. Just throw an + /// exception not under {@link RecognitionException} and Java will naturally toss + /// you all the way out of the recognizers. If you want to continue lexing then + /// you should not throw an exception to the parser--it has already requested a + /// token. Keep lexing until you get a valid one. Just report errors and keep + /// going, looking for a valid token. + ///

      + class ANTLR4CPP_PUBLIC TokenSource { + public: + virtual ~TokenSource(); + + /// Return a object from your input stream (usually a + /// ). Do not fail/return upon lexing error; keep chewing + /// on the characters until you get a good one; errors are not passed through + /// to the parser. + virtual std::unique_ptr nextToken() = 0; + + /// + /// Get the line number for the current position in the input stream. The + /// first line in the input is line 1. + /// + /// The line number for the current position in the input stream, or + /// 0 if the current token source does not track line numbers. + virtual size_t getLine() const = 0; + + /// + /// Get the index into the current line for the current position in the input + /// stream. The first character on a line has position 0. + /// + /// The line number for the current position in the input stream, or + /// (sze_t)-1 if the current token source does not track character positions. + virtual size_t getCharPositionInLine() = 0; + + /// + /// Get the from which this token source is currently + /// providing tokens. + /// + /// The associated with the current position in + /// the input, or {@code null} if no input stream is available for the token + /// source. + virtual CharStream* getInputStream() = 0; + + /// + /// Gets the name of the underlying input source. This method returns a + /// non-null, non-empty string. If such a name is not known, this method + /// returns . + /// + virtual std::string getSourceName() = 0; + + /// + /// Set the this token source should use for creating + /// objects from the input. + /// + /// The to use for creating tokens. + template + void setTokenFactory(TokenFactory * /*factory*/) {} + + /// + /// Gets the this token source is currently using for + /// creating objects from the input. + /// + /// The currently used by this token source. 
+ virtual Ref> getTokenFactory() = 0; + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/TokenStream.h b/lib/antlr4/include/TokenStream.h new file mode 100644 index 0000000..c7dd0d4 --- /dev/null +++ b/lib/antlr4/include/TokenStream.h @@ -0,0 +1,137 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "IntStream.h" + +namespace antlr4 { + + /// + /// An whose symbols are instances. + /// + class ANTLR4CPP_PUBLIC TokenStream : public IntStream { + /// + /// Get the instance associated with the value returned by + /// . This method has the same pre- and post-conditions as + /// . In addition, when the preconditions of this method + /// are met, the return value is non-null and the value of + /// {@code LT(k).getType()==LA(k)}. + /// + /// + public: + virtual ~TokenStream(); + + virtual Token* LT(ssize_t k) = 0; + + /// + /// Gets the at the specified {@code index} in the stream. When + /// the preconditions of this method are met, the return value is non-null. + ///

      + /// The preconditions for this method are the same as the preconditions of + /// {@link IntStream#seek}. If the behavior of {@code seek(index)} is + /// unspecified for the current state and given {@code index}, then the + /// behavior of this method is also unspecified. + ///

      + /// The symbol referred to by {@code index} differs from {@code seek()} only + /// in the case of filtering streams where {@code index} lies before the end + /// of the stream. Unlike {@code seek()}, this method does not adjust + /// {@code index} to point to a non-ignored symbol. + ///

      + /// if {code index} is less than 0 + /// if the stream does not support + /// retrieving the token at the specified index + virtual Token* get(size_t index) const = 0; + + /// Gets the underlying TokenSource which provides tokens for this stream. + virtual TokenSource* getTokenSource() const = 0; + + /// + /// Return the text of all tokens within the specified {@code interval}. This + /// method behaves like the following code (including potential exceptions + /// for violating preconditions of , but may be optimized by the + /// specific implementation. + /// + ///
      +    /// TokenStream stream = ...;
      +    /// String text = "";
      +    /// for (int i = interval.a; i <= interval.b; i++) {
      +    ///   text += stream.get(i).getText();
      +    /// }
      +    /// 
      + ///
      + /// The interval of tokens within this stream to get text + /// for. + /// The text of all tokens within the specified interval in this + /// stream. + /// + /// if {@code interval} is {@code null} + virtual std::string getText(const misc::Interval &interval) = 0; + + /// + /// Return the text of all tokens in the stream. This method behaves like the + /// following code, including potential exceptions from the calls to + /// and , but may be + /// optimized by the specific implementation. + /// + ///
      +    /// TokenStream stream = ...;
      +    /// String text = stream.getText(new Interval(0, stream.size()));
      +    /// 
      + ///
      + /// The text of all tokens in the stream. + virtual std::string getText() = 0; + + /// + /// Return the text of all tokens in the source interval of the specified + /// context. This method behaves like the following code, including potential + /// exceptions from the call to , but may be + /// optimized by the specific implementation. + ///

      + /// If {@code ctx.getSourceInterval()} does not return a valid interval of + /// tokens provided by this stream, the behavior is unspecified. + /// + ///
      +    /// TokenStream stream = ...;
      +    /// String text = stream.getText(ctx.getSourceInterval());
      +    /// 
      + ///
      + /// The context providing the source interval of tokens to get + /// text for. + /// The text of all tokens within the source interval of {@code ctx}. + virtual std::string getText(RuleContext *ctx) = 0; + + /// + /// Return the text of all tokens in this stream between {@code start} and + /// {@code stop} (inclusive). + ///

      + /// If the specified {@code start} or {@code stop} token was not provided by + /// this stream, or if the {@code stop} occurred before the {@code start} + /// token, the behavior is unspecified. + ///

      + /// For streams which ensure that the {@link Token#getTokenIndex} method is + /// accurate for all of its provided tokens, this method behaves like the + /// following code. Other streams may implement this method in other ways + /// provided the behavior is consistent with this at a high level. + /// + ///

      +    /// TokenStream stream = ...;
      +    /// String text = "";
      +    /// for (int i = start.getTokenIndex(); i <= stop.getTokenIndex(); i++) {
      +    ///   text += stream.get(i).getText();
      +    /// }
      +    /// 
      + ///
      + /// The first token in the interval to get text for. + /// The last token in the interval to get text for (inclusive). + /// The text of all tokens lying between the specified {@code start} + /// and {@code stop} tokens. + /// + /// if this stream does not support + /// this method for the specified tokens + virtual std::string getText(Token *start, Token *stop) = 0; + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/TokenStreamRewriter.h b/lib/antlr4/include/TokenStreamRewriter.h new file mode 100644 index 0000000..102a9e9 --- /dev/null +++ b/lib/antlr4/include/TokenStreamRewriter.h @@ -0,0 +1,293 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +namespace antlr4 { + + /** + * Useful for rewriting out a buffered input token stream after doing some + * augmentation or other manipulations on it. + * + *

      + * You can insert stuff, replace, and delete chunks. Note that the operations + * are done lazily--only if you convert the buffer to a {@link String} with + * {@link TokenStream#getText()}. This is very efficient because you are not + * moving data around all the time. As the buffer of tokens is converted to + * strings, the {@link #getText()} method(s) scan the input token stream and + * check to see if there is an operation at the current index. If so, the + * operation is done and then normal {@link String} rendering continues on the + * buffer. This is like having multiple Turing machine instruction streams + * (programs) operating on a single input tape. :)

      + * + *

      + * This rewriter makes no modifications to the token stream. It does not ask the + * stream to fill itself up nor does it advance the input cursor. The token + * stream {@link TokenStream#index()} will return the same value before and + * after any {@link #getText()} call.

      + * + *

      + * The rewriter only works on tokens that you have in the buffer and ignores the + * current input cursor. If you are buffering tokens on-demand, calling + * {@link #getText()} halfway through the input will only do rewrites for those + * tokens in the first half of the file.

      + * + *

      + * Since the operations are done lazily at {@link #getText}-time, operations do + * not screw up the token index values. That is, an insert operation at token + * index {@code i} does not change the index values for tokens + * {@code i}+1..n-1.

      + * + *

      + * Because operations never actually alter the buffer, you may always get the + * original token stream back without undoing anything. Since the instructions + * are queued up, you can easily simulate transactions and roll back any changes + * if there is an error just by removing instructions. For example,

      + * + *
      +   * CharStream input = new ANTLRFileStream("input");
      +   * TLexer lex = new TLexer(input);
      +   * CommonTokenStream tokens = new CommonTokenStream(lex);
      +   * T parser = new T(tokens);
      +   * TokenStreamRewriter rewriter = new TokenStreamRewriter(tokens);
      +   * parser.startRule();
      +   * 
      + * + *

      + * Then in the rules, you can execute (assuming rewriter is visible):

      + * + *
      +   * Token t,u;
      +   * ...
      +   * rewriter.insertAfter(t, "text to put after t");
      +   * rewriter.insertAfter(u, "text after u");
      +   * System.out.println(rewriter.getText());
      +   * 
      + * + *

      + * You can also have multiple "instruction streams" and get multiple rewrites + * from a single pass over the input. Just name the instruction streams and use + * that name again when printing the buffer. This could be useful for generating + * a C file and also its header file--all from the same buffer:

      + * + *
      +   * rewriter.insertAfter("pass1", t, "text to put after t");
      +   * rewriter.insertAfter("pass2", u, "text after u");
      +   * System.out.println(rewriter.getText("pass1"));
      +   * System.out.println(rewriter.getText("pass2"));
      +   * 
      + * + *

      + * If you don't use named rewrite streams, a "default" stream is used as the + * first example shows.

      + */ + class ANTLR4CPP_PUBLIC TokenStreamRewriter { + public: + static const std::string DEFAULT_PROGRAM_NAME; + static const size_t PROGRAM_INIT_SIZE = 100; + static const size_t MIN_TOKEN_INDEX = 0; + + TokenStreamRewriter(TokenStream *tokens); + virtual ~TokenStreamRewriter(); + + TokenStream *getTokenStream(); + + virtual void rollback(size_t instructionIndex); + + /// Rollback the instruction stream for a program so that + /// the indicated instruction (via instructionIndex) is no + /// longer in the stream. UNTESTED! + virtual void rollback(const std::string &programName, size_t instructionIndex); + + virtual void deleteProgram(); + + /// Reset the program so that no instructions exist. + virtual void deleteProgram(const std::string &programName); + virtual void insertAfter(Token *t, const std::string& text); + virtual void insertAfter(size_t index, const std::string& text); + virtual void insertAfter(const std::string &programName, Token *t, const std::string& text); + virtual void insertAfter(const std::string &programName, size_t index, const std::string& text); + + virtual void insertBefore(Token *t, const std::string& text); + virtual void insertBefore(size_t index, const std::string& text); + virtual void insertBefore(const std::string &programName, Token *t, const std::string& text); + virtual void insertBefore(const std::string &programName, size_t index, const std::string& text); + + virtual void replace(size_t index, const std::string& text); + virtual void replace(size_t from, size_t to, const std::string& text); + virtual void replace(Token *indexT, const std::string& text); + virtual void replace(Token *from, Token *to, const std::string& text); + virtual void replace(const std::string &programName, size_t from, size_t to, const std::string& text); + virtual void replace(const std::string &programName, Token *from, Token *to, const std::string& text); + + virtual void Delete(size_t index); + virtual void Delete(size_t from, size_t to); + 
virtual void Delete(Token *indexT); + virtual void Delete(Token *from, Token *to); + virtual void Delete(const std::string &programName, size_t from, size_t to); + virtual void Delete(const std::string &programName, Token *from, Token *to); + + virtual size_t getLastRewriteTokenIndex(); + + /// Return the text from the original tokens altered per the + /// instructions given to this rewriter. + virtual std::string getText(); + + /** Return the text from the original tokens altered per the + * instructions given to this rewriter in programName. + */ + std::string getText(std::string programName); + + /// Return the text associated with the tokens in the interval from the + /// original token stream but with the alterations given to this rewriter. + /// The interval refers to the indexes in the original token stream. + /// We do not alter the token stream in any way, so the indexes + /// and intervals are still consistent. Includes any operations done + /// to the first and last token in the interval. So, if you did an + /// insertBefore on the first token, you would get that insertion. + /// The same is true if you do an insertAfter the stop token. + virtual std::string getText(const misc::Interval &interval); + + virtual std::string getText(const std::string &programName, const misc::Interval &interval); + + protected: + class RewriteOperation { + public: + /// What index into rewrites List are we? + size_t index; + std::string text; + + /// Token buffer index. + size_t instructionIndex; + + RewriteOperation(TokenStreamRewriter *outerInstance, size_t index); + RewriteOperation(TokenStreamRewriter *outerInstance, size_t index, const std::string& text); + virtual ~RewriteOperation(); + + /// Execute the rewrite operation by possibly adding to the buffer. + /// Return the index of the next token to operate on. 
+ + virtual size_t execute(std::string *buf); + virtual std::string toString(); + + private: + TokenStreamRewriter *const outerInstance; + void InitializeInstanceFields(); + }; + + class InsertBeforeOp : public RewriteOperation { + private: + TokenStreamRewriter *const outerInstance; + + public: + InsertBeforeOp(TokenStreamRewriter *outerInstance, size_t index, const std::string& text); + + virtual size_t execute(std::string *buf) override; + }; + + class ReplaceOp : public RewriteOperation { + private: + TokenStreamRewriter *const outerInstance; + + public: + size_t lastIndex; + + ReplaceOp(TokenStreamRewriter *outerInstance, size_t from, size_t to, const std::string& text); + virtual size_t execute(std::string *buf) override; + virtual std::string toString() override; + + private: + void InitializeInstanceFields(); + }; + + /// Our source stream + TokenStream *const tokens; + + /// You may have multiple, named streams of rewrite operations. + /// I'm calling these things "programs." + /// Maps String (name) -> rewrite (List) + std::map> _programs; + + /// + /// Map String (program name) -> Integer index + std::map _lastRewriteTokenIndexes; + virtual size_t getLastRewriteTokenIndex(const std::string &programName); + virtual void setLastRewriteTokenIndex(const std::string &programName, size_t i); + virtual std::vector& getProgram(const std::string &name); + + /// + /// We need to combine operations and report invalid operations (like + /// overlapping replaces that are not completed nested). Inserts to + /// same index need to be combined etc... 
Here are the cases: + /// + /// I.i.u I.j.v leave alone, nonoverlapping + /// I.i.u I.i.v combine: Iivu + /// + /// R.i-j.u R.x-y.v | i-j in x-y delete first R + /// R.i-j.u R.i-j.v delete first R + /// R.i-j.u R.x-y.v | x-y in i-j ERROR + /// R.i-j.u R.x-y.v | boundaries overlap ERROR + /// + /// Delete special case of replace (text==null): + /// D.i-j.u D.x-y.v | boundaries overlap combine to max(min)..max(right) + /// + /// I.i.u R.x-y.v | i in (x+1)-y delete I (since insert before + /// we're not deleting i) + /// I.i.u R.x-y.v | i not in (x+1)-y leave alone, nonoverlapping + /// R.x-y.v I.i.u | i in x-y ERROR + /// R.x-y.v I.x.u R.x-y.uv (combine, delete I) + /// R.x-y.v I.i.u | i not in x-y leave alone, nonoverlapping + /// + /// I.i.u = insert u before op @ index i + /// R.x-y.u = replace x-y indexed tokens with u + /// + /// First we need to examine replaces. For any replace op: + /// + /// 1. wipe out any insertions before op within that range. + /// 2. Drop any replace op before that is contained completely within + /// that range. + /// 3. Throw exception upon boundary overlap with any previous replace. + /// + /// Then we can deal with inserts: + /// + /// 1. for any inserts to same index, combine even if not adjacent. + /// 2. for any prior replace with same left boundary, combine this + /// insert with replace and delete this replace. + /// 3. throw exception if index in same range as previous replace + /// + /// Don't actually delete; make op null in list. Easier to walk list. + /// Later we can throw as we add to index -> op map. + /// + /// Note that I.2 R.2-2 will wipe out I.2 even though, technically, the + /// inserted stuff would be before the replace range. But, if you + /// add tokens in front of a method body '{' and then delete the method + /// body, I think the stuff before the '{' you added should disappear too. + /// + /// Return a map from token index to operation. 
+ /// + virtual std::unordered_map reduceToSingleOperationPerIndex(std::vector &rewrites); + + virtual std::string catOpText(std::string *a, std::string *b); + + /// Get all operations before an index of a particular kind. + template + std::vector getKindOfOps(std::vector rewrites, size_t before) { + std::vector ops; + for (size_t i = 0; i < before && i < rewrites.size(); i++) { + T *op = dynamic_cast(rewrites[i]); + if (op == nullptr) { // ignore deleted or non matching entries + continue; + } + ops.push_back(op); + } + return ops; + } + + private: + std::vector& initializeProgram(const std::string &name); + + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/UnbufferedCharStream.h b/lib/antlr4/include/UnbufferedCharStream.h new file mode 100644 index 0000000..98cdcc6 --- /dev/null +++ b/lib/antlr4/include/UnbufferedCharStream.h @@ -0,0 +1,123 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "CharStream.h" + +namespace antlr4 { + + /// Do not buffer up the entire char stream. It does keep a small buffer + /// for efficiency and also buffers while a mark exists (set by the + /// lookahead prediction in parser). "Unbuffered" here refers to fact + /// that it doesn't buffer all data, not that's it's on demand loading of char. + class ANTLR4CPP_PUBLIC UnbufferedCharStream : public CharStream { + public: + /// The name or source of this char stream. + std::string name; + + UnbufferedCharStream(std::wistream &input); + + virtual void consume() override; + virtual size_t LA(ssize_t i) override; + + /// + /// Return a marker that we can release later. + ///

      + /// The specific marker value used for this class allows for some level of + /// protection against misuse where {@code seek()} is called on a mark or + /// {@code release()} is called in the wrong order. + ///

      + virtual ssize_t mark() override; + + /// + /// Decrement number of markers, resetting buffer if we hit 0. + /// + virtual void release(ssize_t marker) override; + virtual size_t index() override; + + /// + /// Seek to absolute character index, which might not be in the current + /// sliding window. Move {@code p} to {@code index-bufferStartIndex}. + /// + virtual void seek(size_t index) override; + virtual size_t size() override; + virtual std::string getSourceName() const override; + virtual std::string getText(const misc::Interval &interval) override; + + protected: + /// A moving window buffer of the data being scanned. While there's a marker, + /// we keep adding to buffer. Otherwise, resets so + /// we start filling at index 0 again. + // UTF-32 encoded. +#if defined(_MSC_VER) && _MSC_VER == 1900 + i32string _data; // Custom type for VS 2015. + typedef __int32 storage_type; +#else + std::u32string _data; + typedef char32_t storage_type; +#endif + + /// + /// 0..n-1 index into of next character. + ///

      + /// The {@code LA(1)} character is {@code data[p]}. If {@code p == n}, we are + /// out of buffered characters. + ///

      + size_t _p; + + /// + /// Count up with and down with + /// . When we {@code release()} the last mark, + /// {@code numMarkers} reaches 0 and we reset the buffer. Copy + /// {@code data[p]..data[n-1]} to {@code data[0]..data[(n-1)-p]}. + /// + size_t _numMarkers; + + /// This is the {@code LA(-1)} character for the current position. + size_t _lastChar; // UTF-32 + + /// + /// When {@code numMarkers > 0}, this is the {@code LA(-1)} character for the + /// first character in . Otherwise, this is unspecified. + /// + size_t _lastCharBufferStart; // UTF-32 + + /// + /// Absolute character index. It's the index of the character about to be + /// read via {@code LA(1)}. Goes from 0 to the number of characters in the + /// entire stream, although the stream size is unknown before the end is + /// reached. + /// + size_t _currentCharIndex; + + std::wistream &_input; + + /// + /// Make sure we have 'want' elements from current position . + /// Last valid {@code p} index is {@code data.length-1}. {@code p+need-1} is + /// the char index 'need' elements ahead. If we need 1 element, + /// {@code (p+1-1)==p} must be less than {@code data.length}. + /// + virtual void sync(size_t want); + + /// + /// Add {@code n} characters to the buffer. Returns the number of characters + /// actually added to the buffer. If the return value is less than {@code n}, + /// then EOF was reached before {@code n} characters could be added. + /// + virtual size_t fill(size_t n); + + /// Override to provide different source of characters than + /// . + virtual char32_t nextChar(); + virtual void add(char32_t c); + size_t getBufferStartIndex() const; + + private: + void InitializeInstanceFields(); + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/UnbufferedTokenStream.h b/lib/antlr4/include/UnbufferedTokenStream.h new file mode 100644 index 0000000..244cc8d --- /dev/null +++ b/lib/antlr4/include/UnbufferedTokenStream.h @@ -0,0 +1,115 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. 
All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "TokenStream.h" + +namespace antlr4 { + + class ANTLR4CPP_PUBLIC UnbufferedTokenStream : public TokenStream { + public: + UnbufferedTokenStream(TokenSource *tokenSource); + UnbufferedTokenStream(TokenSource *tokenSource, int bufferSize); + UnbufferedTokenStream(const UnbufferedTokenStream& other) = delete; + virtual ~UnbufferedTokenStream(); + + UnbufferedTokenStream& operator = (const UnbufferedTokenStream& other) = delete; + + virtual Token* get(size_t i) const override; + virtual Token* LT(ssize_t i) override; + virtual size_t LA(ssize_t i) override; + + virtual TokenSource* getTokenSource() const override; + + virtual std::string getText(const misc::Interval &interval) override; + virtual std::string getText() override; + virtual std::string getText(RuleContext *ctx) override; + virtual std::string getText(Token *start, Token *stop) override; + + virtual void consume() override; + + /// + /// Return a marker that we can release later. + ///

      + /// The specific marker value used for this class allows for some level of + /// protection against misuse where {@code seek()} is called on a mark or + /// {@code release()} is called in the wrong order. + ///

      + virtual ssize_t mark() override; + virtual void release(ssize_t marker) override; + virtual size_t index() override; + virtual void seek(size_t index) override; + virtual size_t size() override; + virtual std::string getSourceName() const override; + + protected: + /// Make sure we have 'need' elements from current position p. Last valid + /// p index is tokens.length - 1. p + need - 1 is the tokens index 'need' elements + /// ahead. If we need 1 element, (p+1-1)==p must be less than tokens.length. + TokenSource *_tokenSource; + + /// + /// A moving window buffer of the data being scanned. While there's a marker, + /// we keep adding to buffer. Otherwise, resets so + /// we start filling at index 0 again. + /// + + std::vector> _tokens; + + /// + /// 0..n-1 index into of next token. + ///

      + /// The {@code LT(1)} token is {@code tokens[p]}. If {@code p == n}, we are + /// out of buffered tokens. + ///

      + size_t _p; + + /// + /// Count up with and down with + /// . When we {@code release()} the last mark, + /// {@code numMarkers} reaches 0 and we reset the buffer. Copy + /// {@code tokens[p]..tokens[n-1]} to {@code tokens[0]..tokens[(n-1)-p]}. + /// + int _numMarkers; + + /// + /// This is the {@code LT(-1)} token for the current position. + /// + Token *_lastToken; + + /// + /// When {@code numMarkers > 0}, this is the {@code LT(-1)} token for the + /// first token in . Otherwise, this is {@code null}. + /// + Token *_lastTokenBufferStart; + + /// + /// Absolute token index. It's the index of the token about to be read via + /// {@code LT(1)}. Goes from 0 to the number of tokens in the entire stream, + /// although the stream size is unknown before the end is reached. + ///

      + /// This value is used to set the token indexes if the stream provides tokens + /// that implement . + ///

      + size_t _currentTokenIndex; + + virtual void sync(ssize_t want); + + /// + /// Add {@code n} elements to the buffer. Returns the number of tokens + /// actually added to the buffer. If the return value is less than {@code n}, + /// then EOF was reached before {@code n} tokens could be added. + /// + virtual size_t fill(size_t n); + virtual void add(std::unique_ptr t); + + size_t getBufferStartIndex() const; + + private: + void InitializeInstanceFields(); + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/Vocabulary.h b/lib/antlr4/include/Vocabulary.h new file mode 100644 index 0000000..7dbf85c --- /dev/null +++ b/lib/antlr4/include/Vocabulary.h @@ -0,0 +1,193 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace dfa { + + /// This class provides a default implementation of the + /// interface. + class ANTLR4CPP_PUBLIC Vocabulary { + public: + Vocabulary(Vocabulary const&) = default; + virtual ~Vocabulary(); + + /// Gets an empty instance. + /// + /// + /// No literal or symbol names are assigned to token types, so + /// returns the numeric value for all tokens + /// except . + static const Vocabulary EMPTY_VOCABULARY; + + Vocabulary() {} + + /// + /// Constructs a new instance of from the specified + /// literal and symbolic token names. + /// + /// The literal names assigned to tokens, or {@code null} + /// if no literal names are assigned. + /// The symbolic names assigned to tokens, or + /// {@code null} if no symbolic names are assigned. + /// + /// + /// + Vocabulary(const std::vector &literalNames, const std::vector &symbolicNames); + + /// + /// Constructs a new instance of from the specified + /// literal, symbolic, and display token names. 
+ /// + /// The literal names assigned to tokens, or {@code null} + /// if no literal names are assigned. + /// The symbolic names assigned to tokens, or + /// {@code null} if no symbolic names are assigned. + /// The display names assigned to tokens, or {@code null} + /// to use the values in {@code literalNames} and {@code symbolicNames} as + /// the source of display names, as described in + /// . + /// + /// + /// + /// + Vocabulary(const std::vector &literalNames, const std::vector &symbolicNames, + const std::vector &displayNames); + + /// + /// Returns a instance from the specified set of token + /// names. This method acts as a compatibility layer for the single + /// {@code tokenNames} array generated by previous releases of ANTLR. + /// + /// The resulting vocabulary instance returns {@code null} for + /// and , and the + /// value from {@code tokenNames} for the display names. + /// + /// The token names, or {@code null} if no token names are + /// available. + /// A instance which uses {@code tokenNames} for + /// the display names of tokens. + static Vocabulary fromTokenNames(const std::vector &tokenNames); + + /// + /// Returns the highest token type value. It can be used to iterate from + /// zero to that number, inclusively, thus querying all stored entries. + /// the highest token type value + virtual size_t getMaxTokenType() const; + + /// + /// Gets the string literal associated with a token type. The string returned + /// by this method, when not {@code null}, can be used unaltered in a parser + /// grammar to represent this token type. + /// + /// The following table shows examples of lexer rules and the literal + /// names assigned to the corresponding token types. + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + ///
      Rule | Literal Name | Java String Literal
      {@code THIS : 'this';} | {@code 'this'} | {@code "'this'"}
      {@code SQUOTE : '\'';} | {@code '\''} | {@code "'\\''"}
      {@code ID : [A-Z]+;} | n/a | {@code null}
      + ///
      + /// The token type. + /// + /// The string literal associated with the specified token type, or + /// {@code null} if no string literal is associated with the type. + virtual std::string getLiteralName(size_t tokenType) const; + + /// + /// Gets the symbolic name associated with a token type. The string returned + /// by this method, when not {@code null}, can be used unaltered in a parser + /// grammar to represent this token type. + /// + /// This method supports token types defined by any of the following + /// methods: + /// + ///
        + ///
      • Tokens created by lexer rules.
      • + ///
      • Tokens defined in a tokens{} block in a lexer or parser + /// grammar.
      • + ///
      • The implicitly defined {@code EOF} token, which has the token type + /// .
      • + ///
      + /// + /// The following table shows examples of lexer rules and the literal + /// names assigned to the corresponding token types. + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + ///
      Rule | Symbolic Name
      {@code THIS : 'this';} | {@code THIS}
      {@code SQUOTE : '\'';} | {@code SQUOTE}
      {@code ID : [A-Z]+;} | {@code ID}
      + ///
      + /// The token type. + /// + /// The symbolic name associated with the specified token type, or + /// {@code null} if no symbolic name is associated with the type. + virtual std::string getSymbolicName(size_t tokenType) const; + + /// + /// Gets the display name of a token type. + /// + /// ANTLR provides a default implementation of this method, but + /// applications are free to override the behavior in any manner which makes + /// sense for the application. The default implementation returns the first + /// result from the following list which produces a non-{@code null} + /// result. + /// + ///
        + ///
      1. The result of
      2. + ///
      3. The result of
      4. + ///
      5. The result of
      6. + ///
      + ///
      + /// The token type. + /// + /// The display name of the token type, for use in error reporting or + /// other user-visible messages which reference specific token types. + virtual std::string getDisplayName(size_t tokenType) const; + + private: + std::vector const _literalNames; + std::vector const _symbolicNames; + std::vector const _displayNames; + const size_t _maxTokenType = 0; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/WritableToken.h b/lib/antlr4/include/WritableToken.h new file mode 100644 index 0000000..56bc9d0 --- /dev/null +++ b/lib/antlr4/include/WritableToken.h @@ -0,0 +1,23 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Token.h" + +namespace antlr4 { + + class ANTLR4CPP_PUBLIC WritableToken : public Token { + public: + virtual ~WritableToken(); + virtual void setText(const std::string &text) = 0; + virtual void setType(size_t ttype) = 0; + virtual void setLine(size_t line) = 0; + virtual void setCharPositionInLine(size_t pos) = 0; + virtual void setChannel(size_t channel) = 0; + virtual void setTokenIndex(size_t index) = 0; + }; + +} // namespace antlr4 diff --git a/lib/antlr4/include/antlr4-common.h b/lib/antlr4/include/antlr4-common.h new file mode 100644 index 0000000..25d890b --- /dev/null +++ b/lib/antlr4/include/antlr4-common.h @@ -0,0 +1,137 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// Defines for the Guid class and other platform dependent stuff. +#ifdef _WIN32 + #ifdef _MSC_VER + #pragma warning (disable: 4250) // Class inherits by dominance. + #pragma warning (disable: 4512) // assignment operator could not be generated + + #if _MSC_VER < 1900 + // Before VS 2015 code like "while (true)" will create a (useless) warning in level 4. + #pragma warning (disable: 4127) // conditional expression is constant + #endif + #endif + + #define GUID_WINDOWS + + #ifdef _WIN64 + typedef __int64 ssize_t; + #else + typedef __int32 ssize_t; + #endif + + #if _MSC_VER >= 1900 && _MSC_VER < 2000 + // VS 2015 has a known bug when using std::codecvt_utf8 + // so we have to temporarily use __int32 instead. + // https://connect.microsoft.com/VisualStudio/feedback/details/1403302/unresolved-external-when-using-codecvt-utf8 + typedef std::basic_string<__int32> i32string; + + typedef i32string UTF32String; + #else + typedef std::u32string UTF32String; + #endif + + #ifdef ANTLR4CPP_EXPORTS + #define ANTLR4CPP_PUBLIC __declspec(dllexport) + #else + #ifdef ANTLR4CPP_STATIC + #define ANTLR4CPP_PUBLIC + #else + #define ANTLR4CPP_PUBLIC __declspec(dllimport) + #endif + #endif + + #if defined(_MSC_VER) && !defined(__clang__) + // clang-cl should escape this to prevent [ignored-attributes]. + namespace std { + class ANTLR4CPP_PUBLIC exception; // Prevents warning C4275 from MSVC. 
+ } // namespace std + #endif + +#elif defined(__APPLE__) + typedef std::u32string UTF32String; + + #define GUID_CFUUID + #if __GNUC__ >= 4 + #define ANTLR4CPP_PUBLIC __attribute__ ((visibility ("default"))) + #else + #define ANTLR4CPP_PUBLIC + #endif +#else + typedef std::u32string UTF32String; + + #define GUID_LIBUUID + #if __GNUC__ >= 6 + #define ANTLR4CPP_PUBLIC __attribute__ ((visibility ("default"))) + #else + #define ANTLR4CPP_PUBLIC + #endif +#endif + +#include "support/guid.h" +#include "support/Declarations.h" + +#if !defined(HAS_NOEXCEPT) + #if defined(__clang__) + #if __has_feature(cxx_noexcept) + #define HAS_NOEXCEPT + #endif + #else + #if defined(__GXX_EXPERIMENTAL_CXX0X__) && __GNUC__ * 10 + __GNUC_MINOR__ >= 46 || \ + defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 190023026 + #define HAS_NOEXCEPT + #endif + #endif + + #ifdef HAS_NOEXCEPT + #define NOEXCEPT noexcept + #else + #define NOEXCEPT + #endif +#endif + +// We have to undefine this symbol as ANTLR will use this name for own members and even +// generated functions. Because EOF is a global macro we cannot use e.g. a namespace scope to disambiguate. +#ifdef EOF +#undef EOF +#endif + +#define INVALID_INDEX std::numeric_limits::max() +template using Ref = std::shared_ptr; diff --git a/lib/antlr4/include/antlr4-runtime.h b/lib/antlr4/include/antlr4-runtime.h new file mode 100644 index 0000000..d58e459 --- /dev/null +++ b/lib/antlr4/include/antlr4-runtime.h @@ -0,0 +1,167 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +// This is the umbrella header for all ANTLR4 C++ runtime headers. 
+ +#include "antlr4-common.h" + +#include "ANTLRErrorListener.h" +#include "ANTLRErrorStrategy.h" +#include "ANTLRFileStream.h" +#include "ANTLRInputStream.h" +#include "BailErrorStrategy.h" +#include "BaseErrorListener.h" +#include "BufferedTokenStream.h" +#include "CharStream.h" +#include "CommonToken.h" +#include "CommonTokenFactory.h" +#include "CommonTokenStream.h" +#include "ConsoleErrorListener.h" +#include "DefaultErrorStrategy.h" +#include "DiagnosticErrorListener.h" +#include "Exceptions.h" +#include "FailedPredicateException.h" +#include "InputMismatchException.h" +#include "IntStream.h" +#include "InterpreterRuleContext.h" +#include "Lexer.h" +#include "LexerInterpreter.h" +#include "LexerNoViableAltException.h" +#include "ListTokenSource.h" +#include "NoViableAltException.h" +#include "Parser.h" +#include "ParserInterpreter.h" +#include "ParserRuleContext.h" +#include "ProxyErrorListener.h" +#include "RecognitionException.h" +#include "Recognizer.h" +#include "RuleContext.h" +#include "RuleContextWithAltNum.h" +#include "RuntimeMetaData.h" +#include "Token.h" +#include "TokenFactory.h" +#include "TokenSource.h" +#include "TokenStream.h" +#include "TokenStreamRewriter.h" +#include "UnbufferedCharStream.h" +#include "UnbufferedTokenStream.h" +#include "Vocabulary.h" +#include "Vocabulary.h" +#include "WritableToken.h" +#include "atn/ATN.h" +#include "atn/ATNConfig.h" +#include "atn/ATNConfigSet.h" +#include "atn/ATNDeserializationOptions.h" +#include "atn/ATNDeserializer.h" +#include "atn/ATNSerializer.h" +#include "atn/ATNSimulator.h" +#include "atn/ATNState.h" +#include "atn/ATNType.h" +#include "atn/AbstractPredicateTransition.h" +#include "atn/ActionTransition.h" +#include "atn/AmbiguityInfo.h" +#include "atn/ArrayPredictionContext.h" +#include "atn/AtomTransition.h" +#include "atn/BasicBlockStartState.h" +#include "atn/BasicState.h" +#include "atn/BlockEndState.h" +#include "atn/BlockStartState.h" +#include "atn/ContextSensitivityInfo.h" +#include 
"atn/DecisionEventInfo.h" +#include "atn/DecisionInfo.h" +#include "atn/DecisionState.h" +#include "atn/EmptyPredictionContext.h" +#include "atn/EpsilonTransition.h" +#include "atn/ErrorInfo.h" +#include "atn/LL1Analyzer.h" +#include "atn/LexerATNConfig.h" +#include "atn/LexerATNSimulator.h" +#include "atn/LexerAction.h" +#include "atn/LexerActionExecutor.h" +#include "atn/LexerActionType.h" +#include "atn/LexerChannelAction.h" +#include "atn/LexerCustomAction.h" +#include "atn/LexerIndexedCustomAction.h" +#include "atn/LexerModeAction.h" +#include "atn/LexerMoreAction.h" +#include "atn/LexerPopModeAction.h" +#include "atn/LexerPushModeAction.h" +#include "atn/LexerSkipAction.h" +#include "atn/LexerTypeAction.h" +#include "atn/LookaheadEventInfo.h" +#include "atn/LoopEndState.h" +#include "atn/NotSetTransition.h" +#include "atn/OrderedATNConfigSet.h" +#include "atn/ParseInfo.h" +#include "atn/ParserATNSimulator.h" +#include "atn/PlusBlockStartState.h" +#include "atn/PlusLoopbackState.h" +#include "atn/PrecedencePredicateTransition.h" +#include "atn/PredicateEvalInfo.h" +#include "atn/PredicateTransition.h" +#include "atn/PredictionContext.h" +#include "atn/PredictionMode.h" +#include "atn/ProfilingATNSimulator.h" +#include "atn/RangeTransition.h" +#include "atn/RuleStartState.h" +#include "atn/RuleStopState.h" +#include "atn/RuleTransition.h" +#include "atn/SemanticContext.h" +#include "atn/SetTransition.h" +#include "atn/SingletonPredictionContext.h" +#include "atn/StarBlockStartState.h" +#include "atn/StarLoopEntryState.h" +#include "atn/StarLoopbackState.h" +#include "atn/TokensStartState.h" +#include "atn/Transition.h" +#include "atn/WildcardTransition.h" +#include "dfa/DFA.h" +#include "dfa/DFASerializer.h" +#include "dfa/DFAState.h" +#include "dfa/LexerDFASerializer.h" +#include "misc/InterpreterDataReader.h" +#include "misc/Interval.h" +#include "misc/IntervalSet.h" +#include "misc/MurmurHash.h" +#include "misc/Predicate.h" +#include "support/Any.h" 
+#include "support/Arrays.h" +#include "support/BitSet.h" +#include "support/CPPUtils.h" +#include "support/StringUtils.h" +#include "support/guid.h" +#include "tree/AbstractParseTreeVisitor.h" +#include "tree/ErrorNode.h" +#include "tree/ErrorNodeImpl.h" +#include "tree/ParseTree.h" +#include "tree/ParseTreeListener.h" +#include "tree/ParseTreeProperty.h" +#include "tree/ParseTreeVisitor.h" +#include "tree/ParseTreeWalker.h" +#include "tree/TerminalNode.h" +#include "tree/TerminalNodeImpl.h" +#include "tree/Trees.h" +#include "tree/pattern/Chunk.h" +#include "tree/pattern/ParseTreeMatch.h" +#include "tree/pattern/ParseTreePattern.h" +#include "tree/pattern/ParseTreePatternMatcher.h" +#include "tree/pattern/RuleTagToken.h" +#include "tree/pattern/TagChunk.h" +#include "tree/pattern/TextChunk.h" +#include "tree/pattern/TokenTagToken.h" +#include "tree/xpath/XPath.h" +#include "tree/xpath/XPathElement.h" +#include "tree/xpath/XPathLexer.h" +#include "tree/xpath/XPathLexerErrorListener.h" +#include "tree/xpath/XPathRuleAnywhereElement.h" +#include "tree/xpath/XPathRuleElement.h" +#include "tree/xpath/XPathTokenAnywhereElement.h" +#include "tree/xpath/XPathTokenElement.h" +#include "tree/xpath/XPathWildcardAnywhereElement.h" +#include "tree/xpath/XPathWildcardElement.h" + + diff --git a/lib/antlr4/include/atn/ATN.h b/lib/antlr4/include/atn/ATN.h new file mode 100644 index 0000000..9c40cee --- /dev/null +++ b/lib/antlr4/include/atn/ATN.h @@ -0,0 +1,112 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "RuleContext.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC ATN { + public: + static const size_t INVALID_ALT_NUMBER = 0; + + /// Used for runtime deserialization of ATNs from strings. 
+ ATN(); + ATN(ATN &&other); + ATN(ATNType grammarType, size_t maxTokenType); + virtual ~ATN(); + + std::vector states; + + /// Each subrule/rule is a decision point and we must track them so we + /// can go back later and build DFA predictors for them. This includes + /// all the rules, subrules, optional blocks, ()+, ()* etc... + std::vector decisionToState; + + /// Maps from rule index to starting state number. + std::vector ruleToStartState; + + /// Maps from rule index to stop state number. + std::vector ruleToStopState; + + /// The type of the ATN. + ATNType grammarType; + + /// The maximum value for any symbol recognized by a transition in the ATN. + size_t maxTokenType; + + /// + /// For lexer ATNs, this maps the rule index to the resulting token type. + /// For parser ATNs, this maps the rule index to the generated bypass token + /// type if the + /// + /// deserialization option was specified; otherwise, this is {@code null}. + /// + std::vector ruleToTokenType; + + /// For lexer ATNs, this is an array of {@link LexerAction} objects which may + /// be referenced by action transitions in the ATN. + std::vector> lexerActions; + + std::vector modeToStartState; + + ATN& operator = (ATN &other) NOEXCEPT; + ATN& operator = (ATN &&other) NOEXCEPT; + + /// + /// Compute the set of valid tokens that can occur starting in state {@code s}. + /// If {@code ctx} is null, the set of tokens will not include what can follow + /// the rule surrounding {@code s}. In other words, the set will be + /// restricted to tokens reachable staying within {@code s}'s rule. + /// + virtual misc::IntervalSet nextTokens(ATNState *s, RuleContext *ctx) const; + + /// + /// Compute the set of valid tokens that can occur starting in {@code s} and + /// staying in same rule. is in set if we reach end of + /// rule. 
+ /// + virtual misc::IntervalSet const& nextTokens(ATNState *s) const; + + virtual void addState(ATNState *state); + + virtual void removeState(ATNState *state); + + virtual int defineDecisionState(DecisionState *s); + + virtual DecisionState *getDecisionState(size_t decision) const; + + virtual size_t getNumberOfDecisions() const; + + /// + /// Computes the set of input symbols which could follow ATN state number + /// {@code stateNumber} in the specified full {@code context}. This method + /// considers the complete parser context, but does not evaluate semantic + /// predicates (i.e. all predicates encountered during the calculation are + /// assumed true). If a path in the ATN exists from the starting state to the + /// of the outermost context without matching any + /// symbols, is added to the returned set. + ///

      + /// If {@code context} is {@code null}, it is treated as + /// {@code ParserRuleContext#EMPTY}. + ///

      + /// the ATN state number + /// the full parse context + /// The set of potentially valid input symbols which could follow the + /// specified state in the specified context. + /// if the ATN does not contain a state with + /// number {@code stateNumber} + virtual misc::IntervalSet getExpectedTokens(size_t stateNumber, RuleContext *context) const; + + std::string toString() const; + + private: + mutable std::mutex _mutex; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/ATNConfig.h b/lib/antlr4/include/atn/ATNConfig.h new file mode 100644 index 0000000..a78b5c0 --- /dev/null +++ b/lib/antlr4/include/atn/ATNConfig.h @@ -0,0 +1,148 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +namespace antlr4 { +namespace atn { + + /// + /// A tuple: (ATN state, predicted alt, syntactic, semantic context). + /// The syntactic context is a graph-structured stack node whose + /// path(s) to the root is the rule invocation(s) + /// chain used to arrive at the state. The semantic context is + /// the tree of semantic predicates encountered before reaching + /// an ATN state. + /// + class ANTLR4CPP_PUBLIC ATNConfig { + public: + struct Hasher + { + size_t operator()(ATNConfig const& k) const { + return k.hashCode(); + } + }; + + struct Comparer { + bool operator()(ATNConfig const& lhs, ATNConfig const& rhs) const { + return (&lhs == &rhs) || (lhs == rhs); + } + }; + + + using Set = std::unordered_set, Hasher, Comparer>; + + /// The ATN state associated with this configuration. + ATNState * state; + + /// What alt (or lexer rule) is predicted by this configuration. + const size_t alt; + + /// The stack of invoking states leading to the rule/states associated + /// with this config. We track only those contexts pushed during + /// execution of the ATN simulator. 
+ /// + /// Can be shared between multiple ANTConfig instances. + Ref context; + + /** + * We cannot execute predicates dependent upon local context unless + * we know for sure we are in the correct context. Because there is + * no way to do this efficiently, we simply cannot evaluate + * dependent predicates unless we are in the rule that initially + * invokes the ATN simulator. + * + *

      + * closure() tracks the depth of how far we dip into the outer context: + * depth > 0. Note that it may not be totally accurate depth since I + * don't ever decrement. TODO: make it a boolean then

      + * + *

      + * For memory efficiency, the {@link #isPrecedenceFilterSuppressed} method + * is also backed by this field. Since the field is publicly accessible, the + * highest bit which would not cause the value to become negative is used to + * store this field. This choice minimizes the risk that code which only + * compares this value to 0 would be affected by the new purpose of the + * flag. It also ensures the performance of the existing {@link ATNConfig} + * constructors as well as certain operations like + * {@link ATNConfigSet#add(ATNConfig, DoubleKeyMap)} method are + * completely unaffected by the change.

      + */ + size_t reachesIntoOuterContext; + + /// Can be shared between multiple ATNConfig instances. + Ref semanticContext; + + ATNConfig(ATNState *state, size_t alt, Ref const& context); + ATNConfig(ATNState *state, size_t alt, Ref const& context, Ref const& semanticContext); + + ATNConfig(Ref const& c); // dup + ATNConfig(Ref const& c, ATNState *state); + ATNConfig(Ref const& c, ATNState *state, Ref const& semanticContext); + ATNConfig(Ref const& c, Ref const& semanticContext); + ATNConfig(Ref const& c, ATNState *state, Ref const& context); + ATNConfig(Ref const& c, ATNState *state, Ref const& context, Ref const& semanticContext); + + ATNConfig(ATNConfig const&) = default; + virtual ~ATNConfig(); + + virtual size_t hashCode() const; + + /** + * This method gets the value of the {@link #reachesIntoOuterContext} field + * as it existed prior to the introduction of the + * {@link #isPrecedenceFilterSuppressed} method. + */ + size_t getOuterContextDepth() const ; + bool isPrecedenceFilterSuppressed() const; + void setPrecedenceFilterSuppressed(bool value); + + /// An ATN configuration is equal to another if both have + /// the same state, they predict the same alternative, and + /// syntactic/semantic contexts are the same. + bool operator == (const ATNConfig &other) const; + bool operator != (const ATNConfig &other) const; + + virtual std::string toString(); + std::string toString(bool showAlt); + + private: + /** + * This field stores the bit mask for implementing the + * {@link #isPrecedenceFilterSuppressed} property as a bit within the + * existing {@link #reachesIntoOuterContext} field. + */ + static const size_t SUPPRESS_PRECEDENCE_FILTER; + }; + +} // namespace atn +} // namespace antlr4 + + +// Hash function for ATNConfig. 
+ +namespace std { + using antlr4::atn::ATNConfig; + + template <> struct hash + { + size_t operator() (const ATNConfig &x) const + { + return x.hashCode(); + } + }; + + template <> struct hash>> + { + size_t operator() (const std::vector> &vector) const + { + std::size_t seed = 0; + for (auto &config : vector) { + seed ^= config->hashCode() + 0x9e3779b9 + (seed << 6) + (seed >> 2); + } + return seed; + } + }; +} diff --git a/lib/antlr4/include/atn/ATNConfigSet.h b/lib/antlr4/include/atn/ATNConfigSet.h new file mode 100644 index 0000000..850a07c --- /dev/null +++ b/lib/antlr4/include/atn/ATNConfigSet.h @@ -0,0 +1,110 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "support/BitSet.h" +#include "atn/PredictionContext.h" + +namespace antlr4 { +namespace atn { + + /// Specialized set that can track info about the set, with support for combining similar configurations using a + /// graph-structured stack. + class ANTLR4CPP_PUBLIC ATNConfigSet { + public: + /// Track the elements as they are added to the set; supports get(i) + std::vector> configs; + + // TODO: these fields make me pretty uncomfortable but nice to pack up info together, saves recomputation + // TODO: can we track conflicts as they are added to save scanning configs later? + size_t uniqueAlt; + + /** Currently this is only used when we detect SLL conflict; this does + * not necessarily represent the ambiguous alternatives. In fact, + * I should also point out that this seems to include predicated alternatives + * that have predicates that evaluate to false. Computed in computeTargetState(). + */ + antlrcpp::BitSet conflictingAlts; + + // Used in parser and lexer. In lexer, it indicates we hit a pred + // while computing a closure operation. Don't make a DFA state from this. 
+ bool hasSemanticContext; + bool dipsIntoOuterContext; + + /// Indicates that this configuration set is part of a full context + /// LL prediction. It will be used to determine how to merge $. With SLL + /// it's a wildcard whereas it is not for LL context merge. + const bool fullCtx; + + ATNConfigSet(bool fullCtx = true); + ATNConfigSet(const Ref &old); + + virtual ~ATNConfigSet(); + + virtual bool add(const Ref &config); + + /// + /// Adding a new config means merging contexts with existing configs for + /// {@code (s, i, pi, _)}, where {@code s} is the + /// , {@code i} is the , and + /// {@code pi} is the . We use + /// {@code (s,i,pi)} as key. + ///

      + /// This method updates {@code dipsIntoOuterContext} and + /// {@code hasSemanticContext} when necessary. + ///

      + virtual bool add(const Ref &config, PredictionContextMergeCache *mergeCache); + + virtual std::vector getStates(); + + /** + * Gets the complete set of represented alternatives for the configuration + * set. + * + * @return the set of represented alternatives in this configuration set + * + * @since 4.3 + */ + antlrcpp::BitSet getAlts(); + virtual std::vector> getPredicates(); + + virtual Ref get(size_t i) const; + + virtual void optimizeConfigs(ATNSimulator *interpreter); + + bool addAll(const Ref &other); + + bool operator == (const ATNConfigSet &other); + virtual size_t hashCode(); + virtual size_t size(); + virtual bool isEmpty(); + virtual void clear(); + virtual bool isReadonly(); + virtual void setReadonly(bool readonly); + virtual std::string toString(); + + protected: + /// Indicates that the set of configurations is read-only. Do not + /// allow any code to manipulate the set; DFA states will point at + /// the sets and they must not change. This does not protect the other + /// fields; in particular, conflictingAlts is set after + /// we've made this readonly. + bool _readonly; + + virtual size_t getHash(ATNConfig *c); // Hash differs depending on set type. + + private: + size_t _cachedHashCode; + + /// All configs but hashed by (s, i, _, pi) not including context. Wiped out + /// when we go readonly as this set becomes a DFA state. + std::unordered_map _configLookup; + + void InitializeInstanceFields(); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/ATNDeserializationOptions.h b/lib/antlr4/include/atn/ATNDeserializationOptions.h new file mode 100644 index 0000000..66aa37d --- /dev/null +++ b/lib/antlr4/include/atn/ATNDeserializationOptions.h @@ -0,0 +1,50 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC ATNDeserializationOptions { + private: + static ATNDeserializationOptions defaultOptions; + + bool readOnly; + bool verifyATN; + bool generateRuleBypassTransitions; + + public: + ATNDeserializationOptions(); + ATNDeserializationOptions(ATNDeserializationOptions *options); + ATNDeserializationOptions(ATNDeserializationOptions const&) = default; + virtual ~ATNDeserializationOptions(); + ATNDeserializationOptions& operator=(ATNDeserializationOptions const&) = default; + + static const ATNDeserializationOptions& getDefaultOptions(); + + bool isReadOnly(); + + void makeReadOnly(); + + bool isVerifyATN(); + + void setVerifyATN(bool verify); + + bool isGenerateRuleBypassTransitions(); + + void setGenerateRuleBypassTransitions(bool generate); + + protected: + virtual void throwIfReadOnly(); + + private: + void InitializeInstanceFields(); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/ATNDeserializer.h b/lib/antlr4/include/atn/ATNDeserializer.h new file mode 100644 index 0000000..621e03d --- /dev/null +++ b/lib/antlr4/include/atn/ATNDeserializer.h @@ -0,0 +1,85 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/ATNDeserializationOptions.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC ATNDeserializer { + public: + static const size_t SERIALIZED_VERSION; + + /// This is the current serialized UUID. + // ml: defined as function to avoid the “static initialization order fiasco”. 
+ static Guid SERIALIZED_UUID(); + + ATNDeserializer(); + ATNDeserializer(const ATNDeserializationOptions& dso); + virtual ~ATNDeserializer(); + + static Guid toUUID(const unsigned short *data, size_t offset); + + virtual ATN deserialize(const std::vector &input); + virtual void verifyATN(const ATN &atn); + + static void checkCondition(bool condition); + static void checkCondition(bool condition, const std::string &message); + + static Transition *edgeFactory(const ATN &atn, size_t type, size_t src, size_t trg, size_t arg1, size_t arg2, + size_t arg3, const std::vector &sets); + + static ATNState *stateFactory(size_t type, size_t ruleIndex); + + protected: + /// Determines if a particular serialized representation of an ATN supports + /// a particular feature, identified by the used for serializing + /// the ATN at the time the feature was first introduced. + /// + /// The marking the first time the feature was + /// supported in the serialized ATN. + /// The of the actual serialized ATN which is + /// currently being deserialized. + /// {@code true} if the {@code actualUuid} value represents a + /// serialized ATN at or after the feature identified by {@code feature} was + /// introduced; otherwise, {@code false}. + virtual bool isFeatureSupported(const Guid &feature, const Guid &actualUuid); + void markPrecedenceDecisions(const ATN &atn); + Ref lexerActionFactory(LexerActionType type, int data1, int data2); + + private: + /// This is the earliest supported serialized UUID. + static Guid BASE_SERIALIZED_UUID(); + + /// This UUID indicates an extension of for the + /// addition of precedence predicates. + static Guid ADDED_PRECEDENCE_TRANSITIONS(); + + /** + * This UUID indicates an extension of ADDED_PRECEDENCE_TRANSITIONS + * for the addition of lexer actions encoded as a sequence of + * LexerAction instances. 
+ */ + static Guid ADDED_LEXER_ACTIONS(); + + /** + * This UUID indicates the serialized ATN contains two sets of + * IntervalSets, where the second set's values are encoded as + * 32-bit integers to support the full Unicode SMP range up to U+10FFFF. + */ + static Guid ADDED_UNICODE_SMP(); + + /// This list contains all of the currently supported UUIDs, ordered by when + /// the feature first appeared in this branch. + static std::vector& SUPPORTED_UUIDS(); + + ATNDeserializationOptions deserializationOptions; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/ATNSerializer.h b/lib/antlr4/include/atn/ATNSerializer.h new file mode 100644 index 0000000..a6d1d69 --- /dev/null +++ b/lib/antlr4/include/atn/ATNSerializer.h @@ -0,0 +1,61 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC ATNSerializer { + public: + ATN *atn; + + ATNSerializer(ATN *atn); + ATNSerializer(ATN *atn, const std::vector &tokenNames); + virtual ~ATNSerializer(); + + /// + /// Serialize state descriptors, edge descriptors, and decision->state map + /// into list of ints: + /// + /// grammar-type, (ANTLRParser.LEXER, ...) + /// max token type, + /// num states, + /// state-0-type ruleIndex, state-1-type ruleIndex, ... state-i-type + /// ruleIndex optional-arg ... + /// num rules, + /// rule-1-start-state rule-1-args, rule-2-start-state rule-2-args, ... + /// (args are token type,actionIndex in lexer else 0,0) + /// num modes, + /// mode-0-start-state, mode-1-start-state, ... (parser has 0 modes) + /// num sets + /// set-0-interval-count intervals, set-1-interval-count intervals, ... + /// num total edges, + /// src, trg, edge-type, edge arg1, optional edge arg2 (present always), + /// ... 
+ /// num decisions, + /// decision-0-start-state, decision-1-start-state, ... + /// + /// Convenient to pack into unsigned shorts to make as Java string. + /// + virtual std::vector serialize(); + + virtual std::string decode(const std::wstring& data); + virtual std::string getTokenName(size_t t); + + /// Used by Java target to encode short/int array as chars in string. + static std::wstring getSerializedAsString(ATN *atn); + static std::vector getSerialized(ATN *atn); + + static std::string getDecoded(ATN *atn, std::vector &tokenNames); + + private: + std::vector _tokenNames; + + void serializeUUID(std::vector &data, Guid uuid); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/ATNSimulator.h b/lib/antlr4/include/atn/ATNSimulator.h new file mode 100644 index 0000000..f702c97 --- /dev/null +++ b/lib/antlr4/include/atn/ATNSimulator.h @@ -0,0 +1,87 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ATN.h" +#include "misc/IntervalSet.h" +#include "support/CPPUtils.h" +#include "atn/PredictionContext.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC ATNSimulator { + public: + /// Must distinguish between missing edge and edge we know leads nowhere. + static const Ref ERROR; + const ATN &atn; + + ATNSimulator(const ATN &atn, PredictionContextCache &sharedContextCache); + virtual ~ATNSimulator(); + + virtual void reset() = 0; + + /** + * Clear the DFA cache used by the current instance. Since the DFA cache may + * be shared by multiple ATN simulators, this method may affect the + * performance (but not accuracy) of other parsers which are being used + * concurrently. + * + * @throws UnsupportedOperationException if the current instance does not + * support clearing the DFA. 
+ * + * @since 4.3 + */ + virtual void clearDFA(); + virtual PredictionContextCache& getSharedContextCache(); + virtual Ref getCachedContext(Ref const& context); + + /// @deprecated Use instead. + static ATN deserialize(const std::vector &data); + + /// @deprecated Use instead. + static void checkCondition(bool condition); + + /// @deprecated Use instead. + static void checkCondition(bool condition, const std::string &message); + + /// @deprecated Use instead. + static Transition *edgeFactory(const ATN &atn, int type, int src, int trg, int arg1, int arg2, int arg3, + const std::vector &sets); + + /// @deprecated Use instead. + static ATNState *stateFactory(int type, int ruleIndex); + + protected: + static antlrcpp::SingleWriteMultipleReadLock _stateLock; // Lock for DFA states. + static antlrcpp::SingleWriteMultipleReadLock _edgeLock; // Lock for the sparse edge map in DFA states. + + /// + /// The context cache maps all PredictionContext objects that are equals() + /// to a single cached copy. This cache is shared across all contexts + /// in all ATNConfigs in all DFA states. We rebuild each ATNConfigSet + /// to use only cached nodes/graphs in addDFAState(). We don't want to + /// fill this during closure() since there are lots of contexts that + /// pop up but are not used ever again. It also greatly slows down closure(). + ///

      + /// This cache makes a huge difference in memory and a little bit in speed. + /// For the Java grammar on java.*, it dropped the memory requirements + /// at the end from 25M to 16M. We don't store any of the full context + /// graphs in the DFA because they are limited to local context only, + /// but apparently there's a lot of repetition there as well. We optimize + /// the config contexts before storing the config set in the DFA states + /// by literally rebuilding them with cached subgraphs only. + ///

      + /// I tried a cache for use during closure operations, that was + /// whacked after each adaptivePredict(). It cost a little bit + /// more time I think and doesn't save on the overall footprint + /// so it's not worth the complexity. + ///

      + PredictionContextCache &_sharedContextCache; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/ATNState.h b/lib/antlr4/include/atn/ATNState.h new file mode 100644 index 0000000..6c73d94 --- /dev/null +++ b/lib/antlr4/include/atn/ATNState.h @@ -0,0 +1,133 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "misc/IntervalSet.h" + +namespace antlr4 { +namespace atn { + + /// + /// The following images show the relation of states and + /// for various grammar constructs. + /// + ///
        + /// + ///
      • Solid edges marked with an ε indicate a required + /// {@code EpsilonTransition}.
      • + /// + ///
      • Dashed edges indicate locations where any transition derived from + /// {@code Transition} might appear.
      • + /// + ///
      • Dashed nodes are place holders for either a sequence of linked + /// states or the inclusion of a block representing a nested + /// construct in one of the forms below.
      • + /// + ///
      • Nodes showing multiple outgoing alternatives with a {@code ...} support + /// any number of alternatives (one or more). Nodes without the {@code ...} only + /// support the exact number of alternatives shown in the diagram.
      • + /// + ///
      + /// + ///

      Basic Blocks

      + /// + ///

      Rule

      + /// + /// + /// + ///

      Block of 1 or more alternatives

      + /// + /// + /// + ///

      Greedy Loops

      + /// + ///

      Greedy Closure: {@code (...)*}

      + /// + /// + /// + ///

      Greedy Positive Closure: {@code (...)+}

      + /// + /// + /// + ///

      Greedy Optional: {@code (...)?}

      + /// + /// + /// + ///

      Non-Greedy Loops

      + /// + ///

      Non-Greedy Closure: {@code (...)*?}

      + /// + /// + /// + ///

      Non-Greedy Positive Closure: {@code (...)+?}

      + /// + /// + /// + ///

      Non-Greedy Optional: {@code (...)??}

      + /// + /// + ///
      + class ANTLR4CPP_PUBLIC ATN; + + class ANTLR4CPP_PUBLIC ATNState { + public: + ATNState(); + ATNState(ATNState const&) = delete; + + virtual ~ATNState(); + + ATNState& operator=(ATNState const&) = delete; + + static const size_t INITIAL_NUM_TRANSITIONS = 4; + static const size_t INVALID_STATE_NUMBER = static_cast(-1); // std::numeric_limits::max(); + + enum { + ATN_INVALID_TYPE = 0, + BASIC = 1, + RULE_START = 2, + BLOCK_START = 3, + PLUS_BLOCK_START = 4, + STAR_BLOCK_START = 5, + TOKEN_START = 6, + RULE_STOP = 7, + BLOCK_END = 8, + STAR_LOOP_BACK = 9, + STAR_LOOP_ENTRY = 10, + PLUS_LOOP_BACK = 11, + LOOP_END = 12 + }; + + static const std::vector serializationNames; + + size_t stateNumber = INVALID_STATE_NUMBER; + size_t ruleIndex = 0; // at runtime, we don't have Rule objects + bool epsilonOnlyTransitions = false; + + public: + virtual size_t hashCode(); + bool operator == (const ATNState &other); + + /// Track the transitions emanating from this ATN state. + std::vector transitions; + + virtual bool isNonGreedyExitState(); + virtual std::string toString() const; + virtual void addTransition(Transition *e); + virtual void addTransition(size_t index, Transition *e); + virtual Transition* removeTransition(size_t index); + virtual size_t getStateType() = 0; + + private: + /// Used to cache lookahead during parsing, not used during construction. + + misc::IntervalSet _nextTokenWithinRule; + std::atomic _nextTokenUpdated { false }; + + friend class ATN; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/ATNType.h b/lib/antlr4/include/atn/ATNType.h new file mode 100644 index 0000000..19ed7a6 --- /dev/null +++ b/lib/antlr4/include/atn/ATNType.h @@ -0,0 +1,20 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + /// Represents the type of recognizer an ATN applies to. + enum class ATNType { + LEXER = 0, + PARSER = 1, + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/AbstractPredicateTransition.h b/lib/antlr4/include/atn/AbstractPredicateTransition.h new file mode 100644 index 0000000..4865cb1 --- /dev/null +++ b/lib/antlr4/include/atn/AbstractPredicateTransition.h @@ -0,0 +1,24 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/Transition.h" + +namespace antlr4 { +namespace atn { + + class ANTState; + + class ANTLR4CPP_PUBLIC AbstractPredicateTransition : public Transition { + + public: + AbstractPredicateTransition(ATNState *target); + ~AbstractPredicateTransition(); + + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/ActionTransition.h b/lib/antlr4/include/atn/ActionTransition.h new file mode 100644 index 0000000..652e75f --- /dev/null +++ b/lib/antlr4/include/atn/ActionTransition.h @@ -0,0 +1,33 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "atn/Transition.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC ActionTransition final : public Transition { + public: + const size_t ruleIndex; + const size_t actionIndex; + const bool isCtxDependent; // e.g., $i ref in action + + ActionTransition(ATNState *target, size_t ruleIndex); + + ActionTransition(ATNState *target, size_t ruleIndex, size_t actionIndex, bool isCtxDependent); + + virtual SerializationType getSerializationType() const override; + + virtual bool isEpsilon() const override; + + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + virtual std::string toString() const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/AmbiguityInfo.h b/lib/antlr4/include/atn/AmbiguityInfo.h new file mode 100644 index 0000000..db594a1 --- /dev/null +++ b/lib/antlr4/include/atn/AmbiguityInfo.h @@ -0,0 +1,68 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionEventInfo.h" +#include "support/BitSet.h" + +namespace antlr4 { +namespace atn { + + /// + /// This class represents profiling event information for an ambiguity. + /// Ambiguities are decisions where a particular input resulted in an SLL + /// conflict, followed by LL prediction also reaching a conflict state + /// (indicating a true ambiguity in the grammar). + /// + /// + /// This event may be reported during SLL prediction in cases where the + /// conflicting SLL configuration set provides sufficient information to + /// determine that the SLL conflict is truly an ambiguity. For example, if none + /// of the ATN configurations in the conflicting SLL configuration set have + /// traversed a global follow transition (i.e. 
+ /// is 0 for all configurations), then + /// the result of SLL prediction for that input is known to be equivalent to the + /// result of LL prediction for that input. + /// + /// + /// In some cases, the minimum represented alternative in the conflicting LL + /// configuration set is not equal to the minimum represented alternative in the + /// conflicting SLL configuration set. Grammars and inputs which result in this + /// scenario are unable to use , which in turn means + /// they cannot use the two-stage parsing strategy to improve parsing performance + /// for that input. + /// + /// + /// + class ANTLR4CPP_PUBLIC AmbiguityInfo : public DecisionEventInfo { + public: + /// The set of alternative numbers for this decision event that lead to a valid parse. + antlrcpp::BitSet ambigAlts; + + /// + /// Constructs a new instance of the class with the + /// specified detailed ambiguity information. + /// + /// The decision number + /// The final configuration set identifying the ambiguous + /// alternatives for the current input + /// The set of alternatives in the decision that lead to a valid parse. + /// The predicted alt is the min(ambigAlts) + /// The input token stream + /// The start index for the current prediction + /// The index at which the ambiguity was identified during + /// prediction + /// {@code true} if the ambiguity was identified during LL + /// prediction; otherwise, {@code false} if the ambiguity was identified + /// during SLL prediction + AmbiguityInfo(size_t decision, ATNConfigSet *configs, const antlrcpp::BitSet &ambigAlts, TokenStream *input, + size_t startIndex, size_t stopIndex, bool fullCtx); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/ArrayPredictionContext.h b/lib/antlr4/include/atn/ArrayPredictionContext.h new file mode 100644 index 0000000..53a5b17 --- /dev/null +++ b/lib/antlr4/include/atn/ArrayPredictionContext.h @@ -0,0 +1,43 @@ + +/* Copyright (c) 2012-2017 The ANTLR Project. 
All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/PredictionContext.h" + +namespace antlr4 { +namespace atn { + + class SingletonPredictionContext; + + class ANTLR4CPP_PUBLIC ArrayPredictionContext : public PredictionContext { + public: + /// Parent can be empty only if full ctx mode and we make an array + /// from EMPTY and non-empty. We merge EMPTY by using null parent and + /// returnState == EMPTY_RETURN_STATE. + // Also here: we use a strong reference to our parents to avoid having them freed prematurely. + // See also SingletonPredictionContext. + const std::vector<Ref<PredictionContext>> parents; + + /// Sorted for merge, no duplicates; if present, EMPTY_RETURN_STATE is always last. + const std::vector<size_t> returnStates; + + ArrayPredictionContext(Ref<SingletonPredictionContext> const& a); + ArrayPredictionContext(std::vector<Ref<PredictionContext>> const& parents_, std::vector<size_t> const& returnStates); + virtual ~ArrayPredictionContext(); + + virtual bool isEmpty() const override; + virtual size_t size() const override; + virtual Ref<PredictionContext> getParent(size_t index) const override; + virtual size_t getReturnState(size_t index) const override; + bool operator == (const PredictionContext &o) const override; + + virtual std::string toString() const override; + }; + +} // namespace atn +} // namespace antlr4 + diff --git a/lib/antlr4/include/atn/AtomTransition.h b/lib/antlr4/include/atn/AtomTransition.h new file mode 100644 index 0000000..cc22e5a --- /dev/null +++ b/lib/antlr4/include/atn/AtomTransition.h @@ -0,0 +1,30 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/Transition.h" + +namespace antlr4 { +namespace atn { + + /// TODO: make all transitions sets? no, should remove set edges.
+ class ANTLR4CPP_PUBLIC AtomTransition final : public Transition { + public: + /// The token type or character value; or, signifies special label. + const size_t _label; + + AtomTransition(ATNState *target, size_t label); + + virtual SerializationType getSerializationType() const override; + + virtual misc::IntervalSet label() const override; + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + virtual std::string toString() const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/BasicBlockStartState.h b/lib/antlr4/include/atn/BasicBlockStartState.h new file mode 100644 index 0000000..471fbc7 --- /dev/null +++ b/lib/antlr4/include/atn/BasicBlockStartState.h @@ -0,0 +1,22 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" +#include "atn/BlockStartState.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC BasicBlockStartState final : public BlockStartState { + + public: + virtual size_t getStateType() override; + + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/BasicState.h b/lib/antlr4/include/atn/BasicState.h new file mode 100644 index 0000000..b650dc2 --- /dev/null +++ b/lib/antlr4/include/atn/BasicState.h @@ -0,0 +1,21 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "atn/ATNState.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC BasicState final : public ATNState { + + public: + virtual size_t getStateType() override; + + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/BlockEndState.h b/lib/antlr4/include/atn/BlockEndState.h new file mode 100644 index 0000000..b24bee1 --- /dev/null +++ b/lib/antlr4/include/atn/BlockEndState.h @@ -0,0 +1,24 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ATNState.h" + +namespace antlr4 { +namespace atn { + + /// Terminal node of a simple {@code (a|b|c)} block. + class ANTLR4CPP_PUBLIC BlockEndState final : public ATNState { + public: + BlockStartState *startState = nullptr; + + BlockEndState(); + + virtual size_t getStateType() override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/BlockStartState.h b/lib/antlr4/include/atn/BlockStartState.h new file mode 100644 index 0000000..725c700 --- /dev/null +++ b/lib/antlr4/include/atn/BlockStartState.h @@ -0,0 +1,21 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionState.h" + +namespace antlr4 { +namespace atn { + + /// The start of a regular {@code (...)} block. 
+ class ANTLR4CPP_PUBLIC BlockStartState : public DecisionState { + public: + ~BlockStartState(); + BlockEndState *endState = nullptr; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/ContextSensitivityInfo.h b/lib/antlr4/include/atn/ContextSensitivityInfo.h new file mode 100644 index 0000000..430ce3b --- /dev/null +++ b/lib/antlr4/include/atn/ContextSensitivityInfo.h @@ -0,0 +1,47 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionEventInfo.h" + +namespace antlr4 { +namespace atn { + + /// + /// This class represents profiling event information for a context sensitivity. + /// Context sensitivities are decisions where a particular input resulted in an + /// SLL conflict, but LL prediction produced a single unique alternative. + /// + /// + /// In some cases, the unique alternative identified by LL prediction is not + /// equal to the minimum represented alternative in the conflicting SLL + /// configuration set. Grammars and inputs which result in this scenario are + /// unable to use {@link PredictionMode#SLL}, which in turn means they cannot use + /// the two-stage parsing strategy to improve parsing performance for that + /// input. + /// + /// + /// + class ANTLR4CPP_PUBLIC ContextSensitivityInfo : public DecisionEventInfo { + public: + /// + /// Constructs a new instance of the {@link ContextSensitivityInfo} class + /// with the specified detailed context sensitivity information.
+ /// + /// @param decision The decision number + /// @param configs The final configuration set containing the unique + /// alternative identified by full-context prediction + /// @param input The input token stream + /// @param startIndex The start index for the current prediction + /// @param stopIndex The index at which the context sensitivity was + /// identified during full-context prediction + ContextSensitivityInfo(size_t decision, ATNConfigSet *configs, TokenStream *input, size_t startIndex, size_t stopIndex); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/DecisionEventInfo.h b/lib/antlr4/include/atn/DecisionEventInfo.h new file mode 100644 index 0000000..af7f5f4 --- /dev/null +++ b/lib/antlr4/include/atn/DecisionEventInfo.h @@ -0,0 +1,70 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + /// + /// This is the base class for gathering detailed information about prediction + /// events which occur during parsing. + /// + /// Note that we could record the parser call stack at the time this event + /// occurred but in the presence of left recursive rules, the stack is kind of + /// meaningless. It's better to look at the individual configurations for their + /// individual stacks. Of course that is a {@link PredictionContext} object + /// not a parse tree node and so it does not have information about the extent + /// (start...stop) of the various subtrees. Examining the stack tops of all + /// configurations provide the return states for the rule invocations. + /// From there you can get the enclosing rule. + /// + /// @since 4.3 + /// + class ANTLR4CPP_PUBLIC DecisionEventInfo { + public: + /// + /// The invoked decision number which this event is related to.
+ /// + /// + const size_t decision; + + /// + /// The configuration set containing additional information relevant to the + /// prediction state when the current event occurred, or {@code null} if no + /// additional information is relevant or available. + /// + const ATNConfigSet *configs; + + /// + /// The input token stream which is being parsed. + /// + const TokenStream *input; + + /// + /// The token index in the input stream at which the current prediction was + /// originally invoked. + /// + const size_t startIndex; + + /// + /// The token index in the input stream at which the current event occurred. + /// + const size_t stopIndex; + + /// + /// {@code true} if the current event occurred during LL prediction; + /// otherwise, {@code false} if the input occurred during SLL prediction. + /// + const bool fullCtx; + + DecisionEventInfo(size_t decision, ATNConfigSet *configs, TokenStream *input, size_t startIndex, + size_t stopIndex, bool fullCtx); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/DecisionInfo.h b/lib/antlr4/include/atn/DecisionInfo.h new file mode 100644 index 0000000..cfbb2e9 --- /dev/null +++ b/lib/antlr4/include/atn/DecisionInfo.h @@ -0,0 +1,227 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ContextSensitivityInfo.h" +#include "atn/AmbiguityInfo.h" +#include "atn/PredicateEvalInfo.h" +#include "atn/ErrorInfo.h" + +namespace antlr4 { +namespace atn { + + class LookaheadEventInfo; + + /// + /// This class contains profiling gathered for a particular decision. + /// + /// + /// Parsing performance in ANTLR 4 is heavily influenced by both static factors + /// (e.g. the form of the rules in the grammar) and dynamic factors (e.g. 
the + /// choice of input and the state of the DFA cache at the time profiling + /// operations are started). For best results, gather and use aggregate + /// statistics from a large sample of inputs representing the inputs expected in + /// production before using the results to make changes in the grammar. + /// + /// @since 4.3 + /// + class ANTLR4CPP_PUBLIC DecisionInfo { + public: + /// + /// The decision number, which is an index into . + /// + const size_t decision; + + /// + /// The total number of times was + /// invoked for this decision. + /// + long long invocations = 0; + + /// + /// The total time spent in for + /// this decision, in nanoseconds. + /// + /// + /// The value of this field contains the sum of differential results obtained + /// by , and is not adjusted to compensate for JIT + /// and/or garbage collection overhead. For best accuracy, use a modern JVM + /// implementation that provides precise results from + /// , and perform profiling in a separate process + /// which is warmed up by parsing the input prior to profiling. If desired, + /// call to reset the DFA cache to its initial + /// state before starting the profiling measurement pass. + /// + long long timeInPrediction = 0; + + /// + /// The sum of the lookahead required for SLL prediction for this decision. + /// Note that SLL prediction is used before LL prediction for performance + /// reasons even when or + /// is used. + /// + long long SLL_TotalLook = 0; + + /// + /// Gets the minimum lookahead required for any single SLL prediction to + /// complete for this decision, by reaching a unique prediction, reaching an + /// SLL conflict state, or encountering a syntax error. + /// + long long SLL_MinLook = 0; + + /// + /// Gets the maximum lookahead required for any single SLL prediction to + /// complete for this decision, by reaching a unique prediction, reaching an + /// SLL conflict state, or encountering a syntax error. 
+ /// + long long SLL_MaxLook = 0; + + /// Gets the associated with the event where the + /// value was set. + Ref SLL_MaxLookEvent; + + /// + /// The sum of the lookahead required for LL prediction for this decision. + /// Note that LL prediction is only used when SLL prediction reaches a + /// conflict state. + /// + long long LL_TotalLook = 0; + + /// + /// Gets the minimum lookahead required for any single LL prediction to + /// complete for this decision. An LL prediction completes when the algorithm + /// reaches a unique prediction, a conflict state (for + /// , an ambiguity state (for + /// , or a syntax error. + /// + long long LL_MinLook = 0; + + /// + /// Gets the maximum lookahead required for any single LL prediction to + /// complete for this decision. An LL prediction completes when the algorithm + /// reaches a unique prediction, a conflict state (for + /// , an ambiguity state (for + /// , or a syntax error. + /// + long long LL_MaxLook = 0; + + /// + /// Gets the associated with the event where the + /// value was set. + /// + Ref LL_MaxLookEvent; + + /// + /// A collection of instances describing the + /// context sensitivities encountered during LL prediction for this decision. + /// + /// + std::vector contextSensitivities; + + /// + /// A collection of instances describing the parse errors + /// identified during calls to for + /// this decision. + /// + /// + std::vector errors; + + /// + /// A collection of instances describing the + /// ambiguities encountered during LL prediction for this decision. + /// + /// + std::vector ambiguities; + + /// + /// A collection of instances describing the + /// results of evaluating individual predicates during prediction for this + /// decision. + /// + /// + std::vector predicateEvals; + + /// + /// The total number of ATN transitions required during SLL prediction for + /// this decision. 
An ATN transition is determined by the number of times the + /// DFA does not contain an edge that is required for prediction, resulting + /// in on-the-fly computation of that edge. + /// + /// + /// If DFA caching of SLL transitions is employed by the implementation, ATN + /// computation may cache the computed edge for efficient lookup during + /// future parsing of this decision. Otherwise, the SLL parsing algorithm + /// will use ATN transitions exclusively. + /// + /// + /// + /// + long long SLL_ATNTransitions = 0; + + /// + /// The total number of DFA transitions required during SLL prediction for + /// this decision. + /// + /// If the ATN simulator implementation does not use DFA caching for SLL + /// transitions, this value will be 0. + /// + /// + /// + long long SLL_DFATransitions = 0; + + /// + /// Gets the total number of times SLL prediction completed in a conflict + /// state, resulting in fallback to LL prediction. + /// + /// Note that this value is not related to whether or not + /// may be used successfully with a particular + /// grammar. If the ambiguity resolution algorithm applied to the SLL + /// conflicts for this decision produce the same result as LL prediction for + /// this decision, would produce the same overall + /// parsing result as . + /// + long long LL_Fallback = 0; + + /// + /// The total number of ATN transitions required during LL prediction for + /// this decision. An ATN transition is determined by the number of times the + /// DFA does not contain an edge that is required for prediction, resulting + /// in on-the-fly computation of that edge. + /// + /// + /// If DFA caching of LL transitions is employed by the implementation, ATN + /// computation may cache the computed edge for efficient lookup during + /// future parsing of this decision. Otherwise, the LL parsing algorithm will + /// use ATN transitions exclusively. 
+ /// + /// + /// + /// + long long LL_ATNTransitions = 0; + + /// + /// The total number of DFA transitions required during LL prediction for + /// this decision. + /// + /// If the ATN simulator implementation does not use DFA caching for LL + /// transitions, this value will be 0. + /// + /// + /// + long long LL_DFATransitions = 0; + + /// + /// Constructs a new instance of the class to contain + /// statistics for a particular decision. + /// + /// The decision number + DecisionInfo(size_t decision); + + std::string toString() const; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/DecisionState.h b/lib/antlr4/include/atn/DecisionState.h new file mode 100644 index 0000000..005de25 --- /dev/null +++ b/lib/antlr4/include/atn/DecisionState.h @@ -0,0 +1,30 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ATNState.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC DecisionState : public ATNState { + public: + int decision; + bool nonGreedy; + + private: + void InitializeInstanceFields(); + + public: + DecisionState() { + InitializeInstanceFields(); + } + + virtual std::string toString() const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/EmptyPredictionContext.h b/lib/antlr4/include/atn/EmptyPredictionContext.h new file mode 100644 index 0000000..93c036c --- /dev/null +++ b/lib/antlr4/include/atn/EmptyPredictionContext.h @@ -0,0 +1,27 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "atn/SingletonPredictionContext.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC EmptyPredictionContext : public SingletonPredictionContext { + public: + EmptyPredictionContext(); + + virtual bool isEmpty() const override; + virtual size_t size() const override; + virtual Ref getParent(size_t index) const override; + virtual size_t getReturnState(size_t index) const override; + virtual std::string toString() const override; + + virtual bool operator == (const PredictionContext &o) const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/EpsilonTransition.h b/lib/antlr4/include/atn/EpsilonTransition.h new file mode 100644 index 0000000..41fb0fb --- /dev/null +++ b/lib/antlr4/include/atn/EpsilonTransition.h @@ -0,0 +1,39 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/Transition.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC EpsilonTransition final : public Transition { + public: + EpsilonTransition(ATNState *target); + EpsilonTransition(ATNState *target, size_t outermostPrecedenceReturn); + + /** + * @return the rule index of a precedence rule for which this transition is + * returning from, where the precedence value is 0; otherwise, INVALID_INDEX. + * + * @see ATNConfig#isPrecedenceFilterSuppressed() + * @see ParserATNSimulator#applyPrecedenceFilter(ATNConfigSet) + * @since 4.4.1 + */ + size_t outermostPrecedenceReturn(); + virtual SerializationType getSerializationType() const override; + + virtual bool isEpsilon() const override; + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + virtual std::string toString() const override; + + private: + const size_t _outermostPrecedenceReturn; // A rule index. 
+ }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/ErrorInfo.h b/lib/antlr4/include/atn/ErrorInfo.h new file mode 100644 index 0000000..d34642a --- /dev/null +++ b/lib/antlr4/include/atn/ErrorInfo.h @@ -0,0 +1,43 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionEventInfo.h" + +namespace antlr4 { +namespace atn { + + /// + /// This class represents profiling event information for a syntax error + /// identified during prediction. Syntax errors occur when the prediction + /// algorithm is unable to identify an alternative which would lead to a + /// successful parse. + /// + /// + /// + class ANTLR4CPP_PUBLIC ErrorInfo : public DecisionEventInfo { + public: + /// + /// Constructs a new instance of the class with the + /// specified detailed syntax error information. + /// + /// The decision number + /// The final configuration set reached during prediction + /// prior to reaching the state + /// The input token stream + /// The start index for the current prediction + /// The index at which the syntax error was identified + /// {@code true} if the syntax error was identified during LL + /// prediction; otherwise, {@code false} if the syntax error was identified + /// during SLL prediction + ErrorInfo(size_t decision, ATNConfigSet *configs, TokenStream *input, size_t startIndex, size_t stopIndex, + bool fullCtx); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/LL1Analyzer.h b/lib/antlr4/include/atn/LL1Analyzer.h new file mode 100644 index 0000000..b945411 --- /dev/null +++ b/lib/antlr4/include/atn/LL1Analyzer.h @@ -0,0 +1,109 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Token.h" +#include "support/BitSet.h" +#include "atn/PredictionContext.h" +#include "atn/ATNConfig.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC LL1Analyzer { + public: + /// Special value added to the lookahead sets to indicate that we hit + /// a predicate during analysis if {@code seeThruPreds==false}. + static const size_t HIT_PRED = Token::INVALID_TYPE; + + const atn::ATN &_atn; + + LL1Analyzer(const atn::ATN &atn); + virtual ~LL1Analyzer(); + + /// + /// Calculates the SLL(1) expected lookahead set for each outgoing transition + /// of an {@link ATNState}. The returned array has one element for each + /// outgoing transition in {@code s}. If the closure from transition + /// i leads to a semantic predicate before matching a symbol, the + /// element at index i of the result will be {@code null}. + /// + /// @param s the ATN state + /// @return the expected symbols for each outgoing transition of {@code s}. + virtual std::vector<misc::IntervalSet> getDecisionLookahead(ATNState *s) const; + + /// + /// Compute set of tokens that can follow {@code s} in the ATN in the + /// specified {@code ctx}. + ///

      + /// If {@code ctx} is {@code null} and the end of the rule containing + /// {@code s} is reached, {@link Token#EPSILON} is added to the result set. + /// If {@code ctx} is not {@code null} and the end of the outermost rule is + /// reached, {@link Token#EOF} is added to the result set. + ///

      + /// @param s the ATN state + /// @param ctx the complete parser context, or {@code null} if the context + /// should be ignored + /// + /// @return The set of tokens that can follow {@code s} in the ATN in the + /// specified {@code ctx}. + virtual misc::IntervalSet LOOK(ATNState *s, RuleContext *ctx) const; + + /// + /// Compute set of tokens that can follow {@code s} in the ATN in the + /// specified {@code ctx}. + ///

      + /// If {@code ctx} is {@code null} and the end of the rule containing + /// {@code s} is reached, {@link Token#EPSILON} is added to the result set. + /// If {@code ctx} is not {@code null} and the end of the outermost rule is + /// reached, {@link Token#EOF} is added to the result set. + ///

      + /// @param s the ATN state + /// @param stopState the ATN state to stop at. This can be a + /// {@link BlockEndState} to detect epsilon paths through a closure. + /// @param ctx the complete parser context, or {@code null} if the context + /// should be ignored + /// + /// @return The set of tokens that can follow {@code s} in the ATN in the + /// specified {@code ctx}. + virtual misc::IntervalSet LOOK(ATNState *s, ATNState *stopState, RuleContext *ctx) const; + + /// + /// Compute set of tokens that can follow {@code s} in the ATN in the + /// specified {@code ctx}. + ///

      + /// If {@code ctx} is {@code null} and {@code stopState} or the end of the + /// rule containing {@code s} is reached, {@link Token#EPSILON} is added to + /// the result set. If {@code ctx} is not {@code null} and {@code addEOF} is + /// {@code true} and {@code stopState} or the end of the outermost rule is + /// reached, {@link Token#EOF} is added to the result set. + ///

      + /// @param s the ATN state. + /// @param stopState the ATN state to stop at. This can be a + /// {@link BlockEndState} to detect epsilon paths through a closure. + /// @param ctx The outer context, or {@code null} if the outer context should + /// not be used. + /// @param look The result lookahead set. + /// @param lookBusy A set used for preventing epsilon closures in the ATN + /// from causing a stack overflow. Outside code should pass + /// {@code new HashSet} for this argument. + /// @param calledRuleStack A set used for preventing left recursion in the + /// ATN from causing a stack overflow. Outside code should pass + /// {@code new BitSet()} for this argument. + /// @param seeThruPreds {@code true} to treat semantic predicates as + /// implicitly {@code true} and "see through them", otherwise {@code false} + /// to treat semantic predicates as opaque and add {@link #HIT_PRED} to the + /// result if one is encountered. + /// @param addEOF Add {@link Token#EOF} to the result if the end of the + /// outermost context is reached. This parameter has no effect if {@code ctx} + /// is {@code null}. + protected: + virtual void _LOOK(ATNState *s, ATNState *stopState, Ref<PredictionContext> const& ctx, misc::IntervalSet &look, + ATNConfig::Set &lookBusy, antlrcpp::BitSet &calledRuleStack, bool seeThruPreds, bool addEOF) const; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/LexerATNConfig.h b/lib/antlr4/include/atn/LexerATNConfig.h new file mode 100644 index 0000000..e25d3d1 --- /dev/null +++ b/lib/antlr4/include/atn/LexerATNConfig.h @@ -0,0 +1,44 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root.
+ */ + +#pragma once + +#include "atn/ATNConfig.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC LexerATNConfig : public ATNConfig { + public: + LexerATNConfig(ATNState *state, int alt, Ref<PredictionContext> const& context); + LexerATNConfig(ATNState *state, int alt, Ref<PredictionContext> const& context, Ref<LexerActionExecutor> const& lexerActionExecutor); + + LexerATNConfig(Ref<LexerATNConfig> const& c, ATNState *state); + LexerATNConfig(Ref<LexerATNConfig> const& c, ATNState *state, Ref<LexerActionExecutor> const& lexerActionExecutor); + LexerATNConfig(Ref<LexerATNConfig> const& c, ATNState *state, Ref<PredictionContext> const& context); + + /** + * Gets the {@link LexerActionExecutor} capable of executing the embedded + * action(s) for the current configuration. + */ + Ref<LexerActionExecutor> getLexerActionExecutor() const; + bool hasPassedThroughNonGreedyDecision(); + + virtual size_t hashCode() const override; + + bool operator == (const LexerATNConfig& other) const; + + private: + /** + * This is the backing field for {@link #getLexerActionExecutor}. + */ + const Ref<LexerActionExecutor> _lexerActionExecutor; + const bool _passedThroughNonGreedyDecision; + + static bool checkNonGreedyDecision(Ref<LexerATNConfig> const& source, ATNState *target); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/LexerATNSimulator.h b/lib/antlr4/include/atn/LexerATNSimulator.h new file mode 100644 index 0000000..fa113f8 --- /dev/null +++ b/lib/antlr4/include/atn/LexerATNSimulator.h @@ -0,0 +1,210 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root.
+ */ + +#pragma once + +#include "atn/ATNSimulator.h" +#include "atn/LexerATNConfig.h" +#include "atn/ATNConfigSet.h" + +namespace antlr4 { +namespace atn { + + /// "dup" of ParserInterpreter + class ANTLR4CPP_PUBLIC LexerATNSimulator : public ATNSimulator { + protected: + class SimState { + public: + virtual ~SimState(); + + protected: + size_t index; + size_t line; + size_t charPos; + dfa::DFAState *dfaState; + virtual void reset(); + friend class LexerATNSimulator; + + private: + void InitializeInstanceFields(); + + public: + SimState() { + InitializeInstanceFields(); + } + }; + + + public: + static const size_t MIN_DFA_EDGE = 0; + static const size_t MAX_DFA_EDGE = 127; // forces unicode to stay in ATN + + protected: + /// + /// When we hit an accept state in either the DFA or the ATN, we + /// have to notify the character stream to start buffering characters + /// via and record the current state. The current sim state + /// includes the current index into the input, the current line, + /// and current character position in that line. Note that the Lexer is + /// tracking the starting line and characterization of the token. These + /// variables track the "state" of the simulator when it hits an accept state. + ///

      + /// We track these variables separately for the DFA and ATN simulation + /// because the DFA simulation often has to fail over to the ATN + /// simulation. If the ATN simulation fails, we need the DFA to fall + /// back to its previously accepted state, if any. If the ATN succeeds, + /// then the ATN does the accept and the DFA simulator that invoked it + /// can simply return the predicted token type. + ///

      + Lexer *const _recog; + + /// The current token's starting index into the character stream. + /// Shared across DFA to ATN simulation in case the ATN fails and the + /// DFA did not have a previous accept state. In this case, we use the + /// ATN-generated exception object. + size_t _startIndex; + + /// line number 1..n within the input. + size_t _line; + + /// The index of the character relative to the beginning of the line 0..n-1. + size_t _charPositionInLine; + + public: + std::vector &_decisionToDFA; + + protected: + size_t _mode; + + /// Used during DFA/ATN exec to record the most recent accept configuration info. + SimState _prevAccept; + + public: + static int match_calls; + + LexerATNSimulator(const ATN &atn, std::vector &decisionToDFA, PredictionContextCache &sharedContextCache); + LexerATNSimulator(Lexer *recog, const ATN &atn, std::vector &decisionToDFA, PredictionContextCache &sharedContextCache); + virtual ~LexerATNSimulator () {} + + virtual void copyState(LexerATNSimulator *simulator); + virtual size_t match(CharStream *input, size_t mode); + virtual void reset() override; + + virtual void clearDFA() override; + + protected: + virtual size_t matchATN(CharStream *input); + virtual size_t execATN(CharStream *input, dfa::DFAState *ds0); + + /// + /// Get an existing target state for an edge in the DFA. If the target state + /// for the edge has not yet been computed or is otherwise not available, + /// this method returns {@code null}. + /// + /// The current DFA state + /// The next input symbol + /// The existing target DFA state for the given input symbol + /// {@code t}, or {@code null} if the target state for this edge is not + /// already cached + virtual dfa::DFAState *getExistingTargetState(dfa::DFAState *s, size_t t); + + /// + /// Compute a target state for an edge in the DFA, and attempt to add the + /// computed state and corresponding edge to the DFA. 
+ /// + /// The input stream + /// The current DFA state + /// The next input symbol + /// + /// The computed target DFA state for the given input symbol + /// {@code t}. If {@code t} does not lead to a valid DFA state, this method + /// returns . + virtual dfa::DFAState *computeTargetState(CharStream *input, dfa::DFAState *s, size_t t); + + virtual size_t failOrAccept(CharStream *input, ATNConfigSet *reach, size_t t); + + /// + /// Given a starting configuration set, figure out all ATN configurations + /// we can reach upon input {@code t}. Parameter {@code reach} is a return + /// parameter. + /// + void getReachableConfigSet(CharStream *input, ATNConfigSet *closure_, // closure_ as we have a closure() already + ATNConfigSet *reach, size_t t); + + virtual void accept(CharStream *input, const Ref &lexerActionExecutor, size_t startIndex, size_t index, + size_t line, size_t charPos); + + virtual ATNState *getReachableTarget(Transition *trans, size_t t); + + virtual std::unique_ptr computeStartState(CharStream *input, ATNState *p); + + /// + /// Since the alternatives within any lexer decision are ordered by + /// preference, this method stops pursuing the closure as soon as an accept + /// state is reached. After the first accept state is reached by depth-first + /// search from {@code config}, all other (potentially reachable) states for + /// this rule would have a lower priority. + /// + /// {@code true} if an accept state is reached, otherwise + /// {@code false}. + virtual bool closure(CharStream *input, const Ref &config, ATNConfigSet *configs, + bool currentAltReachedAcceptState, bool speculative, bool treatEofAsEpsilon); + + // side-effect: can alter configs.hasSemanticContext + virtual Ref getEpsilonTarget(CharStream *input, const Ref &config, Transition *t, + ATNConfigSet *configs, bool speculative, bool treatEofAsEpsilon); + + /// + /// Evaluate a predicate specified in the lexer. + ///

      + /// If {@code speculative} is {@code true}, this method was called before + /// {@code consume} for the matched character. This method should call + /// {@code consume} before evaluating the predicate to ensure position + /// sensitive values, including {@code Lexer::getText}, {@code Lexer::getLine}, + /// and {@code Lexer::getCharPositionInLine}, properly reflect the current + /// lexer state. This method should restore {@code input} and the simulator + /// to the original state before returning (i.e. undo the actions made by the + /// call to {@code consume}). + ///

      + /// The input stream. + /// The rule containing the predicate. + /// The index of the predicate within the rule. + /// {@code true} if the current index in {@code input} is + /// one character before the predicate's location. + /// + /// {@code true} if the specified predicate evaluates to + /// {@code true}. + virtual bool evaluatePredicate(CharStream *input, size_t ruleIndex, size_t predIndex, bool speculative); + + virtual void captureSimState(CharStream *input, dfa::DFAState *dfaState); + virtual dfa::DFAState* addDFAEdge(dfa::DFAState *from, size_t t, ATNConfigSet *q); + virtual void addDFAEdge(dfa::DFAState *p, size_t t, dfa::DFAState *q); + + /// + /// Add a new DFA state if there isn't one with this set of + /// configurations already. This method also detects the first + /// configuration containing an ATN rule stop state. Later, when + /// traversing the DFA, we will know which rule to accept. + /// + virtual dfa::DFAState *addDFAState(ATNConfigSet *configs); + + public: + dfa::DFA& getDFA(size_t mode); + + /// Get the text matched so far for the current token. + virtual std::string getText(CharStream *input); + virtual size_t getLine() const; + virtual void setLine(size_t line); + virtual size_t getCharPositionInLine(); + virtual void setCharPositionInLine(size_t charPositionInLine); + virtual void consume(CharStream *input); + virtual std::string getTokenName(size_t t); + + private: + void InitializeInstanceFields(); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/LexerAction.h b/lib/antlr4/include/atn/LexerAction.h new file mode 100644 index 0000000..8e833b6 --- /dev/null +++ b/lib/antlr4/include/atn/LexerAction.h @@ -0,0 +1,66 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "atn/LexerActionType.h" +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + /// + /// Represents a single action which can be executed following the successful + /// match of a lexer rule. Lexer actions are used for both embedded action syntax + /// and ANTLR 4's new lexer command syntax. + /// + /// @author Sam Harwell + /// @since 4.2 + /// + class ANTLR4CPP_PUBLIC LexerAction { + public: + virtual ~LexerAction(); + + /// + /// Gets the serialization type of the lexer action. + /// + /// The serialization type of the lexer action. + virtual LexerActionType getActionType() const = 0; + + /// + /// Gets whether the lexer action is position-dependent. Position-dependent + /// actions may have different semantics depending on the + /// index at the time the action is executed. + /// + /// Many lexer commands, including {@code type}, {@code skip}, and + /// {@code more}, do not check the input index during their execution. + /// Actions like this are position-independent, and may be stored more + /// efficiently as part of the . + /// + /// {@code true} if the lexer action semantics can be affected by the + /// position of the input at the time it is executed; + /// otherwise, {@code false}. + virtual bool isPositionDependent() const = 0; + + /// + /// Execute the lexer action in the context of the specified . + /// + /// For position-dependent actions, the input stream must already be + /// positioned correctly prior to calling this method. + /// + /// The lexer instance. 
+ virtual void execute(Lexer *lexer) = 0; + + virtual size_t hashCode() const = 0; + virtual bool operator == (const LexerAction &obj) const = 0; + virtual bool operator != (const LexerAction &obj) const { + return !(*this == obj); + } + + virtual std::string toString() const = 0; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/LexerActionExecutor.h b/lib/antlr4/include/atn/LexerActionExecutor.h new file mode 100644 index 0000000..488b54c --- /dev/null +++ b/lib/antlr4/include/atn/LexerActionExecutor.h @@ -0,0 +1,115 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "CharStream.h" +#include "atn/LexerAction.h" + +namespace antlr4 { +namespace atn { + + /// Represents an executor for a sequence of lexer actions which traversed during + /// the matching operation of a lexer rule (token). + /// + /// The executor tracks position information for position-dependent lexer actions + /// efficiently, ensuring that actions appearing only at the end of the rule do + /// not cause bloating of the created for the lexer. + class ANTLR4CPP_PUBLIC LexerActionExecutor : public std::enable_shared_from_this { + public: + /// + /// Constructs an executor for a sequence of actions. + /// The lexer actions to execute. + LexerActionExecutor(const std::vector> &lexerActions); + virtual ~LexerActionExecutor(); + + /// + /// Creates a which executes the actions for + /// the input {@code lexerActionExecutor} followed by a specified + /// {@code lexerAction}. + /// + /// The executor for actions already traversed by + /// the lexer while matching a token within a particular + /// . If this is {@code null}, the method behaves as + /// though it were an empty executor. + /// The lexer action to execute after the actions + /// specified in {@code lexerActionExecutor}. 
+ /// + /// A for executing the combine actions + /// of {@code lexerActionExecutor} and {@code lexerAction}. + static Ref append(Ref const& lexerActionExecutor, + Ref const& lexerAction); + + /// + /// Creates a which encodes the current offset + /// for position-dependent lexer actions. + /// + /// Normally, when the executor encounters lexer actions where + /// returns {@code true}, it calls + /// on the input to set the input + /// position to the end of the current token. This behavior provides + /// for efficient DFA representation of lexer actions which appear at the end + /// of a lexer rule, even when the lexer rule matches a variable number of + /// characters. + /// + /// Prior to traversing a match transition in the ATN, the current offset + /// from the token start index is assigned to all position-dependent lexer + /// actions which have not already been assigned a fixed offset. By storing + /// the offsets relative to the token start index, the DFA representation of + /// lexer actions which appear in the middle of tokens remains efficient due + /// to sharing among tokens of the same length, regardless of their absolute + /// position in the input stream. + /// + /// If the current executor already has offsets assigned to all + /// position-dependent lexer actions, the method returns {@code this}. + /// + /// The current offset to assign to all position-dependent + /// lexer actions which do not already have offsets assigned. + /// + /// A which stores input stream offsets + /// for all position-dependent lexer actions. + virtual Ref fixOffsetBeforeMatch(int offset); + + /// + /// Gets the lexer actions to be executed by this executor. + /// The lexer actions to be executed by this executor. + virtual std::vector> getLexerActions() const; + + /// + /// Execute the actions encapsulated by this executor within the context of a + /// particular . 
+ /// + /// This method calls to set the position of the + /// {@code input} prior to calling + /// on a position-dependent action. Before the + /// method returns, the input position will be restored to the same position + /// it was in when the method was invoked. + /// + /// The lexer instance. + /// The input stream which is the source for the current token. + /// When this method is called, the current for + /// {@code input} should be the start of the following token, i.e. 1 + /// character past the end of the current token. + /// The token start index. This value may be passed to + /// to set the {@code input} position to the beginning + /// of the token. + virtual void execute(Lexer *lexer, CharStream *input, size_t startIndex); + + virtual size_t hashCode() const; + virtual bool operator == (const LexerActionExecutor &obj) const; + virtual bool operator != (const LexerActionExecutor &obj) const; + + private: + const std::vector> _lexerActions; + + /// Caches the result of since the hash code is an element + /// of the performance-critical operation. + const size_t _hashCode; + + size_t generateHashCode() const; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/LexerActionType.h b/lib/antlr4/include/atn/LexerActionType.h new file mode 100644 index 0000000..a72f15c --- /dev/null +++ b/lib/antlr4/include/atn/LexerActionType.h @@ -0,0 +1,55 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + /// + /// Represents the serialization type of a . + /// + /// @author Sam Harwell + /// @since 4.2 + /// + enum class LexerActionType : size_t { + /// + /// The type of a action. + /// + CHANNEL, + /// + /// The type of a action. + /// + CUSTOM, + /// + /// The type of a action. 
+ /// + MODE, + /// + /// The type of a action. + /// + MORE, + /// + /// The type of a action. + /// + POP_MODE, + /// + /// The type of a action. + /// + PUSH_MODE, + /// + /// The type of a action. + /// + SKIP, + /// + /// The type of a action. + /// + TYPE, + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/LexerChannelAction.h b/lib/antlr4/include/atn/LexerChannelAction.h new file mode 100644 index 0000000..73e3a26 --- /dev/null +++ b/lib/antlr4/include/atn/LexerChannelAction.h @@ -0,0 +1,63 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/LexerActionType.h" + +namespace antlr4 { +namespace atn { + + using antlr4::Lexer; + + /// + /// Implements the {@code channel} lexer action by calling + /// with the assigned channel. + /// + /// @author Sam Harwell + /// @since 4.2 + /// + class ANTLR4CPP_PUBLIC LexerChannelAction final : public LexerAction { + public: + /// + /// Constructs a new {@code channel} action with the specified channel value. + /// The channel value to pass to . + LexerChannelAction(int channel); + + /// + /// Gets the channel to use for the created by the lexer. + /// + /// The channel to use for the created by the lexer. + int getChannel() const; + + /// + /// {@inheritDoc} + /// This method returns . + virtual LexerActionType getActionType() const override; + + /// + /// {@inheritDoc} + /// This method returns {@code false}. + virtual bool isPositionDependent() const override; + + /// + /// {@inheritDoc} + /// + /// This action is implemented by calling with the + /// value provided by . 
+ /// + virtual void execute(Lexer *lexer) override; + + virtual size_t hashCode() const override; + virtual bool operator == (const LexerAction &obj) const override; + virtual std::string toString() const override; + + private: + const int _channel; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/LexerCustomAction.h b/lib/antlr4/include/atn/LexerCustomAction.h new file mode 100644 index 0000000..bd1c5d3 --- /dev/null +++ b/lib/antlr4/include/atn/LexerCustomAction.h @@ -0,0 +1,87 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/LexerActionType.h" + +namespace antlr4 { +namespace atn { + + /// + /// Executes a custom lexer action by calling with the + /// rule and action indexes assigned to the custom action. The implementation of + /// a custom action is added to the generated code for the lexer in an override + /// of when the grammar is compiled. + /// + /// This class may represent embedded actions created with the {...} + /// syntax in ANTLR 4, as well as actions created for lexer commands where the + /// command argument could not be evaluated when the grammar was compiled. + /// + /// @author Sam Harwell + /// @since 4.2 + /// + class ANTLR4CPP_PUBLIC LexerCustomAction final : public LexerAction { + public: + /// + /// Constructs a custom lexer action with the specified rule and action + /// indexes. + /// + /// The rule index to use for calls to + /// . + /// The action index to use for calls to + /// . + LexerCustomAction(size_t ruleIndex, size_t actionIndex); + + /// + /// Gets the rule index to use for calls to . + /// + /// The rule index for the custom action. + size_t getRuleIndex() const; + + /// + /// Gets the action index to use for calls to . + /// + /// The action index for the custom action. 
+ size_t getActionIndex() const; + + /// + /// {@inheritDoc} + /// + /// This method returns . + virtual LexerActionType getActionType() const override; + + /// + /// Gets whether the lexer action is position-dependent. Position-dependent + /// actions may have different semantics depending on the + /// index at the time the action is executed. + /// + /// Custom actions are position-dependent since they may represent a + /// user-defined embedded action which makes calls to methods like + /// . + /// + /// This method returns {@code true}. + virtual bool isPositionDependent() const override; + + /// + /// {@inheritDoc} + /// + /// Custom actions are implemented by calling with the + /// appropriate rule and action indexes. + /// + virtual void execute(Lexer *lexer) override; + + virtual size_t hashCode() const override; + virtual bool operator == (const LexerAction &obj) const override; + virtual std::string toString() const override; + + private: + const size_t _ruleIndex; + const size_t _actionIndex; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/LexerIndexedCustomAction.h b/lib/antlr4/include/atn/LexerIndexedCustomAction.h new file mode 100644 index 0000000..bb371f8 --- /dev/null +++ b/lib/antlr4/include/atn/LexerIndexedCustomAction.h @@ -0,0 +1,82 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "RuleContext.h" +#include "atn/LexerAction.h" + +namespace antlr4 { +namespace atn { + + /// + /// This implementation of is used for tracking input offsets + /// for position-dependent actions within a . + /// + /// This action is not serialized as part of the ATN, and is only required for + /// position-dependent lexer actions which appear at a location other than the + /// end of a rule. 
For more information about DFA optimizations employed for + /// lexer actions, see and + /// . + /// + /// @author Sam Harwell + /// @since 4.2 + /// + class ANTLR4CPP_PUBLIC LexerIndexedCustomAction final : public LexerAction { + public: + /// + /// Constructs a new indexed custom action by associating a character offset + /// with a . + /// + /// Note: This class is only required for lexer actions for which + /// returns {@code true}. + /// + /// The offset into the input , relative to + /// the token start index, at which the specified lexer action should be + /// executed. + /// The lexer action to execute at a particular offset in the + /// input . + LexerIndexedCustomAction(int offset, Ref const& action); + + /// + /// Gets the location in the input at which the lexer + /// action should be executed. The value is interpreted as an offset relative + /// to the token start index. + /// + /// The location in the input at which the lexer + /// action should be executed. + int getOffset() const; + + /// + /// Gets the lexer action to execute. + /// + /// A object which executes the lexer action. + Ref getAction() const; + + /// + /// {@inheritDoc} + /// + /// This method returns the result of calling + /// on the returned by . + virtual LexerActionType getActionType() const override; + + /// + /// {@inheritDoc} + /// This method returns {@code true}. 
+ virtual bool isPositionDependent() const override; + + virtual void execute(Lexer *lexer) override; + virtual size_t hashCode() const override; + virtual bool operator == (const LexerAction &obj) const override; + virtual std::string toString() const override; + + private: + const int _offset; + const Ref _action; + }; + +} // namespace atn +} // namespace antlr4 + diff --git a/lib/antlr4/include/atn/LexerModeAction.h b/lib/antlr4/include/atn/LexerModeAction.h new file mode 100644 index 0000000..49a858b --- /dev/null +++ b/lib/antlr4/include/atn/LexerModeAction.h @@ -0,0 +1,61 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/LexerActionType.h" + +namespace antlr4 { +namespace atn { + + /// + /// Implements the {@code mode} lexer action by calling with + /// the assigned mode. + /// + /// @author Sam Harwell + /// @since 4.2 + /// + class ANTLR4CPP_PUBLIC LexerModeAction final : public LexerAction { + public: + /// + /// Constructs a new {@code mode} action with the specified mode value. + /// The mode value to pass to . + LexerModeAction(int mode); + + /// + /// Get the lexer mode this action should transition the lexer to. + /// + /// The lexer mode for this {@code mode} command. + int getMode(); + + /// + /// {@inheritDoc} + /// This method returns . + virtual LexerActionType getActionType() const override; + + /// + /// {@inheritDoc} + /// This method returns {@code false}. + virtual bool isPositionDependent() const override; + + /// + /// {@inheritDoc} + /// + /// This action is implemented by calling with the + /// value provided by . 
+ /// + virtual void execute(Lexer *lexer) override; + + virtual size_t hashCode() const override; + virtual bool operator == (const LexerAction &obj) const override; + virtual std::string toString() const override; + + private: + const int _mode; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/LexerMoreAction.h b/lib/antlr4/include/atn/LexerMoreAction.h new file mode 100644 index 0000000..ee3b2aa --- /dev/null +++ b/lib/antlr4/include/atn/LexerMoreAction.h @@ -0,0 +1,57 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/LexerActionType.h" + +namespace antlr4 { +namespace atn { + + /// + /// Implements the {@code more} lexer action by calling . + /// + /// The {@code more} command does not have any parameters, so this action is + /// implemented as a singleton instance exposed by . + /// + /// @author Sam Harwell + /// @since 4.2 + /// + class ANTLR4CPP_PUBLIC LexerMoreAction final : public LexerAction { + public: + /// + /// Provides a singleton instance of this parameterless lexer action. + /// + static const Ref getInstance(); + + /// + /// {@inheritDoc} + /// This method returns . + virtual LexerActionType getActionType() const override; + + /// + /// {@inheritDoc} + /// This method returns {@code false}. + virtual bool isPositionDependent() const override; + + /// + /// {@inheritDoc} + /// + /// This action is implemented by calling . + /// + virtual void execute(Lexer *lexer) override; + + virtual size_t hashCode() const override; + virtual bool operator == (const LexerAction &obj) const override; + virtual std::string toString() const override; + + private: + /// Constructs the singleton instance of the lexer {@code more} command. 
+ LexerMoreAction(); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/LexerPopModeAction.h b/lib/antlr4/include/atn/LexerPopModeAction.h new file mode 100644 index 0000000..497305c --- /dev/null +++ b/lib/antlr4/include/atn/LexerPopModeAction.h @@ -0,0 +1,57 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/LexerActionType.h" + +namespace antlr4 { +namespace atn { + + /// + /// Implements the {@code popMode} lexer action by calling . + /// + /// The {@code popMode} command does not have any parameters, so this action is + /// implemented as a singleton instance exposed by . + /// + /// @author Sam Harwell + /// @since 4.2 + /// + class ANTLR4CPP_PUBLIC LexerPopModeAction final : public LexerAction { + public: + /// + /// Provides a singleton instance of this parameterless lexer action. + /// + static const Ref getInstance(); + + /// + /// {@inheritDoc} + /// This method returns . + virtual LexerActionType getActionType() const override; + + /// + /// {@inheritDoc} + /// This method returns {@code false}. + virtual bool isPositionDependent() const override; + + /// + /// {@inheritDoc} + /// + /// This action is implemented by calling . + /// + virtual void execute(Lexer *lexer) override; + + virtual size_t hashCode() const override; + virtual bool operator == (const LexerAction &obj) const override; + virtual std::string toString() const override; + + private: + /// Constructs the singleton instance of the lexer {@code popMode} command. 
+ LexerPopModeAction(); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/LexerPushModeAction.h b/lib/antlr4/include/atn/LexerPushModeAction.h new file mode 100644 index 0000000..43cb888 --- /dev/null +++ b/lib/antlr4/include/atn/LexerPushModeAction.h @@ -0,0 +1,61 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/LexerActionType.h" + +namespace antlr4 { +namespace atn { + + /// + /// Implements the {@code pushMode} lexer action by calling + /// with the assigned mode. + /// + /// @author Sam Harwell + /// @since 4.2 + /// + class ANTLR4CPP_PUBLIC LexerPushModeAction final : public LexerAction { + public: + /// + /// Constructs a new {@code pushMode} action with the specified mode value. + /// The mode value to pass to . + LexerPushModeAction(int mode); + + /// + /// Get the lexer mode this action should transition the lexer to. + /// + /// The lexer mode for this {@code pushMode} command. + int getMode() const; + + /// + /// {@inheritDoc} + /// This method returns . + virtual LexerActionType getActionType() const override; + + /// + /// {@inheritDoc} + /// This method returns {@code false}. + virtual bool isPositionDependent() const override; + + /// + /// {@inheritDoc} + /// + /// This action is implemented by calling with the + /// value provided by . 
+ /// + virtual void execute(Lexer *lexer) override; + + virtual size_t hashCode() const override; + virtual bool operator == (const LexerAction &obj) const override; + virtual std::string toString() const override; + + private: + const int _mode; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/LexerSkipAction.h b/lib/antlr4/include/atn/LexerSkipAction.h new file mode 100644 index 0000000..5bd2e1c --- /dev/null +++ b/lib/antlr4/include/atn/LexerSkipAction.h @@ -0,0 +1,55 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/LexerActionType.h" + +namespace antlr4 { +namespace atn { + + /// + /// Implements the {@code skip} lexer action by calling . + /// + /// The {@code skip} command does not have any parameters, so this action is + /// implemented as a singleton instance exposed by . + /// + /// @author Sam Harwell + /// @since 4.2 + /// + class ANTLR4CPP_PUBLIC LexerSkipAction final : public LexerAction { + public: + /// Provides a singleton instance of this parameterless lexer action. + static const Ref getInstance(); + + /// + /// {@inheritDoc} + /// This method returns . + virtual LexerActionType getActionType() const override; + + /// + /// {@inheritDoc} + /// This method returns {@code false}. + virtual bool isPositionDependent() const override; + + /// + /// {@inheritDoc} + /// + /// This action is implemented by calling . + /// + virtual void execute(Lexer *lexer) override; + + virtual size_t hashCode() const override; + virtual bool operator == (const LexerAction &obj) const override; + virtual std::string toString() const override; + + private: + /// Constructs the singleton instance of the lexer {@code skip} command. 
+ LexerSkipAction(); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/LexerTypeAction.h b/lib/antlr4/include/atn/LexerTypeAction.h new file mode 100644 index 0000000..1c4a8a1 --- /dev/null +++ b/lib/antlr4/include/atn/LexerTypeAction.h @@ -0,0 +1,55 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerActionType.h" +#include "atn/LexerAction.h" + +namespace antlr4 { +namespace atn { + + /// Implements the {@code type} lexer action by calling + /// with the assigned type. + class ANTLR4CPP_PUBLIC LexerTypeAction : public LexerAction { + public: + /// + /// Constructs a new {@code type} action with the specified token type value. + /// The type to assign to the token using . + LexerTypeAction(int type); + + /// + /// Gets the type to assign to a token created by the lexer. + /// The type to assign to a token created by the lexer. + virtual int getType() const; + + /// + /// {@inheritDoc} + /// This method returns . + virtual LexerActionType getActionType() const override; + + /// + /// {@inheritDoc} + /// This method returns {@code false}. + virtual bool isPositionDependent() const override; + + /// + /// {@inheritDoc} + /// + /// This action is implemented by calling with the + /// value provided by . 
+ /// + virtual void execute(Lexer *lexer) override; + + virtual size_t hashCode() const override; + virtual bool operator == (const LexerAction &obj) const override; + virtual std::string toString() const override; + + private: + const int _type; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/LookaheadEventInfo.h b/lib/antlr4/include/atn/LookaheadEventInfo.h new file mode 100644 index 0000000..f5fc24f --- /dev/null +++ b/lib/antlr4/include/atn/LookaheadEventInfo.h @@ -0,0 +1,42 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionEventInfo.h" + +namespace antlr4 { +namespace atn { + + /// This class represents profiling event information for tracking the lookahead + /// depth required in order to make a prediction. + class ANTLR4CPP_PUBLIC LookaheadEventInfo : public DecisionEventInfo { + public: + /// The alternative chosen by adaptivePredict(), not necessarily + /// the outermost alt shown for a rule; left-recursive rules have + /// user-level alts that differ from the rewritten rule with a (...) block + /// and a (..)* loop. + size_t predictedAlt = 0; + + /// + /// Constructs a new instance of the class with + /// the specified detailed lookahead information. 
+ /// + /// The decision number + /// The final configuration set containing the necessary + /// information to determine the result of a prediction, or {@code null} if + /// the final configuration set is not available + /// The input token stream + /// The start index for the current prediction + /// The index at which the prediction was finally made + /// {@code true} if the current lookahead is part of an LL + /// prediction; otherwise, {@code false} if the current lookahead is part of + /// an SLL prediction + LookaheadEventInfo(size_t decision, ATNConfigSet *configs, size_t predictedAlt, TokenStream *input, size_t startIndex, + size_t stopIndex, bool fullCtx); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/LoopEndState.h b/lib/antlr4/include/atn/LoopEndState.h new file mode 100644 index 0000000..c90efa3 --- /dev/null +++ b/lib/antlr4/include/atn/LoopEndState.h @@ -0,0 +1,22 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ATNState.h" + +namespace antlr4 { +namespace atn { + + /// Mark the end of a * or + loop. + class ANTLR4CPP_PUBLIC LoopEndState final : public ATNState { + public: + ATNState *loopBackState = nullptr; + + virtual size_t getStateType() override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/NotSetTransition.h b/lib/antlr4/include/atn/NotSetTransition.h new file mode 100644 index 0000000..214fb06 --- /dev/null +++ b/lib/antlr4/include/atn/NotSetTransition.h @@ -0,0 +1,25 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "atn/SetTransition.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC NotSetTransition final : public SetTransition { + public: + NotSetTransition(ATNState *target, const misc::IntervalSet &set); + + virtual SerializationType getSerializationType() const override; + + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + virtual std::string toString() const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/OrderedATNConfigSet.h b/lib/antlr4/include/atn/OrderedATNConfigSet.h new file mode 100644 index 0000000..4ce43bb --- /dev/null +++ b/lib/antlr4/include/atn/OrderedATNConfigSet.h @@ -0,0 +1,20 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ATNConfigSet.h" +#include "atn/ATNConfig.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC OrderedATNConfigSet : public ATNConfigSet { + protected: + virtual size_t getHash(ATNConfig *c) override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/ParseInfo.h b/lib/antlr4/include/atn/ParseInfo.h new file mode 100644 index 0000000..7ced7de --- /dev/null +++ b/lib/antlr4/include/atn/ParseInfo.h @@ -0,0 +1,102 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionInfo.h" + +namespace antlr4 { +namespace atn { + + class ProfilingATNSimulator; + + /// This class provides access to specific and aggregate statistics gathered + /// during profiling of a parser. 
+ class ANTLR4CPP_PUBLIC ParseInfo { + public: + ParseInfo(ProfilingATNSimulator *atnSimulator); + ParseInfo(ParseInfo const&) = default; + virtual ~ParseInfo(); + + ParseInfo& operator=(ParseInfo const&) = default; + + /// + /// Gets an array of instances containing the profiling + /// information gathered for each decision in the ATN. + /// + /// An array of instances, indexed by decision + /// number. + virtual std::vector getDecisionInfo(); + + /// + /// Gets the decision numbers for decisions that required one or more + /// full-context predictions during parsing. These are decisions for which + /// is non-zero. + /// + /// A list of decision numbers which required one or more + /// full-context predictions during parsing. + virtual std::vector getLLDecisions(); + + /// + /// Gets the total time spent during prediction across all decisions made + /// during parsing. This value is the sum of + /// for all decisions. + /// + virtual long long getTotalTimeInPrediction(); + + /// + /// Gets the total number of SLL lookahead operations across all decisions + /// made during parsing. This value is the sum of + /// for all decisions. + /// + virtual long long getTotalSLLLookaheadOps(); + + /// + /// Gets the total number of LL lookahead operations across all decisions + /// made during parsing. This value is the sum of + /// for all decisions. + /// + virtual long long getTotalLLLookaheadOps(); + + /// + /// Gets the total number of ATN lookahead operations for SLL prediction + /// across all decisions made during parsing. + /// + virtual long long getTotalSLLATNLookaheadOps(); + + /// + /// Gets the total number of ATN lookahead operations for LL prediction + /// across all decisions made during parsing. + /// + virtual long long getTotalLLATNLookaheadOps(); + + /// + /// Gets the total number of ATN lookahead operations for SLL and LL + /// prediction across all decisions made during parsing. + /// + /// + /// This value is the sum of and + /// . 
+ /// + virtual long long getTotalATNLookaheadOps(); + + /// + /// Gets the total number of DFA states stored in the DFA cache for all + /// decisions in the ATN. + /// + virtual size_t getDFASize(); + + /// + /// Gets the total number of DFA states stored in the DFA cache for a + /// particular decision. + /// + virtual size_t getDFASize(size_t decision); + + protected: + const ProfilingATNSimulator *_atnSimulator; // non-owning, we are created by this simulator. + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/ParserATNSimulator.h b/lib/antlr4/include/atn/ParserATNSimulator.h new file mode 100644 index 0000000..6520a44 --- /dev/null +++ b/lib/antlr4/include/atn/ParserATNSimulator.h @@ -0,0 +1,904 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "PredictionMode.h" +#include "dfa/DFAState.h" +#include "atn/ATNSimulator.h" +#include "atn/PredictionContext.h" +#include "SemanticContext.h" +#include "atn/ATNConfig.h" + +namespace antlr4 { +namespace atn { + + /** + * The embodiment of the adaptive LL(*), ALL(*), parsing strategy. + * + *

      + * The basic complexity of the adaptive strategy makes it harder to understand. + * We begin with ATN simulation to build paths in a DFA. Subsequent prediction + * requests go through the DFA first. If they reach a state without an edge for + * the current symbol, the algorithm fails over to the ATN simulation to + * complete the DFA path for the current input (until it finds a conflict state + * or uniquely predicting state).

      + * + *

      + * All of that is done without using the outer context because we want to create + * a DFA that is not dependent upon the rule invocation stack when we do a + * prediction. One DFA works in all contexts. We avoid using context not + * necessarily because it's slower, although it can be, but because of the DFA + * caching problem. The closure routine only considers the rule invocation stack + * created during prediction beginning in the decision rule. For example, if + * prediction occurs without invoking another rule's ATN, there are no context + * stacks in the configurations. When lack of context leads to a conflict, we + * don't know if it's an ambiguity or a weakness in the strong LL(*) parsing + * strategy (versus full LL(*)).

      + * + *

      + * When SLL yields a configuration set with conflict, we rewind the input and + * retry the ATN simulation, this time using full outer context without adding + * to the DFA. Configuration context stacks will be the full invocation stacks + * from the start rule. If we get a conflict using full context, then we can + * definitively say we have a true ambiguity for that input sequence. If we + * don't get a conflict, it implies that the decision is sensitive to the outer + * context. (It is not context-sensitive in the sense of context-sensitive + * grammars.)

      + * + *

      + * The next time we reach this DFA state with an SLL conflict, through DFA + * simulation, we will again retry the ATN simulation using full context mode. + * This is slow because we can't save the results and have to "interpret" the + * ATN each time we get that input.

      + * + *

      + * CACHING FULL CONTEXT PREDICTIONS

      + * + *

      + * We could cache results from full context to predicted alternative easily and + * that saves a lot of time but doesn't work in presence of predicates. The set + * of visible predicates from the ATN start state changes depending on the + * context, because closure can fall off the end of a rule. I tried to cache + * tuples (stack context, semantic context, predicted alt) but it was slower + * than interpreting and much more complicated. Also required a huge amount of + * memory. The goal is not to create the world's fastest parser anyway. I'd like + * to keep this algorithm simple. By launching multiple threads, we can improve + * the speed of parsing across a large number of files.

      + * + *

      + * There is no strict ordering between the amount of input used by SLL vs LL, + * which makes it really hard to build a cache for full context. Let's say that + * we have input A B C that leads to an SLL conflict with full context X. That + * implies that using X we might only use A B but we could also use A B C D to + * resolve conflict. Input A B C D could predict alternative 1 in one position + * in the input and A B C E could predict alternative 2 in another position in + * input. The conflicting SLL configurations could still be non-unique in the + * full context prediction, which would lead us to requiring more input than the + * original A B C. To make a prediction cache work, we have to track the exact + * input used during the previous prediction. That amounts to a cache that maps + * X to a specific DFA for that context.

      + * + *

      + * Something should be done for left-recursive expression predictions. They are + * likely LL(1) + pred eval. Easier to do the whole SLL unless error and retry + * with full LL thing Sam does.

      + * + *

      + * AVOIDING FULL CONTEXT PREDICTION

      + * + *

      + * We avoid doing full context retry when the outer context is empty, we did not + * dip into the outer context by falling off the end of the decision state rule, + * or when we force SLL mode.

      + * + *

      + * As an example of the not dip into outer context case, consider super + * constructor calls versus function calls. One grammar might look like + * this:

      + * + *
      +   * ctorBody
      +   *   : '{' superCall? stat* '}'
      +   *   ;
      +   * 
      + * + *

      + * Or, you might see something like

      + * + *
      +   * stat
      +   *   : superCall ';'
      +   *   | expression ';'
      +   *   | ...
      +   *   ;
      +   * 
      + * + *

      + * In both cases I believe that no closure operations will dip into the outer + * context. In the first case ctorBody in the worst case will stop at the '}'. + * In the 2nd case it should stop at the ';'. Both cases should stay within the + * entry rule and not dip into the outer context.

      + * + *

      + * PREDICATES

      + * + *

      + * Predicates are always evaluated if present, in both SLL and LL. SLL and + * LL simulation deal with predicates differently. SLL collects predicates as + * it performs closure operations like ANTLR v3 did. It delays predicate + * evaluation until it reaches an accept state. This allows us to cache the SLL + * ATN simulation whereas, if we had evaluated predicates on-the-fly during + * closure, the DFA state configuration sets would be different and we couldn't + * build up a suitable DFA.

      + * + *

      + * When building a DFA accept state during ATN simulation, we evaluate any + * predicates and return the sole semantically valid alternative. If there is + * more than 1 alternative, we report an ambiguity. If there are 0 alternatives, + * we throw an exception. Alternatives without predicates act like they have + * true predicates. The simple way to think about it is to strip away all + * alternatives with false predicates and choose the minimum alternative that + * remains.

      + * + *

      + * When we start in the DFA and reach an accept state that's predicated, we test + * those and return the minimum semantically viable alternative. If no + * alternatives are viable, we throw an exception.

      + * + *

      + * During full LL ATN simulation, closure always evaluates predicates + * on-the-fly. This is crucial to reducing the configuration set size during + * closure. It hits a landmine when parsing with the Java grammar, for example, + * without this on-the-fly evaluation.

      + * + *

      + * SHARING DFA

      + * + *

      + * All instances of the same parser share the same decision DFAs through a + * static field. Each instance gets its own ATN simulator but they share the + * same {@link #decisionToDFA} field. They also share a + * {@link PredictionContextCache} object that makes sure that all + * {@link PredictionContext} objects are shared among the DFA states. This makes + * a big size difference.

      + * + *

      + * THREAD SAFETY

      + * + *

      + * The {@link ParserATNSimulator} locks on the {@link #decisionToDFA} field when + * it adds a new DFA object to that array. {@link #addDFAEdge} + * locks on the DFA for the current decision when setting the + * {@link DFAState#edges} field. {@link #addDFAState} locks on + * the DFA for the current decision when looking up a DFA state to see if it + * already exists. We must make sure that all requests to add DFA states that + * are equivalent result in the same shared DFA object. This is because lots of + * threads will be trying to update the DFA at once. The + * {@link #addDFAState} method also locks inside the DFA lock + * but this time on the shared context cache when it rebuilds the + * configurations' {@link PredictionContext} objects using cached + * subgraphs/nodes. No other locking occurs, even during DFA simulation. This is + * safe as long as we can guarantee that all threads referencing + * {@code s.edge[t]} get the same physical target {@link DFAState}, or + * {@code null}. Once into the DFA, the DFA simulation does not reference the + * {@link DFA#states} map. It follows the {@link DFAState#edges} field to new + * targets. The DFA simulator will either find {@link DFAState#edges} to be + * {@code null}, to be non-{@code null} and {@code dfa.edges[t]} null, or + * {@code dfa.edges[t]} to be non-null. The + * {@link #addDFAEdge} method could be racing to set the field + * but in either case the DFA simulator works; if {@code null}, and requests ATN + * simulation. It could also race trying to get {@code dfa.edges[t]}, but either + * way it will work because it's not doing a test and set operation.

      + * + *

      + * Starting with SLL then failing to combined SLL/LL (Two-Stage + * Parsing)

      + * + *

      + * Sam pointed out that if SLL does not give a syntax error, then there is no + * point in doing full LL, which is slower. We only have to try LL if we get a + * syntax error. For maximum speed, Sam starts the parser set to pure SLL + * mode with the {@link BailErrorStrategy}:

      + * + *
      +   * parser.{@link Parser#getInterpreter() getInterpreter()}.{@link #setPredictionMode setPredictionMode}{@code (}{@link PredictionMode#SLL}{@code )};
      +   * parser.{@link Parser#setErrorHandler setErrorHandler}(new {@link BailErrorStrategy}());
      +   * 
      + * + *

      + * If it does not get a syntax error, then we're done. If it does get a syntax + * error, we need to retry with the combined SLL/LL strategy.

      + * + *

      + * The reason this works is as follows. If there are no SLL conflicts, then the + * grammar is SLL (at least for that input set). If there is an SLL conflict, + * the full LL analysis must yield a set of viable alternatives which is a + * subset of the alternatives reported by SLL. If the LL set is a singleton, + * then the grammar is LL but not SLL. If the LL set is the same size as the SLL + * set, the decision is SLL. If the LL set has size > 1, then that decision + * is truly ambiguous on the current input. If the LL set is smaller, then the + * SLL conflict resolution might choose an alternative that the full LL would + * rule out as a possibility based upon better context information. If that's + * the case, then the SLL parse will definitely get an error because the full LL + * analysis says it's not viable. If SLL conflict resolution chooses an + * alternative within the LL set, then both SLL and LL would choose the same + * alternative because they both choose the minimum of multiple conflicting + * alternatives.

      + * + *

      + * Let's say we have a set of SLL conflicting alternatives {@code {1, 2, 3}} and + * a smaller LL set called s. If s is {@code {2, 3}}, then SLL + * parsing will get an error because SLL will pursue alternative 1. If + * s is {@code {1, 2}} or {@code {1, 3}} then both SLL and LL will + * choose the same alternative because alternative one is the minimum of either + * set. If s is {@code {2}} or {@code {3}} then SLL will get a syntax + * error. If s is {@code {1}} then SLL will succeed.

      + * + *

      + * Of course, if the input is invalid, then we will get an error for sure in + * both SLL and LL parsing. Erroneous input will therefore require 2 passes over + * the input.

      + */ + class ANTLR4CPP_PUBLIC ParserATNSimulator : public ATNSimulator { + public: + /// Testing only! + ParserATNSimulator(const ATN &atn, std::vector &decisionToDFA, + PredictionContextCache &sharedContextCache); + + ParserATNSimulator(Parser *parser, const ATN &atn, std::vector &decisionToDFA, + PredictionContextCache &sharedContextCache); + + virtual void reset() override; + virtual void clearDFA() override; + virtual size_t adaptivePredict(TokenStream *input, size_t decision, ParserRuleContext *outerContext); + + static const bool TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT; + + std::vector &decisionToDFA; + + /** Implements first-edge (loop entry) elimination as an optimization + * during closure operations. See antlr/antlr4#1398. + * + * The optimization is to avoid adding the loop entry config when + * the exit path can only lead back to the same + * StarLoopEntryState after popping context at the rule end state + * (traversing only epsilon edges, so we're still in closure, in + * this same rule). + * + * We need to detect any state that can reach loop entry on + * epsilon w/o exiting rule. We don't have to look at FOLLOW + * links, just ensure that all stack tops for config refer to key + * states in LR rule. + * + * To verify we are in the right situation we must first check + * closure is at a StarLoopEntryState generated during LR removal. + * Then we check that each stack top of context is a return state + * from one of these cases: + * + * 1. 'not' expr, '(' type ')' expr. The return state points at loop entry state + * 2. expr op expr. The return state is the block end of internal block of (...)* + * 3. 'between' expr 'and' expr. The return state of 2nd expr reference. + * That state points at block end of internal block of (...)*. + * 4. expr '?' expr ':' expr. The return state points at block end, + * which points at loop entry state. 
+ * + * If any is true for each stack top, then closure does not add a + * config to the current config set for edge[0], the loop entry branch. + * + * Conditions fail if any context for the current config is: + * + * a. empty (we'd fall out of expr to do a global FOLLOW which could + * even be to some weird spot in expr) or, + * b. lies outside of expr or, + * c. lies within expr but at a state not the BlockEndState + * generated during LR removal + * + * Do we need to evaluate predicates ever in closure for this case? + * + * No. Predicates, including precedence predicates, are only + * evaluated when computing a DFA start state. I.e., only before + * the lookahead (but not parser) consumes a token. + * + * There are no epsilon edges allowed in LR rule alt blocks or in + * the "primary" part (ID here). If closure is in + * StarLoopEntryState any lookahead operation will have consumed a + * token as there are no epsilon-paths that lead to + * StarLoopEntryState. We do not have to evaluate predicates + * therefore if we are in the generated StarLoopEntryState of a LR + * rule. Note that when making a prediction starting at that + * decision point, decision d=2, compute-start-state performs + * closure starting at edges[0], edges[1] emanating from + * StarLoopEntryState. That means it is not performing closure on + * StarLoopEntryState during compute-start-state. + * + * How do we know this always gives same prediction answer? + * + * Without predicates, loop entry and exit paths are ambiguous + * upon remaining input +b (in, say, a+b). Either paths lead to + * valid parses. Closure can lead to consuming + immediately or by + * falling out of this call to expr back into expr and loop back + * again to StarLoopEntryState to match +b. In this special case, + * we choose the more efficient path, which is to take the bypass + * path. + * + * The lookahead language has not changed because closure chooses + * one path over the other. 
Both paths lead to consuming the same + * remaining input during a lookahead operation. If the next token + * is an operator, lookahead will enter the choice block with + * operators. If it is not, lookahead will exit expr. Same as if + * closure had chosen to enter the choice block immediately. + * + * Closure is examining one config (some loopentrystate, some alt, + * context) which means it is considering exactly one alt. Closure + * always copies the same alt to any derived configs. + * + * How do we know this optimization doesn't mess up precedence in + * our parse trees? + * + * Looking through expr from left edge of stat only has to confirm + * that an input, say, a+b+c; begins with any valid interpretation + * of an expression. The precedence actually doesn't matter when + * making a decision in stat seeing through expr. It is only when + * parsing rule expr that we must use the precedence to get the + * right interpretation and, hence, parse tree. + */ + bool canDropLoopEntryEdgeInLeftRecursiveRule(ATNConfig *config) const; + virtual std::string getRuleName(size_t index); + + virtual Ref precedenceTransition(Ref const& config, PrecedencePredicateTransition *pt, + bool collectPredicates, bool inContext, bool fullCtx); + + void setPredictionMode(PredictionMode newMode); + PredictionMode getPredictionMode(); + + Parser* getParser(); + + virtual std::string getTokenName(size_t t); + + virtual std::string getLookaheadName(TokenStream *input); + + /// + /// Used for debugging in adaptivePredict around execATN but I cut + /// it out for clarity now that alg. works well. We can leave this + /// "dead" code for a bit. + /// + virtual void dumpDeadEndConfigs(NoViableAltException &nvae); + + protected: + Parser *const parser; + + /// + /// Each prediction operation uses a cache for merge of prediction contexts. + /// Don't keep around as it wastes huge amounts of memory. 
The merge cache + /// isn't synchronized but we're ok since two threads shouldn't reuse same + /// parser/atnsim object because it can only handle one input at a time. + /// This maps graphs a and b to merged result c. (a,b)->c. We can avoid + /// the merge if we ever see a and b again. Note that (b,a)->c should + /// also be examined during cache lookup. + /// + PredictionContextMergeCache mergeCache; + + // LAME globals to avoid parameters!!!!! I need these down deep in predTransition + TokenStream *_input; + size_t _startIndex; + ParserRuleContext *_outerContext; + dfa::DFA *_dfa; // Reference into the decisionToDFA vector. + + /// + /// Performs ATN simulation to compute a predicted alternative based + /// upon the remaining input, but also updates the DFA cache to avoid + /// having to traverse the ATN again for the same input sequence. + /// + /// There are some key conditions we're looking for after computing a new + /// set of ATN configs (proposed DFA state): + /// if the set is empty, there is no viable alternative for current symbol + /// does the state uniquely predict an alternative? + /// does the state have a conflict that would prevent us from + /// putting it on the work list? + /// + /// We also have some key operations to do: + /// add an edge from previous DFA state to potentially new DFA state, D, + /// upon current symbol but only if adding to work list, which means in all + /// cases except no viable alternative (and possibly non-greedy decisions?) 
+ /// collecting predicates and adding semantic context to DFA accept states + /// adding rule context to context-sensitive DFA accept states + /// consuming an input symbol + /// reporting a conflict + /// reporting an ambiguity + /// reporting a context sensitivity + /// reporting insufficient predicates + /// + /// cover these cases: + /// dead end + /// single alt + /// single alt + preds + /// conflict + /// conflict + preds + /// + virtual size_t execATN(dfa::DFA &dfa, dfa::DFAState *s0, TokenStream *input, size_t startIndex, + ParserRuleContext *outerContext); + + /// + /// Get an existing target state for an edge in the DFA. If the target state + /// for the edge has not yet been computed or is otherwise not available, + /// this method returns {@code null}. + /// + /// The current DFA state + /// The next input symbol + /// The existing target DFA state for the given input symbol + /// {@code t}, or {@code null} if the target state for this edge is not + /// already cached + virtual dfa::DFAState* getExistingTargetState(dfa::DFAState *previousD, size_t t); + + /// + /// Compute a target state for an edge in the DFA, and attempt to add the + /// computed state and corresponding edge to the DFA. + /// + /// The DFA + /// The current DFA state + /// The next input symbol + /// + /// The computed target DFA state for the given input symbol + /// {@code t}. If {@code t} does not lead to a valid DFA state, this method + /// returns . 
+ virtual dfa::DFAState *computeTargetState(dfa::DFA &dfa, dfa::DFAState *previousD, size_t t); + + virtual void predicateDFAState(dfa::DFAState *dfaState, DecisionState *decisionState); + + // comes back with reach.uniqueAlt set to a valid alt + virtual size_t execATNWithFullContext(dfa::DFA &dfa, dfa::DFAState *D, ATNConfigSet *s0, + TokenStream *input, size_t startIndex, ParserRuleContext *outerContext); // how far we got before failing over + + virtual std::unique_ptr computeReachSet(ATNConfigSet *closure, size_t t, bool fullCtx); + + /// + /// Return a configuration set containing only the configurations from + /// {@code configs} which are in a . If all + /// configurations in {@code configs} are already in a rule stop state, this + /// method simply returns {@code configs}. + ///

      + /// When {@code lookToEndOfRule} is true, this method uses + /// for each configuration in {@code configs} which is + /// not already in a rule stop state to see if a rule stop state is reachable + /// from the configuration via epsilon-only transitions. + ///

      + /// the configuration set to update + /// when true, this method checks for rule stop states + /// reachable by epsilon-only transitions from each configuration in + /// {@code configs}. + /// + /// {@code configs} if all configurations in {@code configs} are in a + /// rule stop state, otherwise return a new configuration set containing only + /// the configurations from {@code configs} which are in a rule stop state + virtual ATNConfigSet* removeAllConfigsNotInRuleStopState(ATNConfigSet *configs, bool lookToEndOfRule); + + virtual std::unique_ptr computeStartState(ATNState *p, RuleContext *ctx, bool fullCtx); + + /* parrt internal source braindump that doesn't mess up + * external API spec. + + applyPrecedenceFilter is an optimization to avoid highly + nonlinear prediction of expressions and other left recursive + rules. The precedence predicates such as {3>=prec}? Are highly + context-sensitive in that they can only be properly evaluated + in the context of the proper prec argument. Without pruning, + these predicates are normal predicates evaluated when we reach + conflict state (or unique prediction). As we cannot evaluate + these predicates out of context, the resulting conflict leads + to full LL evaluation and nonlinear prediction which shows up + very clearly with fairly large expressions. + + Example grammar: + + e : e '*' e + | e '+' e + | INT + ; + + We convert that to the following: + + e[int prec] + : INT + ( {3>=prec}? '*' e[4] + | {2>=prec}? '+' e[3] + )* + ; + + The (..)* loop has a decision for the inner block as well as + an enter or exit decision, which is what concerns us here. At + the 1st + of input 1+2+3, the loop entry sees both predicates + and the loop exit also sees both predicates by falling off the + edge of e. This is because we have no stack information with + SLL and find the follow of e, which will hit the return states + inside the loop after e[4] and e[3], which brings it back to + the enter or exit decision. 
In this case, we know that we + cannot evaluate those predicates because we have fallen off + the edge of the stack and will in general not know which prec + parameter is the right one to use in the predicate. + + Because we have special information, that these are precedence + predicates, we can resolve them without failing over to full + LL despite their context sensitive nature. We make an + assumption that prec[-1] <= prec[0], meaning that the current + precedence level is greater than or equal to the precedence + level of recursive invocations above us in the stack. For + example, if predicate {3>=prec}? is true of the current prec, + then one option is to enter the loop to match it now. The + other option is to exit the loop and the left recursive rule + to match the current operator in rule invocation further up + the stack. But, we know that all of those prec are lower or + the same value and so we can decide to enter the loop instead + of matching it later. That means we can strip out the other + configuration for the exit branch. + + So imagine we have (14,1,$,{2>=prec}?) and then + (14,2,$-dipsIntoOuterContext,{2>=prec}?). The optimization + allows us to collapse these two configurations. We know that + if {2>=prec}? is true for the current prec parameter, it will + also be true for any prec from an invoking e call, indicated + by dipsIntoOuterContext. As the predicates are both true, we + have the option to evaluate them early in the decision start + state. We do this by stripping both predicates and choosing to + enter the loop as it is consistent with the notion of operator + precedence. It's also how the full LL conflict resolution + would work. + + The solution requires a different DFA start state for each + precedence level. + + The basic filter mechanism is to remove configurations of the + form (p, 2, pi) if (p, 1, pi) exists for the same p and pi. 
In + other words, for the same ATN state and predicate context, + remove any configuration associated with an exit branch if + there is a configuration associated with the enter branch. + + It's also the case that the filter evaluates precedence + predicates and resolves conflicts according to precedence + levels. For example, for input 1+2+3 at the first +, we see + prediction filtering + + [(11,1,[$],{3>=prec}?), (14,1,[$],{2>=prec}?), (5,2,[$],up=1), + (11,2,[$],up=1), (14,2,[$],up=1)],hasSemanticContext=true,dipsIntoOuterContext + + to + + [(11,1,[$]), (14,1,[$]), (5,2,[$],up=1)],dipsIntoOuterContext + + This filters because {3>=prec}? evals to true and collapses + (11,1,[$],{3>=prec}?) and (11,2,[$],up=1) since early conflict + resolution based upon rules of operator precedence fits with + our usual match first alt upon conflict. + + We noticed a problem where a recursive call resets precedence + to 0. Sam's fix: each config has flag indicating if it has + returned from an expr[0] call. then just don't filter any + config with that flag set. flag is carried along in + closure(). so to avoid adding field, set bit just under sign + bit of dipsIntoOuterContext (SUPPRESS_PRECEDENCE_FILTER). + With the change you filter "unless (p, 2, pi) was reached + after leaving the rule stop state of the LR rule containing + state p, corresponding to a rule invocation with precedence + level 0" + */ + + /** + * This method transforms the start state computed by + * {@link #computeStartState} to the special start state used by a + * precedence DFA for a particular precedence value. The transformation + * process applies the following changes to the start state's configuration + * set. + * + *
        + *
      1. Evaluate the precedence predicates for each configuration using + * {@link SemanticContext#evalPrecedence}.
      2. + *
      3. When {@link ATNConfig#isPrecedenceFilterSuppressed} is {@code false}, + * remove all configurations which predict an alternative greater than 1, + * for which another configuration that predicts alternative 1 is in the + * same ATN state with the same prediction context. This transformation is + * valid for the following reasons: + *
          + *
        • The closure block cannot contain any epsilon transitions which bypass + * the body of the closure, so all states reachable via alternative 1 are + * part of the precedence alternatives of the transformed left-recursive + * rule.
        • + *
        • The "primary" portion of a left recursive rule cannot contain an + * epsilon transition, so the only way an alternative other than 1 can exist + * in a state that is also reachable via alternative 1 is by nesting calls + * to the left-recursive rule, with the outer calls not being at the + * preferred precedence level. The + * {@link ATNConfig#isPrecedenceFilterSuppressed} property marks ATN + * configurations which do not meet this condition, and therefore are not + * eligible for elimination during the filtering process.
        • + *
        + *
      4. + *
      + * + *

      + * The prediction context must be considered by this filter to address + * situations like the following. + *

      + * + *
      +     * grammar TA;
      +     * prog: statement* EOF;
      +     * statement: letterA | statement letterA 'b' ;
      +     * letterA: 'a';
      +     * 
      + *
      + *

      + * In the above grammar, the ATN state immediately before the token + * reference {@code 'a'} in {@code letterA} is reachable from the left edge + * of both the primary and closure blocks of the left-recursive rule + * {@code statement}. The prediction context associated with each of these + * configurations distinguishes between them, and prevents the alternative + * which stepped out to {@code prog} (and then back in to {@code statement}) + * from being eliminated by the filter. + *

      + * + * @param configs The configuration set computed by + * {@link #computeStartState} as the start state for the DFA. + * @return The transformed configuration set representing the start state + * for a precedence DFA at a particular precedence level (determined by + * calling {@link Parser#getPrecedence}). + */ + std::unique_ptr applyPrecedenceFilter(ATNConfigSet *configs); + + virtual ATNState *getReachableTarget(Transition *trans, size_t ttype); + + virtual std::vector> getPredsForAmbigAlts(const antlrcpp::BitSet &ambigAlts, + ATNConfigSet *configs, size_t nalts); + + virtual std::vector getPredicatePredictions(const antlrcpp::BitSet &ambigAlts, + std::vector> const& altToPred); + + /** + * This method is used to improve the localization of error messages by + * choosing an alternative rather than throwing a + * {@link NoViableAltException} in particular prediction scenarios where the + * {@link #ERROR} state was reached during ATN simulation. + * + *

      + * The default implementation of this method uses the following + * algorithm to identify an ATN configuration which successfully parsed the + * decision entry rule. Choosing such an alternative ensures that the + * {@link ParserRuleContext} returned by the calling rule will be complete + * and valid, and the syntax error will be reported later at a more + * localized location.

      + * + *
        + *
      • If a syntactically valid path or paths reach the end of the decision rule and + * they are semantically valid if predicated, return the min associated alt.
      • + *
      • Else, if a semantically invalid but syntactically valid path + * or paths exist, return the minimum associated alt. + *
      • + *
      • Otherwise, return {@link ATN#INVALID_ALT_NUMBER}.
      • + *
      + * + *

      + * In some scenarios, the algorithm described above could predict an + * alternative which will result in a {@link FailedPredicateException} in + * the parser. Specifically, this could occur if the only configuration + * capable of successfully parsing to the end of the decision rule is + * blocked by a semantic predicate. By choosing this alternative within + * {@link #adaptivePredict} instead of throwing a + * {@link NoViableAltException}, the resulting + * {@link FailedPredicateException} in the parser will identify the specific + * predicate which is preventing the parser from successfully parsing the + * decision rule, which helps developers identify and correct logic errors + * in semantic predicates. + *

      + * + * @param configs The ATN configurations which were valid immediately before + * the {@link #ERROR} state was reached + * @param outerContext The is the \gamma_0 initial parser context from the paper + * or the parser stack at the instant before prediction commences. + * + * @return The value to return from {@link #adaptivePredict}, or + * {@link ATN#INVALID_ALT_NUMBER} if a suitable alternative was not + * identified and {@link #adaptivePredict} should report an error instead. + */ + size_t getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(ATNConfigSet *configs, + ParserRuleContext *outerContext); + + virtual size_t getAltThatFinishedDecisionEntryRule(ATNConfigSet *configs); + + /** Walk the list of configurations and split them according to + * those that have preds evaluating to true/false. If no pred, assume + * true pred and include in succeeded set. Returns Pair of sets. + * + * Create a new set so as not to alter the incoming parameter. + * + * Assumption: the input stream has been restored to the starting point + * prediction, which is where predicates need to evaluate. + */ + std::pair splitAccordingToSemanticValidity(ATNConfigSet *configs, + ParserRuleContext *outerContext); + + /// + /// Look through a list of predicate/alt pairs, returning alts for the + /// pairs that win. A {@code NONE} predicate indicates an alt containing an + /// unpredicated config which behaves as "always true." If !complete + /// then we stop at the first predicate that evaluates to true. This + /// includes pairs with null predicates. + /// + virtual antlrcpp::BitSet evalSemanticContext(std::vector predPredictions, + ParserRuleContext *outerContext, bool complete); + + /** + * Evaluate a semantic context within a specific parser context. + * + *

      + * This method might not be called for every semantic context evaluated + * during the prediction process. In particular, we currently do not + * evaluate the following but it may change in the future:

      + * + *
        + *
      • Precedence predicates (represented by + * {@link SemanticContext.PrecedencePredicate}) are not currently evaluated + * through this method.
      • + *
      • Operator predicates (represented by {@link SemanticContext.AND} and + * {@link SemanticContext.OR}) are evaluated as a single semantic + * context, rather than evaluating the operands individually. + * Implementations which require evaluation results from individual + * predicates should override this method to explicitly handle evaluation of + * the operands within operator predicates.
      • + *
      + * + * @param pred The semantic context to evaluate + * @param parserCallStack The parser context in which to evaluate the + * semantic context + * @param alt The alternative which is guarded by {@code pred} + * @param fullCtx {@code true} if the evaluation is occurring during LL + * prediction; otherwise, {@code false} if the evaluation is occurring + * during SLL prediction + * + * @since 4.3 + */ + virtual bool evalSemanticContext(Ref const& pred, ParserRuleContext *parserCallStack, + size_t alt, bool fullCtx); + + /* TODO: If we are doing predicates, there is no point in pursuing + closure operations if we reach a DFA state that uniquely predicts + alternative. We will not be caching that DFA state and it is a + waste to pursue the closure. Might have to advance when we do + ambig detection thought :( + */ + virtual void closure(Ref const& config, ATNConfigSet *configs, ATNConfig::Set &closureBusy, + bool collectPredicates, bool fullCtx, bool treatEofAsEpsilon); + + virtual void closureCheckingStopState(Ref const& config, ATNConfigSet *configs, ATNConfig::Set &closureBusy, + bool collectPredicates, bool fullCtx, int depth, bool treatEofAsEpsilon); + + /// Do the actual work of walking epsilon edges. + virtual void closure_(Ref const& config, ATNConfigSet *configs, ATNConfig::Set &closureBusy, + bool collectPredicates, bool fullCtx, int depth, bool treatEofAsEpsilon); + + virtual Ref getEpsilonTarget(Ref const& config, Transition *t, bool collectPredicates, + bool inContext, bool fullCtx, bool treatEofAsEpsilon); + virtual Ref actionTransition(Ref const& config, ActionTransition *t); + + virtual Ref predTransition(Ref const& config, PredicateTransition *pt, bool collectPredicates, + bool inContext, bool fullCtx); + + virtual Ref ruleTransition(Ref const& config, RuleTransition *t); + + /** + * Gets a {@link BitSet} containing the alternatives in {@code configs} + * which are part of one or more conflicting alternative subsets. 
+ * + * @param configs The {@link ATNConfigSet} to analyze. + * @return The alternatives in {@code configs} which are part of one or more + * conflicting alternative subsets. If {@code configs} does not contain any + * conflicting subsets, this method returns an empty {@link BitSet}. + */ + virtual antlrcpp::BitSet getConflictingAlts(ATNConfigSet *configs); + + /// + /// Sam pointed out a problem with the previous definition, v3, of + /// ambiguous states. If we have another state associated with conflicting + /// alternatives, we should keep going. For example, the following grammar + /// + /// s : (ID | ID ID?) ';' ; + /// + /// When the ATN simulation reaches the state before ';', it has a DFA + /// state that looks like: [12|1|[], 6|2|[], 12|2|[]]. Naturally + /// 12|1|[] and 12|2|[] conflict, but we cannot stop processing this node + /// because alternative to has another way to continue, via [6|2|[]]. + /// The key is that we have a single state that has config's only associated + /// with a single alternative, 2, and crucially the state transitions + /// among the configurations are all non-epsilon transitions. That means + /// we don't consider any conflicts that include alternative 2. So, we + /// ignore the conflict between alts 1 and 2. We ignore a set of + /// conflicting alts when there is an intersection with an alternative + /// associated with a single alt state in the state->config-list map. + /// + /// It's also the case that we might have two conflicting configurations but + /// also a 3rd nonconflicting configuration for a different alternative: + /// [1|1|[], 1|2|[], 8|3|[]]. This can come about from grammar: + /// + /// a : A | A | A B ; + /// + /// After matching input A, we reach the stop state for rule A, state 1. + /// State 8 is the state right before B. Clearly alternatives 1 and 2 + /// conflict and no amount of further lookahead will separate the two. 
+ /// However, alternative 3 will be able to continue and so we do not + /// stop working on this state. In the previous example, we're concerned + /// with states associated with the conflicting alternatives. Here alt + /// 3 is not associated with the conflicting configs, but since we can continue + /// looking for input reasonably, I don't declare the state done. We + /// ignore a set of conflicting alts when we have an alternative + /// that we still need to pursue. + /// + + virtual antlrcpp::BitSet getConflictingAltsOrUniqueAlt(ATNConfigSet *configs); + + virtual NoViableAltException noViableAlt(TokenStream *input, ParserRuleContext *outerContext, + ATNConfigSet *configs, size_t startIndex, bool deleteConfigs); + + static size_t getUniqueAlt(ATNConfigSet *configs); + + /// + /// Add an edge to the DFA, if possible. This method calls + /// to ensure the {@code to} state is present in the + /// DFA. If {@code from} is {@code null}, or if {@code t} is outside the + /// range of edges that can be represented in the DFA tables, this method + /// returns without adding the edge to the DFA. + ///

      + /// If {@code to} is {@code null}, this method returns {@code null}. + /// Otherwise, this method returns the returned by calling + /// for the {@code to} state. + ///

      + /// The DFA + /// The source state for the edge + /// The input symbol + /// The target state for the edge + /// + /// If {@code to} is {@code null}, this method returns {@code null}; + /// otherwise this method returns the result of calling + /// on {@code to} + virtual dfa::DFAState *addDFAEdge(dfa::DFA &dfa, dfa::DFAState *from, ssize_t t, dfa::DFAState *to); + + /// + /// Add state {@code D} to the DFA if it is not already present, and return + /// the actual instance stored in the DFA. If a state equivalent to {@code D} + /// is already in the DFA, the existing state is returned. Otherwise this + /// method returns {@code D} after adding it to the DFA. + ///

      + /// If {@code D} is , this method returns and + /// does not change the DFA. + ///

      + /// The dfa + /// The DFA state to add + /// The state stored in the DFA. This will be either the existing + /// state if {@code D} is already in the DFA, or {@code D} itself if the + /// state was not already present. + virtual dfa::DFAState *addDFAState(dfa::DFA &dfa, dfa::DFAState *D); + + virtual void reportAttemptingFullContext(dfa::DFA &dfa, const antlrcpp::BitSet &conflictingAlts, + ATNConfigSet *configs, size_t startIndex, size_t stopIndex); + + virtual void reportContextSensitivity(dfa::DFA &dfa, size_t prediction, ATNConfigSet *configs, + size_t startIndex, size_t stopIndex); + + /// If context sensitive parsing, we know it's ambiguity not conflict. + virtual void reportAmbiguity(dfa::DFA &dfa, + dfa::DFAState *D, // the DFA state from execATN() that had SLL conflicts + size_t startIndex, size_t stopIndex, + bool exact, + const antlrcpp::BitSet &ambigAlts, + ATNConfigSet *configs); // configs that LL not SLL considered conflicting + + private: + // SLL, LL, or LL + exact ambig detection? + PredictionMode _mode; + + static bool getLrLoopSetting(); + void InitializeInstanceFields(); + }; + +} // namespace atn +} // namespace antlr4 + diff --git a/lib/antlr4/include/atn/PlusBlockStartState.h b/lib/antlr4/include/atn/PlusBlockStartState.h new file mode 100644 index 0000000..a3affb8 --- /dev/null +++ b/lib/antlr4/include/atn/PlusBlockStartState.h @@ -0,0 +1,25 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/BlockStartState.h" + +namespace antlr4 { +namespace atn { + + /// Start of {@code (A|B|...)+} loop. Technically a decision state, but + /// we don't use for code generation; somebody might need it, so I'm defining + /// it for completeness. In reality, the node is the + /// real decision-making note for {@code A+}. 
+ class ANTLR4CPP_PUBLIC PlusBlockStartState final : public BlockStartState { + public: + PlusLoopbackState *loopBackState = nullptr; + + virtual size_t getStateType() override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/PlusLoopbackState.h b/lib/antlr4/include/atn/PlusLoopbackState.h new file mode 100644 index 0000000..ba7a4b6 --- /dev/null +++ b/lib/antlr4/include/atn/PlusLoopbackState.h @@ -0,0 +1,22 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionState.h" + +namespace antlr4 { +namespace atn { + + /// Decision state for {@code A+} and {@code (A|B)+}. It has two transitions: + /// one to the loop back to start of the block and one to exit. + class ANTLR4CPP_PUBLIC PlusLoopbackState final : public DecisionState { + + public: + virtual size_t getStateType() override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/PrecedencePredicateTransition.h b/lib/antlr4/include/atn/PrecedencePredicateTransition.h new file mode 100644 index 0000000..bc22146 --- /dev/null +++ b/lib/antlr4/include/atn/PrecedencePredicateTransition.h @@ -0,0 +1,29 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "atn/AbstractPredicateTransition.h" +#include "SemanticContext.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC PrecedencePredicateTransition final : public AbstractPredicateTransition { + public: + const int precedence; + + PrecedencePredicateTransition(ATNState *target, int precedence); + + virtual SerializationType getSerializationType() const override; + virtual bool isEpsilon() const override; + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + Ref getPredicate() const; + virtual std::string toString() const override; + + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/PredicateEvalInfo.h b/lib/antlr4/include/atn/PredicateEvalInfo.h new file mode 100644 index 0000000..b0513ae --- /dev/null +++ b/lib/antlr4/include/atn/PredicateEvalInfo.h @@ -0,0 +1,62 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionEventInfo.h" + +namespace antlr4 { +namespace atn { + + /// + /// This class represents profiling event information for semantic predicate + /// evaluations which occur during prediction. + /// + /// + class ANTLR4CPP_PUBLIC PredicateEvalInfo : public DecisionEventInfo { + public: + /// The semantic context which was evaluated. + const Ref semctx; + + /// + /// The alternative number for the decision which is guarded by the semantic + /// context . Note that other ATN + /// configurations may predict the same alternative which are guarded by + /// other semantic contexts and/or . + /// + const size_t predictedAlt; + + /// The result of evaluating the semantic context . + const bool evalResult; + + /// + /// Constructs a new instance of the class with the + /// specified detailed predicate evaluation information. 
+ /// + /// The decision number + /// The input token stream + /// The start index for the current prediction + /// The index at which the predicate evaluation was + /// triggered. Note that the input stream may be reset to other positions for + /// the actual evaluation of individual predicates. + /// The semantic context which was evaluated + /// The results of evaluating the semantic context + /// The alternative number for the decision which is + /// guarded by the semantic context {@code semctx}. See + /// for more information. + /// {@code true} if the semantic context was + /// evaluated during LL prediction; otherwise, {@code false} if the semantic + /// context was evaluated during SLL prediction + /// + /// + /// + PredicateEvalInfo(size_t decision, TokenStream *input, size_t startIndex, size_t stopIndex, + Ref const& semctx, bool evalResult, size_t predictedAlt, bool fullCtx); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/PredicateTransition.h b/lib/antlr4/include/atn/PredicateTransition.h new file mode 100644 index 0000000..4d9b420 --- /dev/null +++ b/lib/antlr4/include/atn/PredicateTransition.h @@ -0,0 +1,39 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/AbstractPredicateTransition.h" +#include "SemanticContext.h" + +namespace antlr4 { +namespace atn { + + /// TODO: this is old comment: + /// A tree of semantic predicates from the grammar AST if label==SEMPRED. + /// In the ATN, labels will always be exactly one predicate, but the DFA + /// may have to combine a bunch of them as it collects predicates from + /// multiple ATN configurations into a single DFA state. 
+ class ANTLR4CPP_PUBLIC PredicateTransition final : public AbstractPredicateTransition { + public: + const size_t ruleIndex; + const size_t predIndex; + const bool isCtxDependent; // e.g., $i ref in pred + + PredicateTransition(ATNState *target, size_t ruleIndex, size_t predIndex, bool isCtxDependent); + + virtual SerializationType getSerializationType() const override; + + virtual bool isEpsilon() const override; + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + Ref getPredicate() const; + + virtual std::string toString() const override; + + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/PredictionContext.h b/lib/antlr4/include/atn/PredictionContext.h new file mode 100644 index 0000000..9a52e00 --- /dev/null +++ b/lib/antlr4/include/atn/PredictionContext.h @@ -0,0 +1,254 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Recognizer.h" +#include "atn/ATN.h" +#include "atn/ATNState.h" + +namespace antlr4 { +namespace atn { + + struct PredictionContextHasher; + struct PredictionContextComparer; + class PredictionContextMergeCache; + + typedef std::unordered_set, PredictionContextHasher, PredictionContextComparer> PredictionContextCache; + + class ANTLR4CPP_PUBLIC PredictionContext { + public: + /// Represents $ in local context prediction, which means wildcard. + /// *+x = *. + static const Ref EMPTY; + + /// Represents $ in an array in full context mode, when $ + /// doesn't mean wildcard: $ + x = [$,x]. Here, + /// $ = EMPTY_RETURN_STATE. + // ml: originally Integer.MAX_VALUE, which would be -1 for us, but this is already used in places where + // -1 is converted to unsigned, so we use a different value here. Any value does the job provided it doesn't + // conflict with real return states. 
+ static const size_t EMPTY_RETURN_STATE = static_cast(-10); // std::numeric_limits::max() - 9; + + private: + static const size_t INITIAL_HASH = 1; + + public: + static size_t globalNodeCount; + const size_t id; + + /// + /// Stores the computed hash code of this . The hash + /// code is computed in parts to match the following reference algorithm. + /// + ///
      +    ///  private int referenceHashCode() {
      +    ///      int hash = ();
      +    ///
      +    ///      for (int i = 0; i < ; i++) {
      +    ///          hash = (hash, (i));
      +    ///      }
      +    ///
      +    ///      for (int i = 0; i < ; i++) {
      +    ///          hash = (hash, (i));
      +    ///      }
      +    ///
      +    ///      hash = (hash, 2 * );
      +    ///      return hash;
      +    ///  }
      +    /// 
      + ///
      + const size_t cachedHashCode; + + protected: + PredictionContext(size_t cachedHashCode); + ~PredictionContext(); + + public: + /// Convert a RuleContext tree to a PredictionContext graph. + /// Return EMPTY if outerContext is empty. + static Ref fromRuleContext(const ATN &atn, RuleContext *outerContext); + + virtual size_t size() const = 0; + virtual Ref getParent(size_t index) const = 0; + virtual size_t getReturnState(size_t index) const = 0; + + virtual bool operator == (const PredictionContext &o) const = 0; + + /// This means only the EMPTY (wildcard? not sure) context is in set. + virtual bool isEmpty() const; + virtual bool hasEmptyPath() const; + virtual size_t hashCode() const; + + protected: + static size_t calculateEmptyHashCode(); + static size_t calculateHashCode(Ref parent, size_t returnState); + static size_t calculateHashCode(const std::vector> &parents, + const std::vector &returnStates); + + public: + // dispatch + static Ref merge(const Ref &a, const Ref &b, + bool rootIsWildcard, PredictionContextMergeCache *mergeCache); + + /// + /// Merge two instances. + /// + ///

      + /// + /// Stack tops equal, parents merge is same; return left graph.
      + /// + /// + ///

      + /// + /// Same stack top, parents differ; merge parents giving array node, then + /// remainders of those graphs. A new root node is created to point to the + /// merged parents.
      + /// + /// + ///

      + /// + /// Different stack tops pointing to same parent. Make array node for the + /// root where both element in the root point to the same (original) + /// parent.
      + /// + /// + ///

      + /// + /// Different stack tops pointing to different parents. Make array node for + /// the root where each element points to the corresponding original + /// parent.
      + /// + ///

      + /// the first + /// the second + /// {@code true} if this is a local-context merge, + /// otherwise false to indicate a full-context merge + /// + static Ref mergeSingletons(const Ref &a, + const Ref &b, bool rootIsWildcard, PredictionContextMergeCache *mergeCache); + + /** + * Handle case where at least one of {@code a} or {@code b} is + * {@link #EMPTY}. In the following diagrams, the symbol {@code $} is used + * to represent {@link #EMPTY}. + * + *

      Local-Context Merges

      + * + *

      These local-context merge operations are used when {@code rootIsWildcard} + * is true.

      + * + *

      {@link #EMPTY} is superset of any graph; return {@link #EMPTY}.
      + *

      + * + *

      {@link #EMPTY} and anything is {@code #EMPTY}, so merged parent is + * {@code #EMPTY}; return left graph.
      + *

      + * + *

      Special case of last merge if local context.
      + *

      + * + *

      Full-Context Merges

      + * + *

      These full-context merge operations are used when {@code rootIsWildcard} + * is false.

      + * + *

      + * + *

      Must keep all contexts; {@link #EMPTY} in array is a special value (and + * null parent).
      + *

      + * + *

      + * + * @param a the first {@link SingletonPredictionContext} + * @param b the second {@link SingletonPredictionContext} + * @param rootIsWildcard {@code true} if this is a local-context merge, + * otherwise false to indicate a full-context merge + */ + static Ref mergeRoot(const Ref &a, + const Ref &b, bool rootIsWildcard); + + /** + * Merge two {@link ArrayPredictionContext} instances. + * + *

      Different tops, different parents.
      + *

      + * + *

      Shared top, same parents.
      + *

      + * + *

      Shared top, different parents.
      + *

      + * + *

      Shared top, all shared parents.
      + *

      + * + *

      Equal tops, merge parents and reduce top to + * {@link SingletonPredictionContext}.
      + *

      + */ + static Ref mergeArrays(const Ref &a, + const Ref &b, bool rootIsWildcard, PredictionContextMergeCache *mergeCache); + + protected: + /// Make pass over all M parents; merge any equal() ones. + /// @returns true if the list has been changed (i.e. duplicates where found). + static bool combineCommonParents(std::vector> &parents); + + public: + static std::string toDOTString(const Ref &context); + + static Ref getCachedContext(const Ref &context, + PredictionContextCache &contextCache, + std::map, Ref> &visited); + + // ter's recursive version of Sam's getAllNodes() + static std::vector> getAllContextNodes(const Ref &context); + static void getAllContextNodes_(const Ref &context, + std::vector> &nodes, std::set &visited); + + virtual std::string toString() const; + virtual std::string toString(Recognizer *recog) const; + + std::vector toStrings(Recognizer *recognizer, int currentState); + std::vector toStrings(Recognizer *recognizer, const Ref &stop, int currentState); + }; + + struct PredictionContextHasher { + size_t operator () (const Ref &k) const { + return k->hashCode(); + } + }; + + struct PredictionContextComparer { + bool operator () (const Ref &lhs, const Ref &rhs) const + { + if (lhs == rhs) // Object identity. 
+ return true; + return (lhs->hashCode() == rhs->hashCode()) && (*lhs == *rhs); + } + }; + + class PredictionContextMergeCache { + public: + Ref put(Ref const& key1, Ref const& key2, + Ref const& value); + Ref get(Ref const& key1, Ref const& key2); + + void clear(); + std::string toString() const; + size_t count() const; + + private: + std::unordered_map, + std::unordered_map, Ref, PredictionContextHasher, PredictionContextComparer>, + PredictionContextHasher, PredictionContextComparer> _data; + + }; + +} // namespace atn +} // namespace antlr4 + diff --git a/lib/antlr4/include/atn/PredictionMode.h b/lib/antlr4/include/atn/PredictionMode.h new file mode 100644 index 0000000..726f4cf --- /dev/null +++ b/lib/antlr4/include/atn/PredictionMode.h @@ -0,0 +1,436 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "support/BitSet.h" + +namespace antlr4 { +namespace atn { + + /** + * This enumeration defines the prediction modes available in ANTLR 4 along with + * utility methods for analyzing configuration sets for conflicts and/or + * ambiguities. + */ + enum class PredictionMode { + /** + * The SLL(*) prediction mode. This prediction mode ignores the current + * parser context when making predictions. This is the fastest prediction + * mode, and provides correct results for many grammars. This prediction + * mode is more powerful than the prediction mode provided by ANTLR 3, but + * may result in syntax errors for grammar and input combinations which are + * not SLL. + * + *

      + * When using this prediction mode, the parser will either return a correct + * parse tree (i.e. the same parse tree that would be returned with the + * {@link #LL} prediction mode), or it will report a syntax error. If a + * syntax error is encountered when using the {@link #SLL} prediction mode, + * it may be due to either an actual syntax error in the input or indicate + * that the particular combination of grammar and input requires the more + * powerful {@link #LL} prediction abilities to complete successfully.

      + * + *

      + * This prediction mode does not provide any guarantees for prediction + * behavior for syntactically-incorrect inputs.

      + */ + SLL, + + /** + * The LL(*) prediction mode. This prediction mode allows the current parser + * context to be used for resolving SLL conflicts that occur during + * prediction. This is the fastest prediction mode that guarantees correct + * parse results for all combinations of grammars with syntactically correct + * inputs. + * + *

      + * When using this prediction mode, the parser will make correct decisions + * for all syntactically-correct grammar and input combinations. However, in + * cases where the grammar is truly ambiguous this prediction mode might not + * report a precise answer for exactly which alternatives are + * ambiguous.

      + * + *

      + * This prediction mode does not provide any guarantees for prediction + * behavior for syntactically-incorrect inputs.

      + */ + LL, + + /** + * The LL(*) prediction mode with exact ambiguity detection. In addition to + * the correctness guarantees provided by the {@link #LL} prediction mode, + * this prediction mode instructs the prediction algorithm to determine the + * complete and exact set of ambiguous alternatives for every ambiguous + * decision encountered while parsing. + * + *

      + * This prediction mode may be used for diagnosing ambiguities during + * grammar development. Due to the performance overhead of calculating sets + * of ambiguous alternatives, this prediction mode should be avoided when + * the exact results are not necessary.

      + * + *

      + * This prediction mode does not provide any guarantees for prediction + * behavior for syntactically-incorrect inputs.

      + */ + LL_EXACT_AMBIG_DETECTION + }; + + class ANTLR4CPP_PUBLIC PredictionModeClass { + public: + /** + * Computes the SLL prediction termination condition. + * + *

      + * This method computes the SLL prediction termination condition for both of + * the following cases.

      + * + *
        + *
      • The usual SLL+LL fallback upon SLL conflict
      • + *
      • Pure SLL without LL fallback
      • + *
      + * + *

      COMBINED SLL+LL PARSING

      + * + *

      When LL-fallback is enabled upon SLL conflict, correct predictions are + * ensured regardless of how the termination condition is computed by this + * method. Due to the substantially higher cost of LL prediction, the + * prediction should only fall back to LL when the additional lookahead + * cannot lead to a unique SLL prediction.

      + * + *

      Assuming combined SLL+LL parsing, an SLL configuration set with only + * conflicting subsets should fall back to full LL, even if the + * configuration sets don't resolve to the same alternative (e.g. + * {@code {1,2}} and {@code {3,4}}). If there is at least one non-conflicting + * configuration, SLL could continue with the hopes that more lookahead will + * resolve via one of those non-conflicting configurations.

      + * + *

      Here's the prediction termination rule then: SLL (for SLL+LL parsing) + * stops when it sees only conflicting configuration subsets. In contrast, + * full LL keeps going when there is uncertainty.

      + * + *

      HEURISTIC

      + * + *

      As a heuristic, we stop prediction when we see any conflicting subset + * unless we see a state that only has one alternative associated with it. + * The single-alt-state thing lets prediction continue upon rules like + * (otherwise, it would admit defeat too soon):

      + * + *

      {@code [12|1|[], 6|2|[], 12|2|[]]. s : (ID | ID ID?) ';' ;}

      + * + *

      When the ATN simulation reaches the state before {@code ';'}, it has a + * DFA state that looks like: {@code [12|1|[], 6|2|[], 12|2|[]]}. Naturally + * {@code 12|1|[]} and {@code 12|2|[]} conflict, but we cannot stop + * processing this node because alternative two has another way to continue, + * via {@code [6|2|[]]}.

      + * + *

      It also lets us continue for this rule:

      + * + *

      {@code [1|1|[], 1|2|[], 8|3|[]] a : A | A | A B ;}

      + * + *

      After matching input A, we reach the stop state for rule A, state 1. + * State 8 is the state right before B. Clearly alternatives 1 and 2 + * conflict and no amount of further lookahead will separate the two. + * However, alternative 3 will be able to continue and so we do not stop + * working on this state. In the previous example, we're concerned with + * states associated with the conflicting alternatives. Here alt 3 is not + * associated with the conflicting configs, but since we can continue + * looking for input reasonably, don't declare the state done.

      + * + *

      PURE SLL PARSING

      + * + *

      To handle pure SLL parsing, all we have to do is make sure that we + * combine stack contexts for configurations that differ only by semantic + * predicate. From there, we can do the usual SLL termination heuristic.

      + * + *

      PREDICATES IN SLL+LL PARSING

      + * + *

      SLL decisions don't evaluate predicates until after they reach DFA stop + * states because they need to create the DFA cache that works in all + * semantic situations. In contrast, full LL evaluates predicates collected + * during start state computation so it can ignore predicates thereafter. + * This means that SLL termination detection can totally ignore semantic + * predicates.

      + * + *

      Implementation-wise, {@link ATNConfigSet} combines stack contexts but not + * semantic predicate contexts so we might see two configurations like the + * following.

      + * + *

      {@code (s, 1, x, {}), (s, 1, x', {p})}

      + * + *

      Before testing these configurations against others, we have to merge + * {@code x} and {@code x'} (without modifying the existing configurations). + * For example, we test {@code (x+x')==x''} when looking for conflicts in + * the following configurations.

      + * + *

      {@code (s, 1, x, {}), (s, 1, x', {p}), (s, 2, x'', {})}

      + * + *

      If the configuration set has predicates (as indicated by + * {@link ATNConfigSet#hasSemanticContext}), this algorithm makes a copy of + * the configurations to strip out all of the predicates so that a standard + * {@link ATNConfigSet} will merge everything ignoring predicates.

      + */ + static bool hasSLLConflictTerminatingPrediction(PredictionMode mode, ATNConfigSet *configs); + + /// + /// Checks if any configuration in {@code configs} is in a + /// . Configurations meeting this condition have + /// reached + /// the end of the decision rule (local context) or end of start rule (full + /// context). + /// + /// the configuration set to test + /// {@code true} if any configuration in {@code configs} is in a + /// , otherwise {@code false} + static bool hasConfigInRuleStopState(ATNConfigSet *configs); + + /// + /// Checks if all configurations in {@code configs} are in a + /// . Configurations meeting this condition have + /// reached + /// the end of the decision rule (local context) or end of start rule (full + /// context). + /// + /// the configuration set to test + /// {@code true} if all configurations in {@code configs} are in a + /// , otherwise {@code false} + static bool allConfigsInRuleStopStates(ATNConfigSet *configs); + + /** + * Full LL prediction termination. + * + *

      Can we stop looking ahead during ATN simulation or is there some + * uncertainty as to which alternative we will ultimately pick, after + * consuming more input? Even if there are partial conflicts, we might know + * that everything is going to resolve to the same minimum alternative. That + * means we can stop since no more lookahead will change that fact. On the + * other hand, there might be multiple conflicts that resolve to different + * minimums. That means we need more look ahead to decide which of those + * alternatives we should predict.

      + * + *

      The basic idea is to split the set of configurations {@code C}, into + * conflicting subsets {@code (s, _, ctx, _)} and singleton subsets with + * non-conflicting configurations. Two configurations conflict if they have + * identical {@link ATNConfig#state} and {@link ATNConfig#context} values + * but different {@link ATNConfig#alt} value, e.g. {@code (s, i, ctx, _)} + * and {@code (s, j, ctx, _)} for {@code i!=j}.

      + * + *

      Reduce these configuration subsets to the set of possible alternatives. + * You can compute the alternative subsets in one pass as follows:

      + * + *

      {@code A_s,ctx = {i | (s, i, ctx, _)}} for each configuration in + * {@code C} holding {@code s} and {@code ctx} fixed.

      + * + *

      Or in pseudo-code, for each configuration {@code c} in {@code C}:

      + * + *
      +     * map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses s and x, not
      +     * alt and not pred
      +     * 
      + * + *

      The values in {@code map} are the set of {@code A_s,ctx} sets.

      + * + *

      If {@code |A_s,ctx|=1} then there is no conflict associated with + * {@code s} and {@code ctx}.

      + * + *

      Reduce the subsets to singletons by choosing a minimum of each subset. If + * the union of these alternative subsets is a singleton, then no amount of + * more lookahead will help us. We will always pick that alternative. If, + * however, there is more than one alternative, then we are uncertain which + * alternative to predict and must continue looking for resolution. We may + * or may not discover an ambiguity in the future, even if there are no + * conflicting subsets this round.

      + * + *

      The biggest sin is to terminate early because it means we've made a + * decision but were uncertain as to the eventual outcome. We haven't used + * enough lookahead. On the other hand, announcing a conflict too late is no + * big deal; you will still have the conflict. It's just inefficient. It + * might even look until the end of file.

      + * + *

      No special consideration for semantic predicates is required because + * predicates are evaluated on-the-fly for full LL prediction, ensuring that + * no configuration contains a semantic context during the termination + * check.

      + * + *

      CONFLICTING CONFIGS

      + * + *

      Two configurations {@code (s, i, x)} and {@code (s, j, x')}, conflict + * when {@code i!=j} but {@code x=x'}. Because we merge all + * {@code (s, i, _)} configurations together, that means that there are at + * most {@code n} configurations associated with state {@code s} for + * {@code n} possible alternatives in the decision. The merged stacks + * complicate the comparison of configuration contexts {@code x} and + * {@code x'}. Sam checks to see if one is a subset of the other by calling + * merge and checking to see if the merged result is either {@code x} or + * {@code x'}. If the {@code x} associated with lowest alternative {@code i} + * is the superset, then {@code i} is the only possible prediction since the + * others resolve to {@code min(i)} as well. However, if {@code x} is + * associated with {@code j>i} then at least one stack configuration for + * {@code j} is not in conflict with alternative {@code i}. The algorithm + * should keep going, looking for more lookahead due to the uncertainty.

      + * + *

      For simplicity, I'm doing an equality check between {@code x} and + * {@code x'} that lets the algorithm continue to consume lookahead longer + * than necessary. The reason I like the equality is of course the + * simplicity but also because that is the test you need to detect the + * alternatives that are actually in conflict.

      + * + *

      CONTINUE/STOP RULE

      + * + *

      Continue if union of resolved alternative sets from non-conflicting and + * conflicting alternative subsets has more than one alternative. We are + * uncertain about which alternative to predict.

      + * + *

      The complete set of alternatives, {@code [i for (_,i,_)]}, tells us which + * alternatives are still in the running for the amount of input we've + * consumed at this point. The conflicting sets let us strip away + * configurations that won't lead to more states because we resolve + * conflicts to the configuration with a minimum alternative for the + * conflicting set.

      + * + *

      CASES

      + * + *
        + * + *
      • no conflicts and more than 1 alternative in set => continue
      • + * + *
      • {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s, 3, z)}, + * {@code (s', 1, y)}, {@code (s', 2, y)} yields non-conflicting set + * {@code {3}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} = + * {@code {1,3}} => continue + *
      • + * + *
      • {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)}, + * {@code (s', 2, y)}, {@code (s'', 1, z)} yields non-conflicting set + * {@code {1}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} = + * {@code {1}} => stop and predict 1
      • + * + *
      • {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)}, + * {@code (s', 2, y)} yields conflicting, reduced sets {@code {1}} U + * {@code {1}} = {@code {1}} => stop and predict 1, can announce + * ambiguity {@code {1,2}}
      • + * + *
      • {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 2, y)}, + * {@code (s', 3, y)} yields conflicting, reduced sets {@code {1}} U + * {@code {2}} = {@code {1,2}} => continue
      • + * + *
      • {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 3, y)}, + * {@code (s', 4, y)} yields conflicting, reduced sets {@code {1}} U + * {@code {3}} = {@code {1,3}} => continue
      • + * + *
      + * + *

      EXACT AMBIGUITY DETECTION

      + * + *

      If all states report the same conflicting set of alternatives, then we + * know we have the exact ambiguity set.

      + * + *

      |A_i|>1 and + * A_i = A_j for all i, j.

      + * + *

      In other words, we continue examining lookahead until all {@code A_i} + * have more than one alternative and all {@code A_i} are the same. If + * {@code A={{1,2}, {1,3}}}, then regular LL prediction would terminate + * because the resolved set is {@code {1}}. To determine what the real + * ambiguity is, we have to know whether the ambiguity is between one and + * two or one and three so we keep going. When we need exact ambiguity + * detection, we can only stop prediction when the sets look like + * {@code A={{1,2}}} or {@code {{1,2},{1,2}}}, etc...

      + */ + static size_t resolvesToJustOneViableAlt(const std::vector &altsets); + + /// + /// Determines if every alternative subset in {@code altsets} contains more + /// than one alternative. + /// + /// a collection of alternative subsets + /// {@code true} if every in {@code altsets} + /// has + /// > 1, otherwise {@code + /// false} + static bool allSubsetsConflict(const std::vector &altsets); + + /// + /// Determines if any single alternative subset in {@code altsets} contains + /// exactly one alternative. + /// + /// a collection of alternative subsets + /// {@code true} if {@code altsets} contains a with + /// 1, otherwise {@code false} + /// + static bool hasNonConflictingAltSet(const std::vector &altsets); + + /// + /// Determines if any single alternative subset in {@code altsets} contains + /// more than one alternative. + /// + /// a collection of alternative subsets + /// {@code true} if {@code altsets} contains a with + /// > 1, otherwise {@code + /// false} + static bool hasConflictingAltSet(const std::vector &altsets); + + /// + /// Determines if every alternative subset in {@code altsets} is equivalent. + /// + /// a collection of alternative subsets + /// {@code true} if every member of {@code altsets} is equal to the + /// others, otherwise {@code false} + static bool allSubsetsEqual(const std::vector &altsets); + + /// + /// Returns the unique alternative predicted by all alternative subsets in + /// {@code altsets}. If no such alternative exists, this method returns + /// . + /// + /// a collection of alternative subsets + static size_t getUniqueAlt(const std::vector &altsets); + + /// + /// Gets the complete set of represented alternatives for a collection of + /// alternative subsets. This method returns the union of each + /// in {@code altsets}. 
+ /// + /// a collection of alternative subsets + /// the set of represented alternatives in {@code altsets} + static antlrcpp::BitSet getAlts(const std::vector &altsets); + + /** Get union of all alts from configs. @since 4.5.1 */ + static antlrcpp::BitSet getAlts(ATNConfigSet *configs); + + /// + /// This function gets the conflicting alt subsets from a configuration set. + /// For each configuration {@code c} in {@code configs}: + /// + ///
      +    /// map[c] U= c. # map hash/equals uses s and
      +    /// x, not
      +    /// alt and not pred
      +    /// 
      + ///
      + static std::vector getConflictingAltSubsets(ATNConfigSet *configs); + + /// + /// Get a map from state to alt subset from a configuration set. For each + /// configuration {@code c} in {@code configs}: + /// + ///
      +    /// map[c.] U= c.
      +    /// 
      + ///
      + static std::map getStateToAltMap(ATNConfigSet *configs); + + static bool hasStateAssociatedWithOneAlt(ATNConfigSet *configs); + + static size_t getSingleViableAlt(const std::vector &altsets); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/ProfilingATNSimulator.h b/lib/antlr4/include/atn/ProfilingATNSimulator.h new file mode 100644 index 0000000..79ecd00 --- /dev/null +++ b/lib/antlr4/include/atn/ProfilingATNSimulator.h @@ -0,0 +1,60 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ParserATNSimulator.h" +#include "atn/DecisionInfo.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC ProfilingATNSimulator : public ParserATNSimulator { + public: + ProfilingATNSimulator(Parser *parser); + + virtual size_t adaptivePredict(TokenStream *input, size_t decision, ParserRuleContext *outerContext) override; + + virtual std::vector getDecisionInfo() const; + virtual dfa::DFAState* getCurrentState() const; + + protected: + std::vector _decisions; + + int _sllStopIndex = 0; + int _llStopIndex = 0; + + size_t _currentDecision = 0; + dfa::DFAState *_currentState; + + /// + /// At the point of LL failover, we record how SLL would resolve the conflict so that + /// we can determine whether or not a decision / input pair is context-sensitive. + /// If LL gives a different result than SLL's predicted alternative, we have a + /// context sensitivity for sure. The converse is not necessarily true, however. + /// It's possible that after conflict resolution chooses minimum alternatives, + /// SLL could get the same answer as LL. 
Regardless of whether or not the result indicates + /// an ambiguity, it is not treated as a context sensitivity because LL prediction + /// was not required in order to produce a correct prediction for this decision and input sequence. + /// It may in fact still be a context sensitivity but we don't know by looking at the + /// minimum alternatives for the current input. + /// + size_t conflictingAltResolvedBySLL = 0; + + virtual dfa::DFAState* getExistingTargetState(dfa::DFAState *previousD, size_t t) override; + virtual dfa::DFAState* computeTargetState(dfa::DFA &dfa, dfa::DFAState *previousD, size_t t) override; + virtual std::unique_ptr computeReachSet(ATNConfigSet *closure, size_t t, bool fullCtx) override; + virtual bool evalSemanticContext(Ref const& pred, ParserRuleContext *parserCallStack, + size_t alt, bool fullCtx) override; + virtual void reportAttemptingFullContext(dfa::DFA &dfa, const antlrcpp::BitSet &conflictingAlts, ATNConfigSet *configs, + size_t startIndex, size_t stopIndex) override; + virtual void reportContextSensitivity(dfa::DFA &dfa, size_t prediction, ATNConfigSet *configs, + size_t startIndex, size_t stopIndex) override; + virtual void reportAmbiguity(dfa::DFA &dfa, dfa::DFAState *D, size_t startIndex, size_t stopIndex, bool exact, + const antlrcpp::BitSet &ambigAlts, ATNConfigSet *configs) override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/RangeTransition.h b/lib/antlr4/include/atn/RangeTransition.h new file mode 100644 index 0000000..14093e2 --- /dev/null +++ b/lib/antlr4/include/atn/RangeTransition.h @@ -0,0 +1,29 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "atn/Transition.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC RangeTransition final : public Transition { + public: + const size_t from; + const size_t to; + + RangeTransition(ATNState *target, size_t from, size_t to); + + virtual SerializationType getSerializationType() const override; + + virtual misc::IntervalSet label() const override; + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + virtual std::string toString() const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/RuleStartState.h b/lib/antlr4/include/atn/RuleStartState.h new file mode 100644 index 0000000..94ab0e4 --- /dev/null +++ b/lib/antlr4/include/atn/RuleStartState.h @@ -0,0 +1,25 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ATNState.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC RuleStartState final : public ATNState { + public: + RuleStartState(); + + RuleStopState *stopState = nullptr; + bool isLeftRecursiveRule = false; + + virtual size_t getStateType() override; + + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/RuleStopState.h b/lib/antlr4/include/atn/RuleStopState.h new file mode 100644 index 0000000..8a4a580 --- /dev/null +++ b/lib/antlr4/include/atn/RuleStopState.h @@ -0,0 +1,25 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ATNState.h" + +namespace antlr4 { +namespace atn { + + /// The last node in the ATN for a rule, unless that rule is the start symbol. 
+ /// In that case, there is one transition to EOF. Later, we might encode + /// references to all calls to this rule to compute FOLLOW sets for + /// error handling. + class ANTLR4CPP_PUBLIC RuleStopState final : public ATNState { + + public: + virtual size_t getStateType() override; + + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/RuleTransition.h b/lib/antlr4/include/atn/RuleTransition.h new file mode 100644 index 0000000..50d3d29 --- /dev/null +++ b/lib/antlr4/include/atn/RuleTransition.h @@ -0,0 +1,40 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/Transition.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC RuleTransition : public Transition { + public: + /// Ptr to the rule definition object for this rule ref. + const size_t ruleIndex; // no Rule object at runtime + + const int precedence; + + /// What node to begin computations following ref to rule. + ATNState *followState; + + /// @deprecated Use + /// instead. 
+ RuleTransition(RuleStartState *ruleStart, size_t ruleIndex, ATNState *followState); + + RuleTransition(RuleStartState *ruleStart, size_t ruleIndex, int precedence, ATNState *followState); + RuleTransition(RuleTransition const&) = delete; + RuleTransition& operator=(RuleTransition const&) = delete; + + virtual SerializationType getSerializationType() const override; + + virtual bool isEpsilon() const override; + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + virtual std::string toString() const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/SemanticContext.h b/lib/antlr4/include/atn/SemanticContext.h new file mode 100644 index 0000000..7ccc16c --- /dev/null +++ b/lib/antlr4/include/atn/SemanticContext.h @@ -0,0 +1,222 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Recognizer.h" +#include "support/CPPUtils.h" + +namespace antlr4 { +namespace atn { + + /// A tree structure used to record the semantic context in which + /// an ATN configuration is valid. It's either a single predicate, + /// a conjunction "p1 && p2", or a sum of products "p1||p2". + /// + /// I have scoped the AND, OR, and Predicate subclasses of + /// SemanticContext within the scope of this outer class. 
+ class ANTLR4CPP_PUBLIC SemanticContext : public std::enable_shared_from_this { + public: + struct Hasher + { + size_t operator()(Ref const& k) const { + return k->hashCode(); + } + }; + + struct Comparer { + bool operator()(Ref const& lhs, Ref const& rhs) const { + if (lhs == rhs) + return true; + return (lhs->hashCode() == rhs->hashCode()) && (*lhs == *rhs); + } + }; + + + using Set = std::unordered_set, Hasher, Comparer>; + + /** + * The default {@link SemanticContext}, which is semantically equivalent to + * a predicate of the form {@code {true}?}. + */ + static const Ref NONE; + + virtual ~SemanticContext(); + + virtual size_t hashCode() const = 0; + virtual std::string toString() const = 0; + virtual bool operator == (const SemanticContext &other) const = 0; + virtual bool operator != (const SemanticContext &other) const; + + /// + /// For context independent predicates, we evaluate them without a local + /// context (i.e., null context). That way, we can evaluate them without + /// having to create proper rule-specific context during prediction (as + /// opposed to the parser, which creates them naturally). In a practical + /// sense, this avoids a cast exception from RuleContext to myruleContext. + ///

      + /// For context dependent predicates, we must pass in a local context so that + /// references such as $arg evaluate properly as _localctx.arg. We only + /// capture context dependent predicates in the context in which we begin + /// prediction, so we pass in the outer context here in case of context + /// dependent predicate evaluation. + ///

      + virtual bool eval(Recognizer *parser, RuleContext *parserCallStack) = 0; + + /** + * Evaluate the precedence predicates for the context and reduce the result. + * + * @param parser The parser instance. + * @param parserCallStack + * @return The simplified semantic context after precedence predicates are + * evaluated, which will be one of the following values. + *
        + *
      • {@link #NONE}: if the predicate simplifies to {@code true} after + * precedence predicates are evaluated.
      • + *
      • {@code null}: if the predicate simplifies to {@code false} after + * precedence predicates are evaluated.
      • + *
      • {@code this}: if the semantic context is not changed as a result of + * precedence predicate evaluation.
      • + *
      • A non-{@code null} {@link SemanticContext}: the new simplified + * semantic context after precedence predicates are evaluated.
      • + *
      + */ + virtual Ref evalPrecedence(Recognizer *parser, RuleContext *parserCallStack); + + static Ref And(Ref const& a, Ref const& b); + + /// See also: ParserATNSimulator::getPredsForAmbigAlts. + static Ref Or(Ref const& a, Ref const& b); + + class Predicate; + class PrecedencePredicate; + class Operator; + class AND; + class OR; + + private: + static std::vector> filterPrecedencePredicates(const Set &collection); + }; + + class ANTLR4CPP_PUBLIC SemanticContext::Predicate : public SemanticContext { + public: + const size_t ruleIndex; + const size_t predIndex; + const bool isCtxDependent; // e.g., $i ref in pred + + protected: + Predicate(); + + public: + Predicate(size_t ruleIndex, size_t predIndex, bool isCtxDependent); + + virtual bool eval(Recognizer *parser, RuleContext *parserCallStack) override; + virtual size_t hashCode() const override; + virtual bool operator == (const SemanticContext &other) const override; + virtual std::string toString() const override; + }; + + class ANTLR4CPP_PUBLIC SemanticContext::PrecedencePredicate : public SemanticContext { + public: + const int precedence; + + protected: + PrecedencePredicate(); + + public: + PrecedencePredicate(int precedence); + + virtual bool eval(Recognizer *parser, RuleContext *parserCallStack) override; + virtual Ref evalPrecedence(Recognizer *parser, RuleContext *parserCallStack) override; + virtual int compareTo(PrecedencePredicate *o); + virtual size_t hashCode() const override; + virtual bool operator == (const SemanticContext &other) const override; + virtual std::string toString() const override; + }; + + /** + * This is the base class for semantic context "operators", which operate on + * a collection of semantic context "operands". + * + * @since 4.3 + */ + class ANTLR4CPP_PUBLIC SemanticContext::Operator : public SemanticContext { + public: + virtual ~Operator() override; + + /** + * Gets the operands for the semantic context operator. 
+ * + * @return a collection of {@link SemanticContext} operands for the + * operator. + * + * @since 4.3 + */ + + virtual std::vector> getOperands() const = 0; + }; + + /** + * A semantic context which is true whenever none of the contained contexts + * is false. + */ + class ANTLR4CPP_PUBLIC SemanticContext::AND : public SemanticContext::Operator { + public: + std::vector> opnds; + + AND(Ref const& a, Ref const& b) ; + + virtual std::vector> getOperands() const override; + virtual bool operator == (const SemanticContext &other) const override; + virtual size_t hashCode() const override; + + /** + * The evaluation of predicates by this context is short-circuiting, but + * unordered.

      + */ + virtual bool eval(Recognizer *parser, RuleContext *parserCallStack) override; + virtual Ref evalPrecedence(Recognizer *parser, RuleContext *parserCallStack) override; + virtual std::string toString() const override; + }; + + /** + * A semantic context which is true whenever at least one of the contained + * contexts is true. + */ + class ANTLR4CPP_PUBLIC SemanticContext::OR : public SemanticContext::Operator { + public: + std::vector> opnds; + + OR(Ref const& a, Ref const& b); + + virtual std::vector> getOperands() const override; + virtual bool operator == (const SemanticContext &other) const override; + virtual size_t hashCode() const override; + + /** + * The evaluation of predicates by this context is short-circuiting, but + * unordered. + */ + virtual bool eval(Recognizer *parser, RuleContext *parserCallStack) override; + virtual Ref evalPrecedence(Recognizer *parser, RuleContext *parserCallStack) override; + virtual std::string toString() const override; + }; + +} // namespace atn +} // namespace antlr4 + +// Hash function for SemanticContext, used in the MurmurHash::update function + +namespace std { + using antlr4::atn::SemanticContext; + + template <> struct hash + { + size_t operator () (SemanticContext &x) const + { + return x.hashCode(); + } + }; +} diff --git a/lib/antlr4/include/atn/SetTransition.h b/lib/antlr4/include/atn/SetTransition.h new file mode 100644 index 0000000..044d41a --- /dev/null +++ b/lib/antlr4/include/atn/SetTransition.h @@ -0,0 +1,30 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/Transition.h" + +namespace antlr4 { +namespace atn { + + /// + /// A transition containing a set of values. 
+ class ANTLR4CPP_PUBLIC SetTransition : public Transition { + public: + const misc::IntervalSet set; + + SetTransition(ATNState *target, const misc::IntervalSet &set); + + virtual SerializationType getSerializationType() const override; + + virtual misc::IntervalSet label() const override; + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + virtual std::string toString() const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/SingletonPredictionContext.h b/lib/antlr4/include/atn/SingletonPredictionContext.h new file mode 100644 index 0000000..f1e993b --- /dev/null +++ b/lib/antlr4/include/atn/SingletonPredictionContext.h @@ -0,0 +1,36 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/PredictionContext.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC SingletonPredictionContext : public PredictionContext { + public: + // Usually a parent is linked via a weak ptr. Not so here as we have kinda reverse reference chain. + // There are no child contexts stored here and often the parent context is left dangling when it's + // owning ATNState is released. In order to avoid having this context released as well (leaving all other contexts + // which got this one as parent with a null reference) we use a shared_ptr here instead, to keep those left alone + // parent contexts alive. 
+ const Ref parent; + const size_t returnState; + + SingletonPredictionContext(Ref const& parent, size_t returnState); + virtual ~SingletonPredictionContext(); + + static Ref create(Ref const& parent, size_t returnState); + + virtual size_t size() const override; + virtual Ref getParent(size_t index) const override; + virtual size_t getReturnState(size_t index) const override; + virtual bool operator == (const PredictionContext &o) const override; + virtual std::string toString() const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/StarBlockStartState.h b/lib/antlr4/include/atn/StarBlockStartState.h new file mode 100644 index 0000000..8fae316 --- /dev/null +++ b/lib/antlr4/include/atn/StarBlockStartState.h @@ -0,0 +1,21 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/BlockStartState.h" + +namespace antlr4 { +namespace atn { + + /// The block that begins a closure loop. + class ANTLR4CPP_PUBLIC StarBlockStartState final : public BlockStartState { + + public: + virtual size_t getStateType() override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/StarLoopEntryState.h b/lib/antlr4/include/atn/StarLoopEntryState.h new file mode 100644 index 0000000..a062c58 --- /dev/null +++ b/lib/antlr4/include/atn/StarLoopEntryState.h @@ -0,0 +1,35 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "atn/DecisionState.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC StarLoopEntryState final : public DecisionState { + public: + StarLoopEntryState(); + + /** + * Indicates whether this state can benefit from a precedence DFA during SLL + * decision making. + * + *

      This is a computed property that is calculated during ATN deserialization + * and stored for use in {@link ParserATNSimulator} and + * {@link ParserInterpreter}.

      + * + * @see DFA#isPrecedenceDfa() + */ + bool isPrecedenceDecision = false; + + StarLoopbackState *loopBackState = nullptr; + + virtual size_t getStateType() override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/StarLoopbackState.h b/lib/antlr4/include/atn/StarLoopbackState.h new file mode 100644 index 0000000..f5db3ef --- /dev/null +++ b/lib/antlr4/include/atn/StarLoopbackState.h @@ -0,0 +1,21 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ATNState.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC StarLoopbackState final : public ATNState { + public: + StarLoopEntryState *getLoopEntryState(); + + virtual size_t getStateType() override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/TokensStartState.h b/lib/antlr4/include/atn/TokensStartState.h new file mode 100644 index 0000000..e534d04 --- /dev/null +++ b/lib/antlr4/include/atn/TokensStartState.h @@ -0,0 +1,21 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionState.h" + +namespace antlr4 { +namespace atn { + + /// The Tokens rule start state linking to each lexer rule start state. + class ANTLR4CPP_PUBLIC TokensStartState final : public DecisionState { + + public: + virtual size_t getStateType() override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/Transition.h b/lib/antlr4/include/atn/Transition.h new file mode 100644 index 0000000..ffed2f5 --- /dev/null +++ b/lib/antlr4/include/atn/Transition.h @@ -0,0 +1,76 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "misc/IntervalSet.h" + +namespace antlr4 { +namespace atn { + + /// + /// An ATN transition between any two ATN states. Subclasses define + /// atom, set, epsilon, action, predicate, rule transitions. + ///

      + /// This is a one way link. It emanates from a state (usually via a list of + /// transitions) and has a target state. + ///

      + /// Since we never have to change the ATN transitions once we construct it, + /// we can fix these transitions as specific classes. The DFA transitions + /// on the other hand need to update the labels as it adds transitions to + /// the states. We'll use the term Edge for the DFA to distinguish them from + /// ATN transitions. + ///

      + class ANTLR4CPP_PUBLIC Transition { + public: + // constants for serialization + enum SerializationType { + EPSILON = 1, + RANGE = 2, + RULE = 3, + PREDICATE = 4, // e.g., {isType(input.LT(1))}? + ATOM = 5, + ACTION = 6, + SET = 7, // ~(A|B) or ~atom, wildcard, which convert to next 2 + NOT_SET = 8, + WILDCARD = 9, + PRECEDENCE = 10, + }; + + static const std::vector serializationNames; + + /// The target of this transition. + // ml: this is a reference into the ATN. + ATNState *target; + + virtual ~Transition(); + + protected: + Transition(ATNState *target); + + public: + virtual SerializationType getSerializationType() const = 0; + + /** + * Determines if the transition is an "epsilon" transition. + * + *

      The default implementation returns {@code false}.

      + * + * @return {@code true} if traversing this transition in the ATN does not + * consume an input symbol; otherwise, {@code false} if traversing this + * transition consumes (matches) an input symbol. + */ + virtual bool isEpsilon() const; + virtual misc::IntervalSet label() const; + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const = 0; + + virtual std::string toString() const; + + Transition(Transition const&) = delete; + Transition& operator=(Transition const&) = delete; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/atn/WildcardTransition.h b/lib/antlr4/include/atn/WildcardTransition.h new file mode 100644 index 0000000..c47c717 --- /dev/null +++ b/lib/antlr4/include/atn/WildcardTransition.h @@ -0,0 +1,25 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/Transition.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC WildcardTransition final : public Transition { + public: + WildcardTransition(ATNState *target); + + virtual SerializationType getSerializationType() const override; + + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + virtual std::string toString() const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/dfa/DFA.h b/lib/antlr4/include/dfa/DFA.h new file mode 100644 index 0000000..99daf0a --- /dev/null +++ b/lib/antlr4/include/dfa/DFA.h @@ -0,0 +1,91 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */
+
+#pragma once
+
+#include "dfa/DFAState.h"
+
+namespace antlrcpp {
+  class SingleWriteMultipleReadLock;
+}
+
+namespace antlr4 {
+namespace dfa {
+
+  class ANTLR4CPP_PUBLIC DFA {
+  public:
+    /// A set of all DFA states. Use a map so we can get old state back.
+    /// Set only allows you to see if it's there.
+
+    /// From which ATN state did we create this DFA?
+    atn::DecisionState *atnStartState;
+    std::unordered_set<DFAState *, DFAState::Hasher, DFAState::Comparer> states; // States are owned by this class.
+    DFAState *s0;
+    size_t decision;
+
+    DFA(atn::DecisionState *atnStartState);
+    DFA(atn::DecisionState *atnStartState, size_t decision);
+    DFA(const DFA &other) = delete;
+    DFA(DFA &&other);
+    virtual ~DFA();
+
+    /**
+     * Gets whether this DFA is a precedence DFA. Precedence DFAs use a special
+     * start state {@link #s0} which is not stored in {@link #states}. The
+     * {@link DFAState#edges} array for this start state contains outgoing edges
+     * supplying individual start states corresponding to specific precedence
+     * values.
+     *
+     * @return {@code true} if this is a precedence DFA; otherwise,
+     * {@code false}.
+     * @see Parser#getPrecedence()
+     */
+    bool isPrecedenceDfa() const;
+
+    /**
+     * Get the start state for a specific precedence value.
+     *
+     * @param precedence The current precedence.
+     * @return The start state corresponding to the specified precedence, or
+     * {@code null} if no start state exists for the specified precedence.
+     *
+     * @throws IllegalStateException if this is not a precedence DFA.
+     * @see #isPrecedenceDfa()
+     */
+    DFAState* getPrecedenceStartState(int precedence) const;
+
+    /**
+     * Set the start state for a specific precedence value.
+     *
+     * @param precedence The current precedence.
+     * @param startState The start state corresponding to the specified
+     * precedence.
+     *
+     * @throws IllegalStateException if this is not a precedence DFA.
+     * @see #isPrecedenceDfa()
+     */
+    void setPrecedenceStartState(int precedence, DFAState *startState, antlrcpp::SingleWriteMultipleReadLock &lock);
+
+    /// Return a list of all states in this DFA, ordered by state number.
+    virtual std::vector<DFAState *> getStates() const;
+
+    /**
+     * @deprecated Use {@link #toString(Vocabulary)} instead.
+     */
+    virtual std::string toString(const std::vector<std::string>& tokenNames);
+    std::string toString(const Vocabulary &vocabulary) const;
+
+    virtual std::string toLexerString();
+
+  private:
+    /**
+     * {@code true} if this DFA is for a precedence decision; otherwise,
+     * {@code false}. This is the backing field for {@link #isPrecedenceDfa}.
+     */
+    bool _precedenceDfa;
+  };
+
+} // namespace dfa
+} // namespace antlr4
diff --git a/lib/antlr4/include/dfa/DFASerializer.h b/lib/antlr4/include/dfa/DFASerializer.h
new file mode 100644
index 0000000..a1fe5a5
--- /dev/null
+++ b/lib/antlr4/include/dfa/DFASerializer.h
@@ -0,0 +1,32 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "Vocabulary.h"
+
+namespace antlr4 {
+namespace dfa {
+
+  /// A DFA walker that knows how to dump them to serialized strings.
+ class ANTLR4CPP_PUBLIC DFASerializer { + public: + DFASerializer(const DFA *dfa, const std::vector& tnames); + DFASerializer(const DFA *dfa, const Vocabulary &vocabulary); + virtual ~DFASerializer(); + + virtual std::string toString() const; + + protected: + virtual std::string getEdgeLabel(size_t i) const; + virtual std::string getStateString(DFAState *s) const; + + private: + const DFA *_dfa; + const Vocabulary &_vocabulary; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/dfa/DFAState.h b/lib/antlr4/include/dfa/DFAState.h new file mode 100644 index 0000000..2f0ddba --- /dev/null +++ b/lib/antlr4/include/dfa/DFAState.h @@ -0,0 +1,144 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace dfa { + + /// + /// A DFA state represents a set of possible ATN configurations. + /// As Aho, Sethi, Ullman p. 117 says "The DFA uses its state + /// to keep track of all possible states the ATN can be in after + /// reading each input symbol. That is to say, after reading + /// input a1a2..an, the DFA is in a state that represents the + /// subset T of the states of the ATN that are reachable from the + /// ATN's start state along some path labeled a1a2..an." + /// In conventional NFA->DFA conversion, therefore, the subset T + /// would be a bitset representing the set of states the + /// ATN could be in. We need to track the alt predicted by each + /// state as well, however. More importantly, we need to maintain + /// a stack of states, tracking the closure operations as they + /// jump from rule to rule, emulating rule invocations (method calls). + /// I have to add a stack to simulate the proper lookahead sequences for + /// the underlying LL grammar from which the ATN was derived. + ///

      + /// I use a set of ATNConfig objects not simple states. An ATNConfig + /// is both a state (ala normal conversion) and a RuleContext describing + /// the chain of rules (if any) followed to arrive at that state. + ///

      + /// A DFA state may have multiple references to a particular state, + /// but with different ATN contexts (with same or different alts) + /// meaning that state was reached via a different set of rule invocations. + ///

      + class ANTLR4CPP_PUBLIC DFAState { + public: + class PredPrediction { + public: + Ref pred; // never null; at least SemanticContext.NONE + int alt; + + PredPrediction(const Ref &pred, int alt); + virtual ~PredPrediction(); + + virtual std::string toString(); + + private: + void InitializeInstanceFields(); + }; + + int stateNumber; + + std::unique_ptr configs; + + /// {@code edges[symbol]} points to target of symbol. Shift up by 1 so (-1) + /// maps to {@code edges[0]}. + // ml: this is a sparse list, so we use a map instead of a vector. + // Watch out: we no longer have the -1 offset, as it isn't needed anymore. + std::unordered_map edges; + + bool isAcceptState; + + /// if accept state, what ttype do we match or alt do we predict? + /// This is set to when {@code !=null} or + /// . + size_t prediction; + + Ref lexerActionExecutor; + + /// + /// Indicates that this state was created during SLL prediction that + /// discovered a conflict between the configurations in the state. Future + /// invocations immediately jumped doing + /// full context prediction if this field is true. + /// + bool requiresFullContext; + + /// + /// During SLL parsing, this is a list of predicates associated with the + /// ATN configurations of the DFA state. When we have predicates, + /// is {@code false} since full context prediction evaluates predicates + /// on-the-fly. If this is not null, then is + /// . + ///

      + /// We only use these for non- but conflicting states. That + /// means we know from the context (it's $ or we don't dip into outer + /// context) that it's an ambiguity not a conflict. + ///

      + /// This list is computed by . + ///

      + std::vector predicates; + + /// Map a predicate to a predicted alternative. + DFAState(); + DFAState(int state); + DFAState(std::unique_ptr configs); + virtual ~DFAState(); + + /// + /// Get the set of all alts mentioned by all ATN configurations in this + /// DFA state. + /// + virtual std::set getAltSet(); + + virtual size_t hashCode() const; + + /// Two DFAState instances are equal if their ATN configuration sets + /// are the same. This method is used to see if a state already exists. + /// + /// Because the number of alternatives and number of ATN configurations are + /// finite, there is a finite number of DFA states that can be processed. + /// This is necessary to show that the algorithm terminates. + /// + /// Cannot test the DFA state numbers here because in + /// ParserATNSimulator#addDFAState we need to know if any other state + /// exists that has this exact set of ATN configurations. The + /// stateNumber is irrelevant. + bool operator == (const DFAState &o) const; + + virtual std::string toString(); + + struct Hasher + { + size_t operator()(DFAState *k) const { + return k->hashCode(); + } + }; + + struct Comparer { + bool operator()(DFAState *lhs, DFAState *rhs) const + { + return *lhs == *rhs; + } + }; + + private: + void InitializeInstanceFields(); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/dfa/LexerDFASerializer.h b/lib/antlr4/include/dfa/LexerDFASerializer.h new file mode 100644 index 0000000..d157107 --- /dev/null +++ b/lib/antlr4/include/dfa/LexerDFASerializer.h @@ -0,0 +1,23 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */
+
+#pragma once
+
+#include "dfa/DFASerializer.h"
+
+namespace antlr4 {
+namespace dfa {
+
+  class ANTLR4CPP_PUBLIC LexerDFASerializer : public DFASerializer {
+  public:
+    LexerDFASerializer(DFA *dfa);
+    virtual ~LexerDFASerializer();
+
+  protected:
+    virtual std::string getEdgeLabel(size_t i) const override;
+  };
+
+} // namespace dfa
+} // namespace antlr4
diff --git a/lib/antlr4/include/misc/InterpreterDataReader.h b/lib/antlr4/include/misc/InterpreterDataReader.h
new file mode 100644
index 0000000..0c32ac6
--- /dev/null
+++ b/lib/antlr4/include/misc/InterpreterDataReader.h
@@ -0,0 +1,31 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "antlr4-common.h"
+
+namespace antlr4 {
+namespace misc {
+
+  struct InterpreterData {
+    atn::ATN atn;
+    dfa::Vocabulary vocabulary;
+    std::vector<std::string> ruleNames;
+    std::vector<std::string> channels; // Only valid for lexer grammars.
+    std::vector<std::string> modes; // Only valid for lexer grammars.
+
+    InterpreterData() {} // For invalid content.
+    InterpreterData(std::vector<std::string> const& literalNames, std::vector<std::string> const& symbolicNames);
+  };
+
+  // A class to read plain text interpreter data produced by ANTLR.
+  class ANTLR4CPP_PUBLIC InterpreterDataReader {
+  public:
+    static InterpreterData parseFile(std::string const& fileName);
+  };
+
+} // namespace misc
+} // namespace antlr4
diff --git a/lib/antlr4/include/misc/Interval.h b/lib/antlr4/include/misc/Interval.h
new file mode 100644
index 0000000..0198ee5
--- /dev/null
+++ b/lib/antlr4/include/misc/Interval.h
@@ -0,0 +1,84 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "antlr4-common.h"
+
+namespace antlr4 {
+namespace misc {
+
+  // Helpers to convert certain unsigned symbols (e.g. Token::EOF) to their original numeric value (e.g. -1)
+  // and vice versa. This is needed mostly for intervals to keep their original order and for toString()
+  // methods to print the original numeric value (e.g. for tests).
+  size_t numericToSymbol(ssize_t v);
+  ssize_t symbolToNumeric(size_t v);
+
+  /// An immutable inclusive interval a..b
+  class ANTLR4CPP_PUBLIC Interval {
+  public:
+    static const Interval INVALID;
+
+    // Must stay signed to guarantee the correct sort order.
+    ssize_t a;
+    ssize_t b;
+
+    Interval();
+    explicit Interval(size_t a_, size_t b_); // For unsigned -> signed mappings.
+    Interval(ssize_t a_, ssize_t b_);
+
+    /// Return the number of elements between a and b inclusively. x..x is length 1.
+    /// If b < a, then length is 0. 9..10 has length 2.
+    size_t length() const;
+
+    bool operator == (const Interval &other) const;
+
+    size_t hashCode() const;
+
+    ///
+    /// Does this start completely before other? Disjoint
+    bool startsBeforeDisjoint(const Interval &other) const;
+
+    ///
+    /// Does this start at or before other? Nondisjoint
+    bool startsBeforeNonDisjoint(const Interval &other) const;
+
+    ///
+    /// Does this.a start after other.b? May or may not be disjoint
+    bool startsAfter(const Interval &other) const;
+
+    ///
+    /// Does this start completely after other? Disjoint
+    bool startsAfterDisjoint(const Interval &other) const;
+
+    ///
+    /// Does this start after other? NonDisjoint
+    bool startsAfterNonDisjoint(const Interval &other) const;
+
+    ///
+    /// Are both ranges disjoint? I.e., no overlap?
+    bool disjoint(const Interval &other) const;
+
+    ///
+    /// Are two intervals adjacent such as 0..41 and 42..42?
+    bool adjacent(const Interval &other) const;
+
+    bool properlyContains(const Interval &other) const;
+
+    ///
+    /// Return the interval computed from combining this and other
+    Interval Union(const Interval &other) const;
+
+    ///
+    /// Return the interval in common between this and other
+    Interval intersection(const Interval &other) const;
+
+    std::string toString() const;
+
+  private:
+  };
+
+} // namespace misc
+} // namespace antlr4
diff --git a/lib/antlr4/include/misc/IntervalSet.h b/lib/antlr4/include/misc/IntervalSet.h
new file mode 100644
index 0000000..aa2adf6
--- /dev/null
+++ b/lib/antlr4/include/misc/IntervalSet.h
@@ -0,0 +1,198 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "misc/Interval.h"
+#include "Exceptions.h"
+
+namespace antlr4 {
+namespace misc {
+
+  /**
+   * This class implements the {@link IntSet} backed by a sorted array of
+   * non-overlapping intervals. It is particularly efficient for representing
+   * large collections of numbers, where the majority of elements appear as part
+   * of a sequential range of numbers that are all part of the set. For example,
+   * the set { 1, 2, 3, 4, 7, 8 } may be represented as { [1, 4], [7, 8] }.
+   *
+   *

      + * This class is able to represent sets containing any combination of values in + * the range {@link Integer#MIN_VALUE} to {@link Integer#MAX_VALUE} + * (inclusive).

      + */ + class ANTLR4CPP_PUBLIC IntervalSet { + public: + static IntervalSet const COMPLETE_CHAR_SET; + static IntervalSet const EMPTY_SET; + + private: + /// The list of sorted, disjoint intervals. + std::vector _intervals; + + explicit IntervalSet(std::vector&& intervals); + + public: + IntervalSet(); + IntervalSet(IntervalSet const& set); + IntervalSet(IntervalSet&& set); + + template + IntervalSet(int, T1 t1, T_NEXT&&... next) : IntervalSet() { + // The first int argument is an ignored count for compatibility + // with the previous varargs based interface. + addItems(t1, std::forward(next)...); + } + + IntervalSet& operator=(IntervalSet const& set); + IntervalSet& operator=(IntervalSet&& set); + + /// Create a set with a single element, el. + static IntervalSet of(ssize_t a); + + /// Create a set with all ints within range [a..b] (inclusive) + static IntervalSet of(ssize_t a, ssize_t b); + + void clear(); + + /// Add a single element to the set. An isolated element is stored + /// as a range el..el. + void add(ssize_t el); + + /// Add interval; i.e., add all integers from a to b to set. + /// If b &sets); + + // Copy on write so we can cache a..a intervals and sets of that. + void add(const Interval &addition); + IntervalSet& addAll(const IntervalSet &set); + + template + void addItems(T1 t1, T_NEXT&&... next) { + add(t1); + addItems(std::forward(next)...); + } + + IntervalSet complement(ssize_t minElement, ssize_t maxElement) const; + + /// Given the set of possible values (rather than, say UNICODE or MAXINT), + /// return a new set containing all elements in vocabulary, but not in + /// this. The computation is (vocabulary - this). + /// + /// 'this' is assumed to be either a subset or equal to vocabulary. + IntervalSet complement(const IntervalSet &vocabulary) const; + + /// Compute this-other via this&~other. + /// Return a new set containing all elements in this but not in other. 
+ /// other is assumed to be a subset of this; + /// anything that is in other but not in this will be ignored. + IntervalSet subtract(const IntervalSet &other) const; + + /** + * Compute the set difference between two interval sets. The specific + * operation is {@code left - right}. If either of the input sets is + * {@code null}, it is treated as though it was an empty set. + */ + static IntervalSet subtract(const IntervalSet &left, const IntervalSet &right); + + IntervalSet Or(const IntervalSet &a) const; + + /// Return a new set with the intersection of this set with other. Because + /// the intervals are sorted, we can use an iterator for each list and + /// just walk them together. This is roughly O(min(n,m)) for interval + /// list lengths n and m. + IntervalSet And(const IntervalSet &other) const; + + /// Is el in any range of this set? + bool contains(size_t el) const; // For mapping of e.g. Token::EOF to -1 etc. + bool contains(ssize_t el) const; + + /// return true if this set has no members + bool isEmpty() const; + + /// If this set is a single integer, return it otherwise Token.INVALID_TYPE. + ssize_t getSingleElement() const; + + /** + * Returns the maximum value contained in the set. + * + * @return the maximum value contained in the set. If the set is empty, this + * method returns {@link Token#INVALID_TYPE}. + */ + ssize_t getMaxElement() const; + + /** + * Returns the minimum value contained in the set. + * + * @return the minimum value contained in the set. If the set is empty, this + * method returns {@link Token#INVALID_TYPE}. + */ + ssize_t getMinElement() const; + + /// + /// Return a list of Interval objects. + std::vector const& getIntervals() const; + + size_t hashCode() const; + + /// Are two IntervalSets equal? Because all intervals are sorted + /// and disjoint, equals is a simple linear walk over both lists + /// to make sure they are the same. 
+ bool operator == (const IntervalSet &other) const; + std::string toString() const; + std::string toString(bool elemAreChar) const; + + /** + * @deprecated Use {@link #toString(Vocabulary)} instead. + */ + std::string toString(const std::vector &tokenNames) const; + std::string toString(const dfa::Vocabulary &vocabulary) const; + + protected: + /** + * @deprecated Use {@link #elementName(Vocabulary, int)} instead. + */ + std::string elementName(const std::vector &tokenNames, ssize_t a) const; + std::string elementName(const dfa::Vocabulary &vocabulary, ssize_t a) const; + + public: + size_t size() const; + std::vector toList() const; + std::set toSet() const; + + /// Get the ith element of ordered set. Used only by RandomPhrase so + /// don't bother to implement if you're not doing that for a new + /// ANTLR code gen target. + ssize_t get(size_t i) const; + void remove(size_t el); // For mapping of e.g. Token::EOF to -1 etc. + void remove(ssize_t el); + + private: + void addItems() { /* No-op */ } + }; + +} // namespace atn +} // namespace antlr4 + +// Hash function for IntervalSet. + +namespace std { + using antlr4::misc::IntervalSet; + + template <> struct hash + { + size_t operator() (const IntervalSet &x) const + { + return x.hashCode(); + } + }; +} diff --git a/lib/antlr4/include/misc/MurmurHash.h b/lib/antlr4/include/misc/MurmurHash.h new file mode 100644 index 0000000..b8b5a55 --- /dev/null +++ b/lib/antlr4/include/misc/MurmurHash.h @@ -0,0 +1,76 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace misc { + + class ANTLR4CPP_PUBLIC MurmurHash { + + private: + static const size_t DEFAULT_SEED = 0; + + /// Initialize the hash using the default seed value. + /// Returns the intermediate hash value. 
+ public: + static size_t initialize(); + + /// Initialize the hash using the specified seed. + static size_t initialize(size_t seed); + + /// Update the intermediate hash value for the next input {@code value}. + /// the intermediate hash value + /// the value to add to the current hash + /// Returns the updated intermediate hash value. + static size_t update(size_t hash, size_t value); + + /** + * Update the intermediate hash value for the next input {@code value}. + * + * @param hash the intermediate hash value + * @param value the value to add to the current hash + * @return the updated intermediate hash value + */ + template + static size_t update(size_t hash, Ref const& value) { + return update(hash, value != nullptr ? value->hashCode() : 0); + } + + template + static size_t update(size_t hash, T *value) { + return update(hash, value != nullptr ? value->hashCode() : 0); + } + + /// + /// Apply the final computation steps to the intermediate value {@code hash} + /// to form the final result of the MurmurHash 3 hash function. + /// + /// the intermediate hash value + /// the number of calls to update() before calling finish() + /// the final hash result + static size_t finish(size_t hash, size_t entryCount); + + /// Utility function to compute the hash code of an array using the MurmurHash3 algorithm. 
+ /// + /// @param the array element type + /// the array data + /// the seed for the MurmurHash algorithm + /// the hash code of the data + template // where T is C array type + static size_t hashCode(const std::vector> &data, size_t seed) { + size_t hash = initialize(seed); + for (auto entry : data) { + hash = update(hash, entry->hashCode()); + } + + return finish(hash, data.size()); + } + }; + +} // namespace atn +} // namespace antlr4 diff --git a/lib/antlr4/include/misc/Predicate.h b/lib/antlr4/include/misc/Predicate.h new file mode 100644 index 0000000..1032d53 --- /dev/null +++ b/lib/antlr4/include/misc/Predicate.h @@ -0,0 +1,21 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace misc { + + class ANTLR4CPP_PUBLIC Predicate { + public: + virtual ~Predicate(); + + virtual bool test(tree::ParseTree *t) = 0; + }; + +} // namespace tree +} // namespace antlr4 diff --git a/lib/antlr4/include/support/Any.h b/lib/antlr4/include/support/Any.h new file mode 100644 index 0000000..5db59f6 --- /dev/null +++ b/lib/antlr4/include/support/Any.h @@ -0,0 +1,170 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +// A standard C++ class loosely modeled after boost::Any. 
+
+#pragma once
+
+#include "antlr4-common.h"
+
+#ifdef _MSC_VER
+  #pragma warning(push)
+  #pragma warning(disable: 4521) // 'antlrcpp::Any': multiple copy constructors specified
+#endif
+
+namespace antlrcpp {
+
+template<class T>
+  using StorageType = typename std::decay<T>::type;
+
+struct ANTLR4CPP_PUBLIC Any
+{
+  bool isNull() const { return _ptr == nullptr; }
+  bool isNotNull() const { return _ptr != nullptr; }
+
+  Any() : _ptr(nullptr) {
+  }
+
+  Any(Any& that) : _ptr(that.clone()) {
+  }
+
+  Any(Any&& that) : _ptr(that._ptr) {
+    that._ptr = nullptr;
+  }
+
+  Any(const Any& that) : _ptr(that.clone()) {
+  }
+
+  Any(const Any&& that) : _ptr(that.clone()) {
+  }
+
+  template<typename U>
+  Any(U&& value) : _ptr(new Derived<StorageType<U>>(std::forward<U>(value))) {
+  }
+
+  template<class U>
+  bool is() const {
+    auto derived = getDerived<U>(false);
+
+    return derived != nullptr;
+  }
+
+  template<class U>
+  StorageType<U>& as() {
+    auto derived = getDerived<U>(true); // Throws std::bad_cast if the stored type does not match.
+
+    return derived->value;
+  }
+
+  template<class U>
+  const StorageType<U>& as() const {
+    auto derived = getDerived<U>(true); // Throws std::bad_cast if the stored type does not match.
+
+    return derived->value;
+  }
+
+  template<class U>
+  operator U() {
+    return as<StorageType<U>>();
+  }
+
+  template<class U>
+  operator const U() const {
+    return as<const StorageType<U>>();
+  }
+
+  Any& operator = (const Any& a) {
+    if (_ptr == a._ptr)
+      return *this;
+
+    auto old_ptr = _ptr;
+    _ptr = a.clone();
+
+    if (old_ptr)
+      delete old_ptr;
+
+    return *this;
+  }
+
+  Any& operator = (Any&& a) {
+    if (_ptr == a._ptr)
+      return *this;
+
+    std::swap(_ptr, a._ptr);
+
+    return *this;
+  }
+
+  virtual ~Any();
+
+  virtual bool equals(Any other) const {
+    return _ptr == other._ptr; // NOTE(review): `other` is a by-value copy; this compares holder identity, not the stored values - confirm intent.
+  }
+
+private:
+  struct Base {
+    virtual ~Base() {};
+    virtual Base* clone() const = 0;
+  };
+
+  template<typename T>
+  struct Derived : Base
+  {
+    template<typename U> Derived(U&& value_) : value(std::forward<U>(value_)) {
+    }
+
+    T value;
+
+    Base* clone() const {
+      return clone<>();
+    }
+
+  private:
+    template<int N = 0, typename std::enable_if<N == N && std::is_nothrow_copy_constructible<T>::value, int>::type = 0>
+    Base* clone() const {
+      return new Derived<T>(value);
+    }
+
+    template<int N = 0, typename std::enable_if<N == N && !std::is_nothrow_copy_constructible<T>::value, int>::type = 0>
+    Base* clone() const {
+      return nullptr;
+    }
+
+  };
+
+  Base* clone() const
+  {
+    if (_ptr)
+      return _ptr->clone();
+    else
+      return nullptr;
+  }
+
+  template<class U>
+  Derived<StorageType<U>>* getDerived(bool checkCast) const {
+    typedef StorageType<U> T;
+
+    auto derived = dynamic_cast<Derived<T>*>(_ptr);
+
+    if (checkCast && !derived)
+      throw std::bad_cast();
+
+    return derived;
+  }
+
+  Base *_ptr;
+
+};
+
+  template<> inline
+  Any::Any(std::nullptr_t&& ) : _ptr(nullptr) {
+  }
+
+
+} // namespace antlrcpp
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
diff --git a/lib/antlr4/include/support/Arrays.h b/lib/antlr4/include/support/Arrays.h
new file mode 100644
index 0000000..18e6a8a
--- /dev/null
+++ b/lib/antlr4/include/support/Arrays.h
@@ -0,0 +1,110 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "antlr4-common.h"
+
+namespace antlrcpp {
+
+  class ANTLR4CPP_PUBLIC Arrays {
+  public:
+
+    static std::string listToString(const std::vector<std::string> &list, const std::string &separator);
+
+    template <typename T>
+    static bool equals(const std::vector<T> &a, const std::vector<T> &b) { // Same size and element-wise operator==.
+      if (a.size() != b.size())
+        return false;
+
+      for (size_t i = 0; i < a.size(); ++i)
+        if (!(a[i] == b[i]))
+          return false;
+
+      return true;
+    }
+
+    template <typename T>
+    static bool equals(const std::vector<T *> &a, const std::vector<T *> &b) { // Compares the pointees, not the addresses.
+      if (a.size() != b.size())
+        return false;
+
+      for (size_t i = 0; i < a.size(); ++i) {
+        if (a[i] == b[i]) // Same object, or both null.
+          continue;
+        if (!a[i] || !b[i] || !(*a[i] == *b[i])) // Exactly one null never matches; never dereference null.
+          return false;
+      }
+
+      return true;
+    }
+
+    template <typename T>
+    static bool equals(const std::vector<Ref<T>> &a, const std::vector<Ref<T>> &b) { // Compares the pointees, not the addresses.
+      if (a.size() != b.size())
+        return false;
+
+      for (size_t i = 0; i < a.size(); ++i) {
+        if (!a[i] && !b[i])
+          continue;
+        if (!a[i] || !b[i])
+          return false;
+        if (a[i] == b[i])
+          continue;
+
+        if (!(*a[i] == *b[i]))
+          return false;
+      }
+
+      return true;
+    }
+
+    template <typename T>
+    static std::string toString(const std::vector<T> &source) { // Renders "[a, b, c]" using T::toString().
+      std::string result = "[";
+      bool firstEntry = true;
+      for (auto &value : source) {
+        if (!firstEntry) { // Separator goes between entries, never after the last one.
+          result += ", ";
+        }
+        result += value.toString();
+        firstEntry = false;
+      }
+      return result + "]";
+    }
+
+    template <typename T>
+    static std::string toString(const std::vector<Ref<T>> &source) { // Renders "[a, b, c]"; entries must be non-null.
+      std::string result = "[";
+      bool firstEntry = true;
+      for (auto &value : source) {
+        if (!firstEntry) { // Separator goes between entries, never after the last one.
+          result += ", ";
+        }
+        result += value->toString();
+        firstEntry = false;
+      }
+      return result + "]";
+    }
+
+    template <typename T>
+    static std::string toString(const std::vector<T *> &source) { // Renders "[a, b, c]"; entries must be non-null.
+      std::string result = "[";
+      bool firstEntry = true;
+      for (auto value : source) {
+        if (!firstEntry) { // Separator goes between entries, never after the last one.
+          result += ", ";
+        }
+        result += value->toString();
+        firstEntry = false;
+      }
+      return result + "]";
+    }
+
+  };
+
+  template <>
+  std::string Arrays::toString(const std::vector<antlr4::tree::ParseTree *> &source);
+}
diff --git a/lib/antlr4/include/support/BitSet.h b/lib/antlr4/include/support/BitSet.h
new file mode 100644
index 0000000..bf849b1
--- /dev/null
+++ b/lib/antlr4/include/support/BitSet.h
@@ -0,0 +1,76 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "antlr4-common.h"
+
+namespace antlrcpp {
+
+  class ANTLR4CPP_PUBLIC BitSet : public std::bitset<2048> {
+  public:
+    size_t nextSetBit(size_t pos) const { // First set bit at index >= pos, or INVALID_INDEX if there is none.
+      for (size_t i = pos; i < size(); i++){
+        if (test(i)) {
+          return i;
+        }
+      }
+
+      return INVALID_INDEX;
+    }
+
+    // Prints the indices of all set bits as a comma separated list, e.g. {1, 5, 9}.
+    friend std::wostream& operator << (std::wostream& os, const BitSet& obj)
+    {
+      os << "{";
+      size_t total = obj.count();
+      for (size_t i = 0; i < obj.size(); i++){
+        if (obj.test(i)){
+          os << i;
+          --total;
+          if (total > 0){ // More set bits follow, so a separator is needed.
+            os << ", ";
+          }
+        }
+      }
+
+      os << "}";
+      return os;
+    }
+
+    static std::string subStringRepresentation(const std::vector<BitSet>::iterator &begin,
+                                               const std::vector<BitSet>::iterator &end) {
+      std::string result;
+      std::vector<BitSet>::iterator vectorIterator;
+
+      for (vectorIterator = begin; vectorIterator != end; vectorIterator++) {
+        result += vectorIterator->toString();
+      }
+      // The range is inclusive: `end` is dereferenced, so it must not be the container's end().
+      result += end->toString();
+
+      return result;
+    }
+
+    std::string toString(){ // Same "{1, 5, 9}" format as operator<<, as a narrow string.
+      std::stringstream stream;
+      stream << "{";
+      bool valueAdded = false;
+      for (size_t i = 0; i < size(); ++i){
+        if (test(i)){
+          if (valueAdded) {
+            stream << ", ";
+          }
+          stream << i;
+          valueAdded = true;
+        }
+      }
+
+      stream << "}";
+      return stream.str();
+    }
+
+  };
+}
diff --git a/lib/antlr4/include/support/CPPUtils.h b/lib/antlr4/include/support/CPPUtils.h
new file mode 100644
index 0000000..fc83503
--- /dev/null
+++ b/lib/antlr4/include/support/CPPUtils.h
@@ -0,0 +1,78 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlrcpp { + + std::string join(std::vector strings, const std::string &separator); + std::map toMap(const std::vector &keys); + std::string escapeWhitespace(std::string str, bool escapeSpaces); + std::string toHexString(const int t); + std::string arrayToString(const std::vector &data); + std::string replaceString(const std::string &s, const std::string &from, const std::string &to); + std::vector split(const std::string &s, const std::string &sep, int count); + std::string indent(const std::string &s, const std::string &indentation, bool includingFirst = true); + + // Using RAII + a lambda to implement a "finally" replacement. + struct FinalAction { + FinalAction(std::function f) : _cleanUp { f } {} + FinalAction(FinalAction &&other) : + _cleanUp(std::move(other._cleanUp)), _enabled(other._enabled) { + other._enabled = false; // Don't trigger the lambda after ownership has moved. + } + ~FinalAction() { if (_enabled) _cleanUp(); } + + void disable() { _enabled = false; } + private: + std::function _cleanUp; + bool _enabled {true}; + }; + + ANTLR4CPP_PUBLIC FinalAction finally(std::function f); + + // Convenience functions to avoid lengthy dynamic_cast() != nullptr checks in many places. + template + inline bool is(T2 *obj) { // For pointer types. + return dynamic_cast::type>(obj) != nullptr; + } + + template + inline bool is(Ref const& obj) { // For shared pointers. + return dynamic_cast(obj.get()) != nullptr; + } + + template + std::string toString(const T &o) { + std::stringstream ss; + // typeid gives the mangled class name, but that's all what's possible + // in a portable way. + ss << typeid(o).name() << "@" << std::hex << reinterpret_cast(&o); + return ss.str(); + } + + // Get the error text from an exception pointer or the current exception. 
+ std::string what(std::exception_ptr eptr = std::current_exception()); + + class SingleWriteMultipleReadLock { + public: + void readLock(); + void readUnlock(); + void writeLock(); + void writeUnlock(); + + private: + std::condition_variable _readerGate; + std::condition_variable _writerGate; + + std::mutex _mutex; + size_t _activeReaders = 0; + size_t _waitingWriters = 0; + size_t _activeWriters = 0; + }; + +} // namespace antlrcpp diff --git a/lib/antlr4/include/support/Declarations.h b/lib/antlr4/include/support/Declarations.h new file mode 100644 index 0000000..a355d9b --- /dev/null +++ b/lib/antlr4/include/support/Declarations.h @@ -0,0 +1,163 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +namespace antlr4 { + class ANTLRErrorListener; + class ANTLRErrorStrategy; + class ANTLRFileStream; + class ANTLRInputStream; + class BailErrorStrategy; + class BaseErrorListener; + class BufferedTokenStream; + class CharStream; + class CommonToken; + class CommonTokenFactory; + class CommonTokenStream; + class ConsoleErrorListener; + class DefaultErrorStrategy; + class DiagnosticErrorListener; + class EmptyStackException; + class FailedPredicateException; + class IllegalArgumentException; + class IllegalStateException; + class InputMismatchException; + class IntStream; + class InterpreterRuleContext; + class Lexer; + class LexerInterpreter; + class LexerNoViableAltException; + class ListTokenSource; + class NoSuchElementException; + class NoViableAltException; + class NullPointerException; + class ParseCancellationException; + class Parser; + class ParserInterpreter; + class ParserRuleContext; + class ProxyErrorListener; + class RecognitionException; + class Recognizer; + class RuleContext; + class Token; + template class TokenFactory; + class TokenSource; + class TokenStream; + class 
TokenStreamRewriter; + class UnbufferedCharStream; + class UnbufferedTokenStream; + class WritableToken; + + namespace misc { + class InterpreterDataReader; + class Interval; + class IntervalSet; + class MurmurHash; + class Utils; + class Predicate; + } + namespace atn { + class ATN; + class ATNConfig; + class ATNConfigSet; + class ATNDeserializationOptions; + class ATNDeserializer; + class ATNSerializer; + class ATNSimulator; + class ATNState; + enum class ATNType; + class AbstractPredicateTransition; + class ActionTransition; + class ArrayPredictionContext; + class AtomTransition; + class BasicBlockStartState; + class BasicState; + class BlockEndState; + class BlockStartState; + class DecisionState; + class EmptyPredictionContext; + class EpsilonTransition; + class LL1Analyzer; + class LexerAction; + class LexerActionExecutor; + class LexerATNConfig; + class LexerATNSimulator; + class LexerMoreAction; + class LexerPopModeAction; + class LexerSkipAction; + class LookaheadEventInfo; + class LoopEndState; + class NotSetTransition; + class OrderedATNConfigSet; + class ParseInfo; + class ParserATNSimulator; + class PlusBlockStartState; + class PlusLoopbackState; + class PrecedencePredicateTransition; + class PredicateTransition; + class PredictionContext; + enum class PredictionMode; + class PredictionModeClass; + class RangeTransition; + class RuleStartState; + class RuleStopState; + class RuleTransition; + class SemanticContext; + class SetTransition; + class SingletonPredictionContext; + class StarBlockStartState; + class StarLoopEntryState; + class StarLoopbackState; + class TokensStartState; + class Transition; + class WildcardTransition; + } + namespace dfa { + class DFA; + class DFASerializer; + class DFAState; + class LexerDFASerializer; + class Vocabulary; + } + namespace tree { + class AbstractParseTreeVisitor; + class ErrorNode; + class ErrorNodeImpl; + class ParseTree; + class ParseTreeListener; + template class ParseTreeProperty; + class ParseTreeVisitor; 
+ class ParseTreeWalker; + class SyntaxTree; + class TerminalNode; + class TerminalNodeImpl; + class Tree; + class Trees; + + namespace pattern { + class Chunk; + class ParseTreeMatch; + class ParseTreePattern; + class ParseTreePatternMatcher; + class RuleTagToken; + class TagChunk; + class TextChunk; + class TokenTagToken; + } + + namespace xpath { + class XPath; + class XPathElement; + class XPathLexerErrorListener; + class XPathRuleAnywhereElement; + class XPathRuleElement; + class XPathTokenAnywhereElement; + class XPathTokenElement; + class XPathWildcardAnywhereElement; + class XPathWildcardElement; + } + } +} diff --git a/lib/antlr4/include/support/StringUtils.h b/lib/antlr4/include/support/StringUtils.h new file mode 100644 index 0000000..4971528 --- /dev/null +++ b/lib/antlr4/include/support/StringUtils.h @@ -0,0 +1,54 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlrcpp { + + // For all conversions utf8 <-> utf32. + // VS 2015 and VS 2017 have different bugs in std::codecvt_utf8 (VS 2013 works fine). +#if defined(_MSC_VER) && _MSC_VER >= 1900 && _MSC_VER < 2000 + typedef std::wstring_convert, __int32> UTF32Converter; +#else + typedef std::wstring_convert, char32_t> UTF32Converter; +#endif + + // The conversion functions fails in VS2017, so we explicitly use a workaround. + template + inline std::string utf32_to_utf8(T const& data) + { + // Don't make the converter static or we have to serialize access to it. 
+ thread_local UTF32Converter converter; + + #if defined(_MSC_VER) && _MSC_VER >= 1900 && _MSC_VER < 2000 + auto p = reinterpret_cast(data.data()); + return converter.to_bytes(p, p + data.size()); + #else + return converter.to_bytes(data); + #endif + } + + inline UTF32String utf8_to_utf32(const char* first, const char* last) + { + thread_local UTF32Converter converter; + + #if defined(_MSC_VER) && _MSC_VER >= 1900 && _MSC_VER < 2000 + auto r = converter.from_bytes(first, last); + i32string s = reinterpret_cast(r.data()); + #else + std::u32string s = converter.from_bytes(first, last); + #endif + + return s; + } + + void replaceAll(std::string &str, std::string const& from, std::string const& to); + + // string <-> wstring conversion (UTF-16), e.g. for use with Window's wide APIs. + ANTLR4CPP_PUBLIC std::string ws2s(std::wstring const& wstr); + ANTLR4CPP_PUBLIC std::wstring s2ws(std::string const& str); +} diff --git a/lib/antlr4/include/support/guid.h b/lib/antlr4/include/support/guid.h new file mode 100644 index 0000000..b412497 --- /dev/null +++ b/lib/antlr4/include/support/guid.h @@ -0,0 +1,112 @@ +/* + The MIT License (MIT) + + Copyright (c) 2014 Graeme Hill (http://graemehill.ca) + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + */ +#pragma once + +#include +#include +#include +#include +#include +#include + +#ifdef GUID_ANDROID +#include +#endif + +// Class to represent a GUID/UUID. Each instance acts as a wrapper around a +// 16 byte value that can be passed around by value. It also supports +// conversion to string (via the stream operator <<) and conversion from a +// string via constructor. +class Guid +{ +public: + + // create a guid from vector of bytes + Guid(const std::vector &bytes); + + // create a guid from array of bytes + Guid(const unsigned char *bytes); + + // Create a guid from array of words. + Guid(const uint16_t *bytes, bool reverse); + + // create a guid from string + Guid(const std::string &fromString); + + // create empty guid + Guid(); + + // copy constructor + Guid(const Guid &other); + + // overload assignment operator + Guid &operator=(const Guid &other); + + // overload equality and inequality operator + bool operator==(const Guid &other) const; + bool operator!=(const Guid &other) const; + + const std::string toString() const; + std::vector::const_iterator begin() { return _bytes.begin(); } + std::vector::const_iterator end() { return _bytes.end(); } + std::vector::const_reverse_iterator rbegin() { return _bytes.rbegin(); } + std::vector::const_reverse_iterator rend() { return _bytes.rend(); } + + +private: + + // actual data + std::vector _bytes; + + // make the << operator a friend so it can access _bytes + friend std::ostream &operator<<(std::ostream &s, const Guid &guid); +}; + +// Class that can create new guids. The only reason this exists instead of +// just a global "newGuid" function is because some platforms will require +// that there is some attached context. 
In the case of android, we need to +// know what JNIEnv is being used to call back to Java, but the newGuid() +// function would no longer be cross-platform if we parameterized the android +// version. Instead, construction of the GuidGenerator may be different on +// each platform, but the use of newGuid is uniform. +class GuidGenerator +{ +public: +#ifdef GUID_ANDROID + GuidGenerator(JNIEnv *env); +#else + GuidGenerator() { } +#endif + + Guid newGuid(); + +#ifdef GUID_ANDROID +private: + JNIEnv *_env; + jclass _uuidClass; + jmethodID _newGuidMethod; + jmethodID _mostSignificantBitsMethod; + jmethodID _leastSignificantBitsMethod; +#endif +}; diff --git a/lib/antlr4/include/tree/AbstractParseTreeVisitor.h b/lib/antlr4/include/tree/AbstractParseTreeVisitor.h new file mode 100644 index 0000000..d21795b --- /dev/null +++ b/lib/antlr4/include/tree/AbstractParseTreeVisitor.h @@ -0,0 +1,128 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "tree/ParseTreeVisitor.h" + +namespace antlr4 { +namespace tree { + + class ANTLR4CPP_PUBLIC AbstractParseTreeVisitor : public ParseTreeVisitor { + public: + /// The default implementation calls on the + /// specified tree. + virtual antlrcpp::Any visit(ParseTree *tree) override { + return tree->accept(this); + } + + /** + *

      The default implementation initializes the aggregate result to + * {@link #defaultResult defaultResult()}. Before visiting each child, it + * calls {@link #shouldVisitNextChild shouldVisitNextChild}; if the result + * is {@code false} no more children are visited and the current aggregate + * result is returned. After visiting a child, the aggregate result is + * updated by calling {@link #aggregateResult aggregateResult} with the + * previous aggregate result and the result of visiting the child.

      + * + *

      The default implementation is not safe for use in visitors that modify + * the tree structure. Visitors that modify the tree should override this + * method to behave properly with respect to the specific algorithm in use.

      + */ + virtual antlrcpp::Any visitChildren(ParseTree *node) override { + antlrcpp::Any result = defaultResult(); + size_t n = node->children.size(); + for (size_t i = 0; i < n; i++) { + if (!shouldVisitNextChild(node, result)) { + break; + } + + antlrcpp::Any childResult = node->children[i]->accept(this); + result = aggregateResult(result, childResult); + } + + return result; + } + + /// The default implementation returns the result of + /// {@code defaultResult()}. + virtual antlrcpp::Any visitTerminal(TerminalNode * /*node*/) override { + return defaultResult(); + } + + /// The default implementation returns the result of + /// {@code defaultResult()}. + virtual antlrcpp::Any visitErrorNode(ErrorNode * /*node*/) override { + return defaultResult(); + } + + protected: + /// + /// Gets the default value returned by visitor methods. This value is + /// returned by the default implementations of + /// {@code visitTerminal} and {@code visitErrorNode}. + /// The default implementation of {@code visitChildren} + /// initializes its aggregate result to this value. + ///

      + /// The base implementation returns a null {@code Any} (constructed from {@code nullptr}). + ///

      + /// The default value returned by visitor methods. + virtual antlrcpp::Any defaultResult() { + return nullptr; // support isNotNull + } + + /// + /// Aggregates the results of visiting multiple children of a node. After + /// either all children are visited or {@code shouldVisitNextChild} returns + /// {@code false}, the aggregate value is returned as the result of + /// {@code visitChildren}. + ///

      + /// The default implementation returns {@code nextResult}, meaning + /// {@code visitChildren} will return the result of the last child visited + /// (or return the initial value if the node has no children). + ///

      + /// The previous aggregate value. In the default + /// implementation, the aggregate value is initialized to + /// {@code defaultResult()}, which is passed as the {@code aggregate} argument + /// to this method after the first child node is visited. + /// The result of the immediately preceding call to visit + /// a child node. + /// + /// The updated aggregate result. + virtual antlrcpp::Any aggregateResult(antlrcpp::Any /*aggregate*/, const antlrcpp::Any &nextResult) { + return nextResult; + } + + /// + /// This method is called after visiting each child in + /// {@code visitChildren}. This method is first called before the first + /// child is visited; at that point {@code currentResult} will be the initial + /// value (in the default implementation, the initial value is returned by a + /// call to {@code defaultResult()}). This method is not called after the last + /// child is visited. + ///

      + /// The default implementation always returns {@code true}, indicating that + /// {@code visitChildren} should only return after all children are visited. + /// One reason to override this method is to provide a "short circuit" + /// evaluation option for situations where the result of visiting a single + /// child has the potential to determine the result of the visit operation as + /// a whole. + ///

      + /// The whose children are currently being + /// visited. + /// The current aggregate result of the children visited + /// to the current point. + /// + /// {@code true} to continue visiting children. Otherwise return + /// {@code false} to stop visiting children and immediately return the + /// current aggregate result from . + virtual bool shouldVisitNextChild(ParseTree * /*node*/, const antlrcpp::Any &/*currentResult*/) { + return true; + } + + }; + +} // namespace tree +} // namespace antlr4 diff --git a/lib/antlr4/include/tree/ErrorNode.h b/lib/antlr4/include/tree/ErrorNode.h new file mode 100644 index 0000000..619f44d --- /dev/null +++ b/lib/antlr4/include/tree/ErrorNode.h @@ -0,0 +1,19 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "tree/TerminalNode.h" + +namespace antlr4 { +namespace tree { + + class ANTLR4CPP_PUBLIC ErrorNode : public virtual TerminalNode { + public: + ~ErrorNode() override; + }; + +} // namespace tree +} // namespace antlr4 diff --git a/lib/antlr4/include/tree/ErrorNodeImpl.h b/lib/antlr4/include/tree/ErrorNodeImpl.h new file mode 100644 index 0000000..b64b6f9 --- /dev/null +++ b/lib/antlr4/include/tree/ErrorNodeImpl.h @@ -0,0 +1,33 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "tree/ErrorNode.h" +#include "tree/TerminalNodeImpl.h" +#include "misc/Interval.h" + +#include "support/Any.h" + +namespace antlr4 { +namespace tree { + + /// + /// Represents a token that was consumed during resynchronization + /// rather than during a valid match operation. 
For example, + /// we will create this kind of a node during single token insertion + /// and deletion as well as during "consume until error recovery set" + /// upon no viable alternative exceptions. + /// + class ANTLR4CPP_PUBLIC ErrorNodeImpl : public virtual TerminalNodeImpl, public virtual ErrorNode { + public: + ErrorNodeImpl(Token *token); + ~ErrorNodeImpl() override; + + virtual antlrcpp::Any accept(ParseTreeVisitor *visitor) override; + }; + +} // namespace tree +} // namespace antlr4 diff --git a/lib/antlr4/include/tree/IterativeParseTreeWalker.h b/lib/antlr4/include/tree/IterativeParseTreeWalker.h new file mode 100644 index 0000000..8957d87 --- /dev/null +++ b/lib/antlr4/include/tree/IterativeParseTreeWalker.h @@ -0,0 +1,53 @@ +/* + * [The "BSD license"] + * Copyright (c) 2012 Terence Parr + * Copyright (c) 2012 Sam Harwell + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#include "antlr4-common.h" + +#include "tree/ParseTreeWalker.h" + +namespace antlr4 { +namespace tree { + + class ParseTreeListener; + + /** + * An iterative (read: non-recursive) pre-order and post-order tree walker that + * doesn't use the thread stack but heap-based stacks. Makes it possible to + * process deeply nested parse trees. + */ + class ANTLR4CPP_PUBLIC IterativeParseTreeWalker : public ParseTreeWalker { + public: + virtual void walk(ParseTreeListener *listener, ParseTree *t) const override; + }; + +} // namespace tree +} // namespace antlr4 diff --git a/lib/antlr4/include/tree/ParseTree.h b/lib/antlr4/include/tree/ParseTree.h new file mode 100644 index 0000000..088aac3 --- /dev/null +++ b/lib/antlr4/include/tree/ParseTree.h @@ -0,0 +1,102 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "support/Any.h" + +namespace antlr4 { +namespace tree { + + /// An interface to access the tree of objects created + /// during a parse that makes the data structure look like a simple parse tree. + /// This node represents both internal nodes, rule invocations, + /// and leaf nodes, token matches. + /// + /// The payload is either a or a object. + // ml: This class unites 4 Java classes: RuleNode, ParseTree, SyntaxTree and Tree. 
+ class ANTLR4CPP_PUBLIC ParseTree { + public: + ParseTree(); + ParseTree(ParseTree const&) = delete; + virtual ~ParseTree() {} + + ParseTree& operator=(ParseTree const&) = delete; + + /// The parent of this node. If the return value is null, then this + /// node is the root of the tree. + ParseTree *parent; + + /// If we are debugging or building a parse tree for a visitor, + /// we need to track all of the tokens and rule invocations associated + /// with this rule's context. This is empty for parsing w/o tree constr. + /// operation because we don't the need to track the details about + /// how we parse this rule. + // ml: memory is not managed here, but by the owning class. This is just for the structure. + std::vector children; + + /// Print out a whole tree, not just a node, in LISP format + /// {@code (root child1 .. childN)}. Print just a node if this is a leaf. + virtual std::string toStringTree(bool pretty = false) = 0; + virtual std::string toString() = 0; + + /// Specialize toStringTree so that it can print out more information + /// based upon the parser. + virtual std::string toStringTree(Parser *parser, bool pretty = false) = 0; + + virtual bool operator == (const ParseTree &other) const; + + /// The needs a double dispatch method. + // ml: This has been changed to use Any instead of a template parameter, to avoid the need of a virtual template function. + virtual antlrcpp::Any accept(ParseTreeVisitor *visitor) = 0; + + /// Return the combined text of all leaf nodes. Does not get any + /// off-channel tokens (if any) so won't return whitespace and + /// comments if they are sent to parser on hidden channel. + virtual std::string getText() = 0; + + /** + * Return an {@link Interval} indicating the index in the + * {@link TokenStream} of the first and last token associated with this + * subtree. If this node is a leaf, then the interval represents a single + * token and has interval i..i for token index i. + * + *

      An interval of i..i-1 indicates an empty interval at position + * i in the input stream, where 0 <= i <= the size of the input + * token stream. Currently, the code base can only have i=0..n-1 but + * in concept one could have an empty interval after EOF.

      + * + *

      If source interval is unknown, this returns {@link Interval#INVALID}.

      + * + *

      As a weird special case, the source interval for rules matched after + * EOF is unspecified.

      + */ + virtual misc::Interval getSourceInterval() = 0; + }; + + // A class to help managing ParseTree instances without the need of a shared_ptr. + class ANTLR4CPP_PUBLIC ParseTreeTracker { + public: + template + T* createInstance(Args&& ... args) { + static_assert(std::is_base_of::value, "Argument must be a parse tree type"); + T* result = new T(args...); + _allocated.push_back(result); + return result; + } + + void reset() { + for (auto entry : _allocated) + delete entry; + _allocated.clear(); + } + + private: + std::vector _allocated; + }; + + +} // namespace tree +} // namespace antlr4 diff --git a/lib/antlr4/include/tree/ParseTreeListener.h b/lib/antlr4/include/tree/ParseTreeListener.h new file mode 100644 index 0000000..6a7f96a --- /dev/null +++ b/lib/antlr4/include/tree/ParseTreeListener.h @@ -0,0 +1,39 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace tree { + + /** This interface describes the minimal core of methods triggered + * by {@link ParseTreeWalker}. 
E.g., + * + * ParseTreeWalker walker = new ParseTreeWalker(); + * walker.walk(myParseTreeListener, myParseTree); <-- triggers events in your listener + * + * If you want to trigger events in multiple listeners during a single + * tree walk, you can use the ParseTreeDispatcher object available at + * + * https://github.com/antlr/antlr4/issues/841 + */ + class ANTLR4CPP_PUBLIC ParseTreeListener { + public: + virtual ~ParseTreeListener(); + + virtual void visitTerminal(TerminalNode *node) = 0; + virtual void visitErrorNode(ErrorNode *node) = 0; + virtual void enterEveryRule(ParserRuleContext *ctx) = 0; + virtual void exitEveryRule(ParserRuleContext *ctx) = 0; + + bool operator == (const ParseTreeListener &other) { + return this == &other; + } + }; + +} // namespace tree +} // namespace antlr4 diff --git a/lib/antlr4/include/tree/ParseTreeProperty.h b/lib/antlr4/include/tree/ParseTreeProperty.h new file mode 100644 index 0000000..8669a10 --- /dev/null +++ b/lib/antlr4/include/tree/ParseTreeProperty.h @@ -0,0 +1,50 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace tree { + + /// + /// Associate a property with a parse tree node. Useful with parse tree listeners + /// that need to associate values with particular tree nodes, kind of like + /// specifying a return value for the listener event method that visited a + /// particular node. Example: + /// + ///
      +  /// ParseTreeProperty<int> values;
      +  /// values.put(tree, 36);
      +  /// int x = values.get(tree);
      +  /// values.removeFrom(tree);
      +  /// 
      + /// + /// You would make one declaration ({@code values} here) in the listener and use it many times + /// in your event methods. + ///
      + template + class ANTLR4CPP_PUBLIC ParseTreeProperty { + public: + virtual ~ParseTreeProperty() {} + virtual V get(ParseTree *node) { + return _annotations[node]; + } + virtual void put(ParseTree *node, V value) { + _annotations[node] = value; + } + virtual V removeFrom(ParseTree *node) { + auto value = _annotations[node]; + _annotations.erase(node); + return value; + } + + protected: + std::map _annotations; + }; + +} // namespace tree +} // namespace antlr4 diff --git a/lib/antlr4/include/tree/ParseTreeVisitor.h b/lib/antlr4/include/tree/ParseTreeVisitor.h new file mode 100644 index 0000000..5a08599 --- /dev/null +++ b/lib/antlr4/include/tree/ParseTreeVisitor.h @@ -0,0 +1,57 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "support/Any.h" + +namespace antlr4 { +namespace tree { + + /// + /// This interface defines the basic notion of a parse tree visitor. Generated + /// visitors implement this interface and the {@code XVisitor} interface for + /// grammar {@code X}. + /// + /// @param The return type of the visit operation. Use for + /// operations with no return type. + // ml: no template parameter here, to avoid the need for virtual template functions. Instead we have our Any class. + class ANTLR4CPP_PUBLIC ParseTreeVisitor { + public: + virtual ~ParseTreeVisitor(); + + /// + /// Visit a parse tree, and return a user-defined result of the operation. + /// + /// The to visit. + /// The result of visiting the parse tree. + virtual antlrcpp::Any visit(ParseTree *tree) = 0; + + /// + /// Visit the children of a node, and return a user-defined result of the + /// operation. + /// + /// The whose children should be visited. + /// The result of visiting the children of the node. 
+ virtual antlrcpp::Any visitChildren(ParseTree *node) = 0; + + /// + /// Visit a terminal node, and return a user-defined result of the operation. + /// + /// The to visit. + /// The result of visiting the node. + virtual antlrcpp::Any visitTerminal(TerminalNode *node) = 0; + + /// + /// Visit an error node, and return a user-defined result of the operation. + /// + /// The to visit. + /// The result of visiting the node. + virtual antlrcpp::Any visitErrorNode(ErrorNode *node) = 0; + + }; + +} // namespace tree +} // namespace antlr4 diff --git a/lib/antlr4/include/tree/ParseTreeWalker.h b/lib/antlr4/include/tree/ParseTreeWalker.h new file mode 100644 index 0000000..ca3e241 --- /dev/null +++ b/lib/antlr4/include/tree/ParseTreeWalker.h @@ -0,0 +1,31 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace tree { + + class ANTLR4CPP_PUBLIC ParseTreeWalker { + public: + static ParseTreeWalker &DEFAULT; + + virtual ~ParseTreeWalker(); + + virtual void walk(ParseTreeListener *listener, ParseTree *t) const; + + protected: + /// The discovery of a rule node, involves sending two events: the generic + /// and a + /// -specific event. First we trigger the generic and then + /// the rule specific. We do them in reverse order upon finishing the node. + virtual void enterRule(ParseTreeListener *listener, ParseTree *r) const; + virtual void exitRule(ParseTreeListener *listener, ParseTree *r) const; + }; + +} // namespace tree +} // namespace antlr4 diff --git a/lib/antlr4/include/tree/TerminalNode.h b/lib/antlr4/include/tree/TerminalNode.h new file mode 100644 index 0000000..7108f70 --- /dev/null +++ b/lib/antlr4/include/tree/TerminalNode.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "tree/ParseTree.h" + +namespace antlr4 { +namespace tree { + + class ANTLR4CPP_PUBLIC TerminalNode : public ParseTree { + public: + ~TerminalNode() override; + + virtual Token* getSymbol() = 0; + + /** Set the parent for this leaf node. + * + * Technically, this is not backward compatible as it changes + * the interface but no one was able to create custom + * TerminalNodes anyway so I'm adding as it improves internal + * code quality. + * + * @since 4.7 + */ + virtual void setParent(RuleContext *parent) = 0; + }; + +} // namespace tree +} // namespace antlr4 diff --git a/lib/antlr4/include/tree/TerminalNodeImpl.h b/lib/antlr4/include/tree/TerminalNodeImpl.h new file mode 100644 index 0000000..6f65d82 --- /dev/null +++ b/lib/antlr4/include/tree/TerminalNodeImpl.h @@ -0,0 +1,33 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "tree/TerminalNode.h" + +namespace antlr4 { +namespace tree { + + class ANTLR4CPP_PUBLIC TerminalNodeImpl : public virtual TerminalNode { + public: + Token *symbol; + + TerminalNodeImpl(Token *symbol); + + virtual Token* getSymbol() override; + virtual void setParent(RuleContext *parent) override; + virtual misc::Interval getSourceInterval() override; + + virtual antlrcpp::Any accept(ParseTreeVisitor *visitor) override; + + virtual std::string getText() override; + virtual std::string toStringTree(Parser *parser, bool pretty = false) override; + virtual std::string toString() override; + virtual std::string toStringTree(bool pretty = false) override; + + }; + +} // namespace tree +} // namespace antlr4 diff --git a/lib/antlr4/include/tree/Trees.h b/lib/antlr4/include/tree/Trees.h new file mode 100644 index 0000000..d9d0462 --- /dev/null +++ b/lib/antlr4/include/tree/Trees.h @@ -0,0 +1,78 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "tree/TerminalNode.h" +#include "ParserRuleContext.h" +#include "Recognizer.h" + +namespace antlr4 { +namespace tree { + + /// A set of utility routines useful for all kinds of ANTLR trees. + class ANTLR4CPP_PUBLIC Trees { + public: + /// Print out a whole tree in LISP form. getNodeText is used on the + /// node payloads to get the text for the nodes. Detect + /// parse trees and extract data appropriately. + static std::string toStringTree(ParseTree *t, bool pretty = false); + + /// Print out a whole tree in LISP form. getNodeText is used on the + /// node payloads to get the text for the nodes. Detect + /// parse trees and extract data appropriately. + static std::string toStringTree(ParseTree *t, Parser *recog, bool pretty = false); + + /// Print out a whole tree in LISP form. 
getNodeText is used on the + /// node payloads to get the text for the nodes. Detect + /// parse trees and extract data appropriately. + static std::string toStringTree(ParseTree *t, const std::vector &ruleNames, bool pretty = false); + static std::string getNodeText(ParseTree *t, Parser *recog); + static std::string getNodeText(ParseTree *t, const std::vector &ruleNames); + + /// Return a list of all ancestors of this node. The first node of + /// list is the root and the last is the parent of this node. + static std::vector getAncestors(ParseTree *t); + + /** Return true if t is u's parent or a node on path to root from u. + * Use == not equals(). + * + * @since 4.5.1 + */ + static bool isAncestorOf(ParseTree *t, ParseTree *u); + static std::vector findAllTokenNodes(ParseTree *t, size_t ttype); + static std::vector findAllRuleNodes(ParseTree *t, size_t ruleIndex); + static std::vector findAllNodes(ParseTree *t, size_t index, bool findTokens); + + /** Get all descendents; includes t itself. + * + * @since 4.5.1 + */ + static std::vector getDescendants(ParseTree *t); + + /** @deprecated */ + static std::vector descendants(ParseTree *t); + + /** Find smallest subtree of t enclosing range startTokenIndex..stopTokenIndex + * inclusively using postorder traversal. Recursive depth-first-search. + * + * @since 4.5.1 + */ + static ParserRuleContext* getRootOfSubtreeEnclosingRegion(ParseTree *t, + size_t startTokenIndex, // inclusive + size_t stopTokenIndex); // inclusive + + /** Return first node satisfying the pred + * + * @since 4.5.1 + */ + static ParseTree* findNodeSuchThat(ParseTree *t, Ref const& pred); + + private: + Trees(); + }; + +} // namespace tree +} // namespace antlr4 diff --git a/lib/antlr4/include/tree/pattern/Chunk.h b/lib/antlr4/include/tree/pattern/Chunk.h new file mode 100644 index 0000000..42e7838 --- /dev/null +++ b/lib/antlr4/include/tree/pattern/Chunk.h @@ -0,0 +1,44 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace tree { +namespace pattern { + + /// + /// A chunk is either a token tag, a rule tag, or a span of literal text within a + /// tree pattern. + ///

      + /// The method ParseTreePatternMatcher::split() returns a list of + /// chunks in preparation for creating a token stream by + /// ParseTreePatternMatcher::tokenize(). From there, we get a parse + /// tree with ParseTreePatternMatcher::compile(). These + /// chunks are converted to RuleTagToken, TokenTagToken, or the + /// regular tokens of the text surrounding the tags. + ///

      + class ANTLR4CPP_PUBLIC Chunk { + public: + Chunk() = default; + Chunk(Chunk const&) = default; + virtual ~Chunk(); + + Chunk& operator=(Chunk const&) = default; + + /// This method returns a text representation of the tag chunk. Labeled tags + /// are returned in the form {@code label:tag}, and unlabeled tags are + /// returned as just the tag name. + virtual std::string toString() { + std::string str; + return str; + } + }; + +} // namespace pattern +} // namespace tree +} // namespace antlr4 diff --git a/lib/antlr4/include/tree/pattern/ParseTreeMatch.h b/lib/antlr4/include/tree/pattern/ParseTreeMatch.h new file mode 100644 index 0000000..eefde46 --- /dev/null +++ b/lib/antlr4/include/tree/pattern/ParseTreeMatch.h @@ -0,0 +1,132 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace tree { +namespace pattern { + + /// Represents the result of matching a ParseTree against a tree pattern. + class ANTLR4CPP_PUBLIC ParseTreeMatch { + private: + /// This is the backing field for getTree(). + ParseTree *_tree; + + /// This is the backing field for getPattern(). + const ParseTreePattern &_pattern; + + /// This is the backing field for getLabels(). + std::map> _labels; + + /// This is the backing field for getMismatchedNode(). + ParseTree *_mismatchedNode; + + public: + /// + /// Constructs a new instance of from the specified + /// parse tree and pattern. + /// + /// The parse tree to match against the pattern. + /// The parse tree pattern. + /// A mapping from label names to collections of + /// objects located by the tree pattern matching process. + /// The first node which failed to match the tree + /// pattern during the matching process. 
+ /// + /// if {@code tree} is {@code null} + /// if {@code pattern} is {@code null} + /// if {@code labels} is {@code null} + ParseTreeMatch(ParseTree *tree, ParseTreePattern const& pattern, + const std::map> &labels, ParseTree *mismatchedNode); + ParseTreeMatch(ParseTreeMatch const&) = default; + virtual ~ParseTreeMatch(); + + /// + /// Get the last node associated with a specific {@code label}. + ///

      + /// For example, for pattern {@code <id:ID>}, {@code get("id")} returns the + /// node matched for that {@code ID}. If more than one node + /// matched the specified label, only the last is returned. If there is + /// no node associated with the label, this returns {@code null}. + ///

      + /// Pattern tags like {@code <ID>} and {@code <expr>} without labels are + /// considered to be labeled with {@code ID} and {@code expr}, respectively. + ///

      + /// The label to check. + /// + /// The last to match a tag with the specified + /// label, or {@code null} if no parse tree matched a tag with the label. + virtual ParseTree* get(const std::string &label); + + /// + /// Return all nodes matching a rule or token tag with the specified label. + ///

      + /// If the {@code label} is the name of a parser rule or token in the + /// grammar, the resulting list will contain both the parse trees matching + /// rule or tags explicitly labeled with the label and the complete set of + /// parse trees matching the labeled and unlabeled tags in the pattern for + /// the parser rule or token. For example, if {@code label} is {@code "foo"}, + /// the result will contain all of the following. + /// + ///

        + ///
      • Parse tree nodes matching tags of the form {@code <foo:anyRuleName>} and + /// {@code <foo:AnyTokenName>}.
      • + ///
      • Parse tree nodes matching tags of the form {@code <anyLabel:foo>}.
      • + ///
      • Parse tree nodes matching tags of the form {@code <foo>}.
      • + ///
      + ///
      + /// The label. + /// + /// A collection of all nodes matching tags with + /// the specified {@code label}. If no nodes matched the label, an empty list + /// is returned. + virtual std::vector getAll(const std::string &label); + + /// + /// Return a mapping from label → [list of nodes]. + ///

      + /// The map includes special entries corresponding to the names of rules and + /// tokens referenced in tags in the original pattern. For additional + /// information, see the description of getAll(). + ///

      + /// A mapping from labels to parse tree nodes. If the parse tree + /// pattern did not contain any rule or token tags, this map will be empty. + virtual std::map>& getLabels(); + + /// + /// Get the node at which we first detected a mismatch. + /// + /// the node at which we first detected a mismatch, or {@code null} + /// if the match was successful. + virtual ParseTree* getMismatchedNode(); + + /// + /// Gets a value indicating whether the match operation succeeded. + /// + /// {@code true} if the match operation succeeded; otherwise, + /// {@code false}. + virtual bool succeeded(); + + /// + /// Get the tree pattern we are matching against. + /// + /// The tree pattern we are matching against. + virtual const ParseTreePattern& getPattern(); + + /// + /// Get the parse tree we are trying to match to a pattern. + /// + /// The we are trying to match to a pattern. + virtual ParseTree* getTree(); + + virtual std::string toString(); + }; + +} // namespace pattern +} // namespace tree +} // namespace antlr4 diff --git a/lib/antlr4/include/tree/pattern/ParseTreePattern.h b/lib/antlr4/include/tree/pattern/ParseTreePattern.h new file mode 100644 index 0000000..d5b86ff --- /dev/null +++ b/lib/antlr4/include/tree/pattern/ParseTreePattern.h @@ -0,0 +1,105 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace tree { +namespace pattern { + + /// + /// A pattern like {@code = ;} converted to a by + /// . + /// + class ANTLR4CPP_PUBLIC ParseTreePattern { + public: + /// + /// Construct a new instance of the class. + /// + /// The which created this + /// tree pattern. + /// The tree pattern in concrete syntax form. + /// The parser rule which serves as the root of the + /// tree pattern. + /// The tree pattern in form. 
+ ParseTreePattern(ParseTreePatternMatcher *matcher, const std::string &pattern, int patternRuleIndex, + ParseTree *patternTree); + ParseTreePattern(ParseTreePattern const&) = default; + virtual ~ParseTreePattern(); + + /// + /// Match a specific parse tree against this tree pattern. + /// + /// The parse tree to match against this tree pattern. + /// A object describing the result of the + /// match operation. The method can be + /// used to determine whether or not the match was successful. + virtual ParseTreeMatch match(ParseTree *tree); + + /// + /// Determine whether or not a parse tree matches this tree pattern. + /// + /// The parse tree to match against this tree pattern. + /// {@code true} if {@code tree} is a match for the current tree + /// pattern; otherwise, {@code false}. + virtual bool matches(ParseTree *tree); + + /// Find all nodes using XPath and then try to match those subtrees against + /// this tree pattern. + /// @param tree The ParseTree to match against this pattern. + /// @param xpath An expression matching the nodes + /// + /// @returns A collection of ParseTreeMatch objects describing the + /// successful matches. Unsuccessful matches are omitted from the result, + /// regardless of the reason for the failure. + virtual std::vector findAll(ParseTree *tree, const std::string &xpath); + + /// + /// Get the which created this tree pattern. + /// + /// The which created this tree + /// pattern. + virtual ParseTreePatternMatcher *getMatcher() const; + + /// + /// Get the tree pattern in concrete syntax form. + /// + /// The tree pattern in concrete syntax form. + virtual std::string getPattern() const; + + /// + /// Get the parser rule which serves as the outermost rule for the tree + /// pattern. + /// + /// The parser rule which serves as the outermost rule for the tree + /// pattern. + virtual int getPatternRuleIndex() const; + + /// + /// Get the tree pattern as a . 
The rule and token tags from + /// the pattern are present in the parse tree as terminal nodes with a symbol + /// of type or . + /// + /// The tree pattern as a . + virtual ParseTree* getPatternTree() const; + + private: + const int patternRuleIndex; + + /// This is the backing field for . + const std::string _pattern; + + /// This is the backing field for . + ParseTree *_patternTree; + + /// This is the backing field for . + ParseTreePatternMatcher *const _matcher; + }; + +} // namespace pattern +} // namespace tree +} // namespace antlr4 diff --git a/lib/antlr4/include/tree/pattern/ParseTreePatternMatcher.h b/lib/antlr4/include/tree/pattern/ParseTreePatternMatcher.h new file mode 100644 index 0000000..e77c7bc --- /dev/null +++ b/lib/antlr4/include/tree/pattern/ParseTreePatternMatcher.h @@ -0,0 +1,185 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Exceptions.h" + +namespace antlr4 { +namespace tree { +namespace pattern { + + /// + /// A tree pattern matching mechanism for ANTLR s. + ///

      + /// Patterns are strings of source input text with special tags representing + /// token or rule references such as: + ///

      + /// {@code <ID> = <expr>;} + ///

      + /// Given a pattern start rule such as {@code statement}, this object constructs + /// a ParseTree with placeholders for the {@code ID} and {@code expr} + /// subtree. Then the match() routines can compare an actual + /// ParseTree from a parse with this pattern. Tag {@code <ID>} matches + /// any {@code ID} token and tag {@code <expr>} references the result of the + /// {@code expr} rule (generally an instance of {@code ExprContext}). + ///

      + /// Pattern {@code x = 0;} is a similar pattern that matches the same pattern + /// except that it requires the identifier to be {@code x} and the expression to + /// be {@code 0}. + ///

      + /// The matches() routines return {@code true} or {@code false} based + /// upon a match for the tree rooted at the parameter sent in. The + /// match() routines return a ParseTreeMatch object that + /// contains the parse tree, the parse tree pattern, and a map from tag name to + /// matched nodes (more below). A subtree that fails to match returns with + /// ParseTreeMatch::getMismatchedNode() set to the first tree node that did not + /// match. + ///

      + /// For efficiency, you can compile a tree pattern in string form to a + /// ParseTreePattern object. + ///

      + /// See {@code TestParseTreeMatcher} for lots of examples. + /// ParseTreePattern has two static helper methods: + /// ParseTreePattern::findAll() and ParseTreePattern::match() that + /// are easy to use but not super efficient because they create new + /// ParseTreePatternMatcher objects each time and have to compile the + /// pattern in string form before using it. + ///

      + /// The lexer and parser that you pass into the ParseTreePatternMatcher + /// constructor are used to parse the pattern in string form. The lexer converts + /// the {@code <ID> = <expr>;} into a sequence of four tokens (assuming lexer + /// throws out whitespace or puts it on a hidden channel). Be aware that the + /// input stream is reset for the lexer (but not the parser; a + /// ParserInterpreter is created to parse the input.). Any user-defined + /// fields you have put into the lexer might get changed when this mechanism asks + /// it to scan the pattern string. + ///

      + /// Normally a parser does not accept token {@code <expr>} as a valid + /// {@code expr} but, from the parser passed in, we create a special version of + /// the underlying grammar representation (an ATN) that allows imaginary + /// tokens representing rules ({@code <expr>}) to match entire rules. We call + /// these bypass alternatives. + ///

      + /// Delimiters are {@code <} and {@code >}, with {@code \} as the escape string + /// by default, but you can set them to whatever you want using + /// setDelimiters(). You must escape both start and stop strings + /// {@code \<} and {@code \>}. + ///

      + class ANTLR4CPP_PUBLIC ParseTreePatternMatcher { + public: + class CannotInvokeStartRule : public RuntimeException { + public: + CannotInvokeStartRule(const RuntimeException &e); + ~CannotInvokeStartRule(); + }; + + // Fixes https://github.com/antlr/antlr4/issues/413 + // "Tree pattern compilation doesn't check for a complete parse" + class StartRuleDoesNotConsumeFullPattern : public RuntimeException { + public: + StartRuleDoesNotConsumeFullPattern() = default; + StartRuleDoesNotConsumeFullPattern(StartRuleDoesNotConsumeFullPattern const&) = default; + ~StartRuleDoesNotConsumeFullPattern(); + + StartRuleDoesNotConsumeFullPattern& operator=(StartRuleDoesNotConsumeFullPattern const&) = default; + }; + + /// Constructs a or from a and + /// object. The lexer input stream is altered for tokenizing + /// the tree patterns. The parser is used as a convenient mechanism to get + /// the grammar name, plus token, rule names. + ParseTreePatternMatcher(Lexer *lexer, Parser *parser); + virtual ~ParseTreePatternMatcher(); + + /// + /// Set the delimiters used for marking rule and token tags within concrete + /// syntax used by the tree pattern parser. + /// + /// The start delimiter. + /// The stop delimiter. + /// The escape sequence to use for escaping a start or stop delimiter. + /// + /// if {@code start} is {@code null} or empty. + /// if {@code stop} is {@code null} or empty. + virtual void setDelimiters(const std::string &start, const std::string &stop, const std::string &escapeLeft); + + /// + /// Does {@code pattern} matched as rule {@code patternRuleIndex} match {@code tree}? + virtual bool matches(ParseTree *tree, const std::string &pattern, int patternRuleIndex); + + /// + /// Does {@code pattern} matched as rule patternRuleIndex match tree? Pass in a + /// compiled pattern instead of a string representation of a tree pattern. 
+ /// + virtual bool matches(ParseTree *tree, const ParseTreePattern &pattern); + + /// + /// Compare {@code pattern} matched as rule {@code patternRuleIndex} against + /// {@code tree} and return a object that contains the + /// matched elements, or the node at which the match failed. + /// + virtual ParseTreeMatch match(ParseTree *tree, const std::string &pattern, int patternRuleIndex); + + /// + /// Compare {@code pattern} matched against {@code tree} and return a + /// object that contains the matched elements, or the + /// node at which the match failed. Pass in a compiled pattern instead of a + /// string representation of a tree pattern. + /// + virtual ParseTreeMatch match(ParseTree *tree, const ParseTreePattern &pattern); + + /// + /// For repeated use of a tree pattern, compile it to a + /// using this method. + /// + virtual ParseTreePattern compile(const std::string &pattern, int patternRuleIndex); + + /// + /// Used to convert the tree pattern string into a series of tokens. The + /// input stream is reset. + /// + virtual Lexer* getLexer(); + + /// + /// Used to collect to the grammar file name, token names, rule names for + /// used to parse the pattern into a parse tree. + /// + virtual Parser* getParser(); + + // ---- SUPPORT CODE ---- + + virtual std::vector> tokenize(const std::string &pattern); + + /// Split " = ;" into 4 chunks for tokenizing by tokenize(). + virtual std::vector split(const std::string &pattern); + + protected: + std::string _start; + std::string _stop; + std::string _escape; // e.g., \< and \> must escape BOTH! + + /// Recursively walk {@code tree} against {@code patternTree}, filling + /// {@code match.}. + /// + /// the first node encountered in {@code tree} which does not match + /// a corresponding node in {@code patternTree}, or {@code null} if the match + /// was successful. The specific node returned depends on the matching + /// algorithm used by the implementation, and may be overridden. 
+ virtual ParseTree* matchImpl(ParseTree *tree, ParseTree *patternTree, std::map> &labels); + + /// Is t subtree? + virtual RuleTagToken* getRuleTagToken(ParseTree *t); + + private: + Lexer *_lexer; + Parser *_parser; + + void InitializeInstanceFields(); + }; + +} // namespace pattern +} // namespace tree +} // namespace antlr4 diff --git a/lib/antlr4/include/tree/pattern/RuleTagToken.h b/lib/antlr4/include/tree/pattern/RuleTagToken.h new file mode 100644 index 0000000..368ae41 --- /dev/null +++ b/lib/antlr4/include/tree/pattern/RuleTagToken.h @@ -0,0 +1,117 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Token.h" + +namespace antlr4 { +namespace tree { +namespace pattern { + + /// + /// A object representing an entire subtree matched by a parser + /// rule; e.g., {@code }. These tokens are created for + /// chunks where the tag corresponds to a parser rule. + /// + class ANTLR4CPP_PUBLIC RuleTagToken : public Token { + /// + /// This is the backing field for . + /// + private: + const std::string ruleName; + + /// The token type for the current token. This is the token type assigned to + /// the bypass alternative for the rule during ATN deserialization. + const size_t bypassTokenType; + + /// This is the backing field for . + const std::string label; + + public: + /// + /// Constructs a new instance of with the specified rule + /// name and bypass token type and no label. + /// + /// The name of the parser rule this rule tag matches. + /// The bypass token type assigned to the parser rule. + /// + /// if {@code ruleName} is {@code null} + /// or empty. + RuleTagToken(const std::string &ruleName, int bypassTokenType); //this(ruleName, bypassTokenType, nullptr); + + /// + /// Constructs a new instance of with the specified rule + /// name, bypass token type, and label. 
+ /// + /// The name of the parser rule this rule tag matches. + /// The bypass token type assigned to the parser rule. + /// The label associated with the rule tag, or {@code null} if + /// the rule tag is unlabeled. + /// + /// if {@code ruleName} is {@code null} + /// or empty. + RuleTagToken(const std::string &ruleName, size_t bypassTokenType, const std::string &label); + + /// + /// Gets the name of the rule associated with this rule tag. + /// + /// The name of the parser rule associated with this rule tag. + std::string getRuleName() const; + + /// + /// Gets the label associated with the rule tag. + /// + /// The name of the label associated with the rule tag, or + /// {@code null} if this is an unlabeled rule tag. + std::string getLabel() const; + + /// + /// {@inheritDoc} + ///

      + /// Rule tag tokens are always placed on the DEFAULT_CHANNEL. + ///

      + virtual size_t getChannel() const override; + + /// + /// {@inheritDoc} + ///

      + /// This method returns the rule tag formatted with {@code <} and {@code >} + /// delimiters. + ///

      + virtual std::string getText() const override; + + /// Rule tag tokens have types assigned according to the rule bypass + /// transitions created during ATN deserialization. + virtual size_t getType() const override; + + /// The implementation for always returns 0. + virtual size_t getLine() const override; + + /// The implementation for always returns INVALID_INDEX. + virtual size_t getCharPositionInLine() const override; + + /// The implementation for always returns INVALID_INDEX. + virtual size_t getTokenIndex() const override; + + /// The implementation for always returns INVALID_INDEX. + virtual size_t getStartIndex() const override; + + /// The implementation for always returns INVALID_INDEX. + virtual size_t getStopIndex() const override; + + /// The implementation for always returns {@code null}. + virtual TokenSource *getTokenSource() const override; + + /// The implementation for always returns {@code null}. + virtual CharStream *getInputStream() const override; + + /// The implementation for returns a string of the form {@code ruleName:bypassTokenType}. + virtual std::string toString() const override; + }; + +} // namespace pattern +} // namespace tree +} // namespace antlr4 diff --git a/lib/antlr4/include/tree/pattern/TagChunk.h b/lib/antlr4/include/tree/pattern/TagChunk.h new file mode 100644 index 0000000..3d0c9f8 --- /dev/null +++ b/lib/antlr4/include/tree/pattern/TagChunk.h @@ -0,0 +1,86 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Chunk.h" + +namespace antlr4 { +namespace tree { +namespace pattern { + + /// + /// Represents a placeholder tag in a tree pattern. A tag can have any of the + /// following forms. + /// + ///
        + ///
      • {@code expr}: An unlabeled placeholder for a parser rule {@code expr}.
      • + ///
      • {@code ID}: An unlabeled placeholder for a token of type {@code ID}.
      • + ///
      • {@code e:expr}: A labeled placeholder for a parser rule {@code expr}.
      • + ///
      • {@code id:ID}: A labeled placeholder for a token of type {@code ID}.
      • + ///
      + /// + /// This class does not perform any validation on the tag or label names aside + /// from ensuring that the tag is a non-null, non-empty string. + ///
      + class ANTLR4CPP_PUBLIC TagChunk : public Chunk { + public: + /// + /// Construct a new instance of using the specified tag and + /// no label. + /// + /// The tag, which should be the name of a parser rule or token + /// type. + /// + /// if {@code tag} is {@code null} or + /// empty. + TagChunk(const std::string &tag); + virtual ~TagChunk(); + + /// + /// Construct a new instance of using the specified label + /// and tag. + /// + /// The label for the tag. If this is {@code null}, the + /// represents an unlabeled tag. + /// The tag, which should be the name of a parser rule or token + /// type. + /// + /// if {@code tag} is {@code null} or + /// empty. + TagChunk(const std::string &label, const std::string &tag); + + /// + /// Get the tag for this chunk. + /// + /// The tag for the chunk. + std::string getTag(); + + /// + /// Get the label, if any, assigned to this chunk. + /// + /// The label assigned to this chunk, or {@code null} if no label is + /// assigned to the chunk. + std::string getLabel(); + + /// + /// This method returns a text representation of the tag chunk. Labeled tags + /// are returned in the form {@code label:tag}, and unlabeled tags are + /// returned as just the tag name. + /// + virtual std::string toString() override; + + private: + /// This is the backing field for . + const std::string _tag; + /// + /// This is the backing field for . + /// + const std::string _label; + }; + +} // namespace pattern +} // namespace tree +} // namespace antlr4 diff --git a/lib/antlr4/include/tree/pattern/TextChunk.h b/lib/antlr4/include/tree/pattern/TextChunk.h new file mode 100644 index 0000000..1cbc0dd --- /dev/null +++ b/lib/antlr4/include/tree/pattern/TextChunk.h @@ -0,0 +1,51 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "Chunk.h" + +namespace antlr4 { +namespace tree { +namespace pattern { + + /// + /// Represents a span of raw text (concrete syntax) between tags in a tree + /// pattern string. + /// + class ANTLR4CPP_PUBLIC TextChunk : public Chunk { + private: + /// + /// This is the backing field for . + /// + const std::string text; + + /// + /// Constructs a new instance of with the specified text. + /// + /// The text of this chunk. + /// if {@code text} is {@code null}. + public: + TextChunk(const std::string &text); + virtual ~TextChunk(); + + /// + /// Gets the raw text of this chunk. + /// + /// The text of the chunk. + std::string getText(); + + /// + /// {@inheritDoc} + ///

      + /// The implementation for TextChunk returns the result of + /// getText() in single quotes. + ///

      + virtual std::string toString() override; + }; + +} // namespace pattern +} // namespace tree +} // namespace antlr4 diff --git a/lib/antlr4/include/tree/pattern/TokenTagToken.h b/lib/antlr4/include/tree/pattern/TokenTagToken.h new file mode 100644 index 0000000..9013fb8 --- /dev/null +++ b/lib/antlr4/include/tree/pattern/TokenTagToken.h @@ -0,0 +1,80 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "CommonToken.h" + +namespace antlr4 { +namespace tree { +namespace pattern { + + /// + /// A object representing a token of a particular type; e.g., + /// {@code }. These tokens are created for chunks where the + /// tag corresponds to a lexer rule or token type. + /// + class ANTLR4CPP_PUBLIC TokenTagToken : public CommonToken { + /// + /// This is the backing field for . + /// + private: + const std::string tokenName; + /// + /// This is the backing field for . + /// + const std::string label; + + /// + /// Constructs a new instance of for an unlabeled tag + /// with the specified token name and type. + /// + /// The token name. + /// The token type. + public: + TokenTagToken(const std::string &tokenName, int type); //this(tokenName, type, nullptr); + + /// + /// Constructs a new instance of with the specified + /// token name, type, and label. + /// + /// The token name. + /// The token type. + /// The label associated with the token tag, or {@code null} if + /// the token tag is unlabeled. + TokenTagToken(const std::string &tokenName, int type, const std::string &label); + + /// + /// Gets the token name. + /// The token name. + std::string getTokenName() const; + + /// + /// Gets the label associated with the rule tag. + /// + /// The name of the label associated with the rule tag, or + /// {@code null} if this is an unlabeled rule tag. 
+ std::string getLabel() const; + + /// + /// {@inheritDoc} + ///

      + /// The implementation for TokenTagToken returns the token tag + /// formatted with {@code <} and {@code >} delimiters. + ///

      + virtual std::string getText() const override; + + /// + /// {@inheritDoc} + ///

      + /// The implementation for TokenTagToken returns a string of the form + /// {@code tokenName:type}. + ///

      + virtual std::string toString() const override; + }; + +} // namespace pattern +} // namespace tree +} // namespace antlr4 diff --git a/lib/antlr4/include/tree/xpath/XPath.h b/lib/antlr4/include/tree/xpath/XPath.h new file mode 100644 index 0000000..e38d482 --- /dev/null +++ b/lib/antlr4/include/tree/xpath/XPath.h @@ -0,0 +1,86 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace tree { +namespace xpath { + + /// Represent a subset of XPath XML path syntax for use in identifying nodes in + /// parse trees. + /// + /// + /// Split path into words and separators {@code /} and {@code //} via ANTLR + /// itself then walk path elements from left to right. At each separator-word + /// pair, find set of nodes. Next stage uses those as work list. + /// + /// + /// The basic interface is + /// {@code (tree, pathString, parser)}. + /// But that is just shorthand for: + /// + ///
      +  /// XPath p = new XPath(parser, pathString);
      +  /// return p.evaluate(tree);
      +  /// 
      + /// + /// + /// See {@code org.antlr.v4.test.TestXPath} for descriptions. In short, this + /// allows operators: + /// + ///
      + ///
      /
      root
      + ///
      //
      anywhere
      + ///
      !
      invert; this must appear directly after root or anywhere + /// operator
      + ///
      + /// + /// + /// and path elements: + /// + ///
      + ///
      ID
      token name
      + ///
      'string'
      any string literal token from the grammar
      + ///
      expr
      rule name
      + ///
      *
      wildcard matching any node
      + ///
      + /// + /// + /// Whitespace is not allowed. + + class ANTLR4CPP_PUBLIC XPath { + public: + static const std::string WILDCARD; // word not operator/separator + static const std::string NOT; // word for invert operator + + XPath(Parser *parser, const std::string &path); + virtual ~XPath() {} + + // TODO: check for invalid token/rule names, bad syntax + virtual std::vector> split(const std::string &path); + + static std::vector findAll(ParseTree *tree, std::string const& xpath, Parser *parser); + + /// Return a list of all nodes starting at {@code t} as root that satisfy the + /// path. The root {@code /} is relative to the node passed to + /// . + virtual std::vector evaluate(ParseTree *t); + + protected: + std::string _path; + Parser *_parser; + + /// Convert word like {@code *} or {@code ID} or {@code expr} to a path + /// element. {@code anywhere} is {@code true} if {@code //} precedes the + /// word. + virtual std::unique_ptr getXPathElement(Token *wordToken, bool anywhere); + }; + +} // namespace xpath +} // namespace tree +} // namespace antlr4 diff --git a/lib/antlr4/include/tree/xpath/XPathElement.h b/lib/antlr4/include/tree/xpath/XPathElement.h new file mode 100644 index 0000000..f339117 --- /dev/null +++ b/lib/antlr4/include/tree/xpath/XPathElement.h @@ -0,0 +1,40 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace tree { + class ParseTree; + +namespace xpath { + + class ANTLR4CPP_PUBLIC XPathElement { + public: + /// Construct element like {@code /ID} or {@code ID} or {@code /*} etc... 
+ /// op is null if just node + XPathElement(const std::string &nodeName); + XPathElement(XPathElement const&) = default; + virtual ~XPathElement(); + + XPathElement& operator=(XPathElement const&) = default; + + /// Given tree rooted at {@code t} return all nodes matched by this path + /// element. + virtual std::vector evaluate(ParseTree *t); + virtual std::string toString() const; + + void setInvert(bool value); + + protected: + std::string _nodeName; + bool _invert = false; + }; + +} // namespace xpath +} // namespace tree +} // namespace antlr4 diff --git a/lib/antlr4/include/tree/xpath/XPathLexer.h b/lib/antlr4/include/tree/xpath/XPathLexer.h new file mode 100644 index 0000000..ca471c9 --- /dev/null +++ b/lib/antlr4/include/tree/xpath/XPathLexer.h @@ -0,0 +1,56 @@ +#pragma once + + +#include "antlr4-runtime.h" + + + + +class XPathLexer : public antlr4::Lexer { +public: + enum { + TOKEN_REF = 1, RULE_REF = 2, ANYWHERE = 3, ROOT = 4, WILDCARD = 5, BANG = 6, + ID = 7, STRING = 8 + }; + + XPathLexer(antlr4::CharStream *input); + ~XPathLexer(); + + virtual std::string getGrammarFileName() const override; + virtual const std::vector& getRuleNames() const override; + + virtual const std::vector& getChannelNames() const override; + virtual const std::vector& getModeNames() const override; + virtual const std::vector& getTokenNames() const override; // deprecated, use vocabulary instead + virtual antlr4::dfa::Vocabulary& getVocabulary() const override; + + virtual const std::vector getSerializedATN() const override; + virtual const antlr4::atn::ATN& getATN() const override; + + virtual void action(antlr4::RuleContext *context, size_t ruleIndex, size_t actionIndex) override; +private: + static std::vector _decisionToDFA; + static antlr4::atn::PredictionContextCache _sharedContextCache; + static std::vector _ruleNames; + static std::vector _tokenNames; + static std::vector _channelNames; + static std::vector _modeNames; + + static std::vector _literalNames; + static 
std::vector _symbolicNames; + static antlr4::dfa::Vocabulary _vocabulary; + static antlr4::atn::ATN _atn; + static std::vector _serializedATN; + + + // Individual action functions triggered by action() above. + void IDAction(antlr4::RuleContext *context, size_t actionIndex); + + // Individual semantic predicate functions triggered by sempred() above. + + struct Initializer { + Initializer(); + }; + static Initializer _init; +}; + diff --git a/lib/antlr4/include/tree/xpath/XPathLexerErrorListener.h b/lib/antlr4/include/tree/xpath/XPathLexerErrorListener.h new file mode 100644 index 0000000..c0c3eaa --- /dev/null +++ b/lib/antlr4/include/tree/xpath/XPathLexerErrorListener.h @@ -0,0 +1,22 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "BaseErrorListener.h" + +namespace antlr4 { +namespace tree { +namespace xpath { + + class ANTLR4CPP_PUBLIC XPathLexerErrorListener : public BaseErrorListener { + public: + virtual void syntaxError(Recognizer *recognizer, Token *offendingSymbol, size_t line, + size_t charPositionInLine, const std::string &msg, std::exception_ptr e) override; + }; + +} // namespace xpath +} // namespace tree +} // namespace antlr4 diff --git a/lib/antlr4/include/tree/xpath/XPathRuleAnywhereElement.h b/lib/antlr4/include/tree/xpath/XPathRuleAnywhereElement.h new file mode 100644 index 0000000..2ceb75c --- /dev/null +++ b/lib/antlr4/include/tree/xpath/XPathRuleAnywhereElement.h @@ -0,0 +1,27 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "XPathElement.h" + +namespace antlr4 { +namespace tree { +namespace xpath { + + /// Either {@code ID} at start of path or {@code ...//ID} in middle of path. 
+ class ANTLR4CPP_PUBLIC XPathRuleAnywhereElement : public XPathElement { + public: + XPathRuleAnywhereElement(const std::string &ruleName, int ruleIndex); + + virtual std::vector evaluate(ParseTree *t) override; + + protected: + int _ruleIndex = 0; + }; + +} // namespace xpath +} // namespace tree +} // namespace antlr4 diff --git a/lib/antlr4/include/tree/xpath/XPathRuleElement.h b/lib/antlr4/include/tree/xpath/XPathRuleElement.h new file mode 100644 index 0000000..b57276f --- /dev/null +++ b/lib/antlr4/include/tree/xpath/XPathRuleElement.h @@ -0,0 +1,26 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "XPathElement.h" + +namespace antlr4 { +namespace tree { +namespace xpath { + + class ANTLR4CPP_PUBLIC XPathRuleElement : public XPathElement { + public: + XPathRuleElement(const std::string &ruleName, size_t ruleIndex); + + virtual std::vector evaluate(ParseTree *t) override; + + protected: + size_t _ruleIndex = 0; + }; + +} // namespace xpath +} // namespace tree +} // namespace antlr4 diff --git a/lib/antlr4/include/tree/xpath/XPathTokenAnywhereElement.h b/lib/antlr4/include/tree/xpath/XPathTokenAnywhereElement.h new file mode 100644 index 0000000..2045d91 --- /dev/null +++ b/lib/antlr4/include/tree/xpath/XPathTokenAnywhereElement.h @@ -0,0 +1,25 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "XPathElement.h" + +namespace antlr4 { +namespace tree { +namespace xpath { + + class ANTLR4CPP_PUBLIC XPathTokenAnywhereElement : public XPathElement { + protected: + int tokenType = 0; + public: + XPathTokenAnywhereElement(const std::string &tokenName, int tokenType); + + virtual std::vector evaluate(ParseTree *t) override; + }; + +} // namespace xpath +} // namespace tree +} // namespace antlr4 diff --git a/lib/antlr4/include/tree/xpath/XPathTokenElement.h b/lib/antlr4/include/tree/xpath/XPathTokenElement.h new file mode 100644 index 0000000..7221530 --- /dev/null +++ b/lib/antlr4/include/tree/xpath/XPathTokenElement.h @@ -0,0 +1,26 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "XPathElement.h" + +namespace antlr4 { +namespace tree { +namespace xpath { + + class ANTLR4CPP_PUBLIC XPathTokenElement : public XPathElement { + public: + XPathTokenElement(const std::string &tokenName, size_t tokenType); + + virtual std::vector evaluate(ParseTree *t) override; + + protected: + size_t _tokenType = 0; + }; + +} // namespace xpath +} // namespace tree +} // namespace antlr4 diff --git a/lib/antlr4/include/tree/xpath/XPathWildcardAnywhereElement.h b/lib/antlr4/include/tree/xpath/XPathWildcardAnywhereElement.h new file mode 100644 index 0000000..dc5d1e5 --- /dev/null +++ b/lib/antlr4/include/tree/xpath/XPathWildcardAnywhereElement.h @@ -0,0 +1,23 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "XPathElement.h" + +namespace antlr4 { +namespace tree { +namespace xpath { + + class ANTLR4CPP_PUBLIC XPathWildcardAnywhereElement : public XPathElement { + public: + XPathWildcardAnywhereElement(); + + virtual std::vector evaluate(ParseTree *t) override; + }; + +} // namespace xpath +} // namespace tree +} // namespace antlr4 diff --git a/lib/antlr4/include/tree/xpath/XPathWildcardElement.h b/lib/antlr4/include/tree/xpath/XPathWildcardElement.h new file mode 100644 index 0000000..accb461 --- /dev/null +++ b/lib/antlr4/include/tree/xpath/XPathWildcardElement.h @@ -0,0 +1,23 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "XPathElement.h" + +namespace antlr4 { +namespace tree { +namespace xpath { + + class ANTLR4CPP_PUBLIC XPathWildcardElement : public XPathElement { + public: + XPathWildcardElement(); + + virtual std::vector evaluate(ParseTree *t) override; + }; + +} // namespace xpath +} // namespace tree +} // namespace antlr4 diff --git a/lib/antlr4/lib/libantlr4-runtime.so.4.8 b/lib/antlr4/lib/libantlr4-runtime.so.4.8 new file mode 100755 index 0000000..8b92f6e Binary files /dev/null and b/lib/antlr4/lib/libantlr4-runtime.so.4.8 differ diff --git a/lib/generated/bf.tokens b/lib/generated/bf.tokens new file mode 100644 index 0000000..bc770ec --- /dev/null +++ b/lib/generated/bf.tokens @@ -0,0 +1,14 @@ +COMMENT=1 +INPUT=2 +OUTPUT=3 +DEC=4 +INC=5 +LEFT=6 +RIGHT=7 +'\\\\.*?\\\\'=1 +'?'=2 +'.'=3 +'-'=4 +'+'=5 +'>'=6 +'<'=7 diff --git a/lib/generated/bfBaseListener.cpp b/lib/generated/bfBaseListener.cpp new file mode 100644 index 0000000..eae987c --- /dev/null +++ b/lib/generated/bfBaseListener.cpp @@ -0,0 +1,7 @@ + +// Generated from /home/atreya/Documents/Projects/Miscellaneous/bfc/grammar/bf.g4 by ANTLR 4.8 + + +#include "bfBaseListener.h" + + diff 
--git a/lib/generated/bfBaseListener.h b/lib/generated/bfBaseListener.h new file mode 100644 index 0000000..9acf9d4 --- /dev/null +++ b/lib/generated/bfBaseListener.h @@ -0,0 +1,32 @@ + +// Generated from /home/atreya/Documents/Projects/Miscellaneous/bfc/grammar/bf.g4 by ANTLR 4.8 + +#pragma once + + +#include "antlr4-runtime.h" +#include "bfListener.h" + + +/** + * This class provides an empty implementation of bfListener, + * which can be extended to create a listener which only needs to handle a subset + * of the available methods. + */ +class bfBaseListener : public bfListener { +public: + + virtual void enterProgram(bfParser::ProgramContext * /*ctx*/) override { } + virtual void exitProgram(bfParser::ProgramContext * /*ctx*/) override { } + + virtual void enterStatements(bfParser::StatementsContext * /*ctx*/) override { } + virtual void exitStatements(bfParser::StatementsContext * /*ctx*/) override { } + + + virtual void enterEveryRule(antlr4::ParserRuleContext * /*ctx*/) override { } + virtual void exitEveryRule(antlr4::ParserRuleContext * /*ctx*/) override { } + virtual void visitTerminal(antlr4::tree::TerminalNode * /*node*/) override { } + virtual void visitErrorNode(antlr4::tree::ErrorNode * /*node*/) override { } + +}; + diff --git a/lib/generated/bfBaseVisitor.cpp b/lib/generated/bfBaseVisitor.cpp new file mode 100644 index 0000000..361f7f0 --- /dev/null +++ b/lib/generated/bfBaseVisitor.cpp @@ -0,0 +1,7 @@ + +// Generated from /home/atreya/Documents/Projects/Miscellaneous/bfc/grammar/bf.g4 by ANTLR 4.8 + + +#include "bfBaseVisitor.h" + + diff --git a/lib/generated/bfBaseVisitor.h b/lib/generated/bfBaseVisitor.h new file mode 100644 index 0000000..ef1b6a1 --- /dev/null +++ b/lib/generated/bfBaseVisitor.h @@ -0,0 +1,28 @@ + +// Generated from /home/atreya/Documents/Projects/Miscellaneous/bfc/grammar/bf.g4 by ANTLR 4.8 + +#pragma once + + +#include "antlr4-runtime.h" +#include "bfVisitor.h" + + +/** + * This class provides an empty implementation of 
bfVisitor, which can be + * extended to create a visitor which only needs to handle a subset of the available methods. + */ +class bfBaseVisitor : public bfVisitor { +public: + + virtual antlrcpp::Any visitProgram(bfParser::ProgramContext *ctx) override { + return visitChildren(ctx); + } + + virtual antlrcpp::Any visitStatements(bfParser::StatementsContext *ctx) override { + return visitChildren(ctx); + } + + +}; + diff --git a/lib/generated/bfLexer.cpp b/lib/generated/bfLexer.cpp new file mode 100644 index 0000000..260dcb0 --- /dev/null +++ b/lib/generated/bfLexer.cpp @@ -0,0 +1,139 @@ + +// Generated from /home/atreya/Documents/Projects/Miscellaneous/bfc/grammar/bf.g4 by ANTLR 4.8 + + +#include "bfLexer.h" + + +using namespace antlr4; + + +bfLexer::bfLexer(CharStream *input) : Lexer(input) { + _interpreter = new atn::LexerATNSimulator(this, _atn, _decisionToDFA, _sharedContextCache); +} + +bfLexer::~bfLexer() { + delete _interpreter; +} + +std::string bfLexer::getGrammarFileName() const { + return "bf.g4"; +} + +const std::vector& bfLexer::getRuleNames() const { + return _ruleNames; +} + +const std::vector& bfLexer::getChannelNames() const { + return _channelNames; +} + +const std::vector& bfLexer::getModeNames() const { + return _modeNames; +} + +const std::vector& bfLexer::getTokenNames() const { + return _tokenNames; +} + +dfa::Vocabulary& bfLexer::getVocabulary() const { + return _vocabulary; +} + +const std::vector bfLexer::getSerializedATN() const { + return _serializedATN; +} + +const atn::ATN& bfLexer::getATN() const { + return _atn; +} + + + + +// Static vars and initialization. +std::vector bfLexer::_decisionToDFA; +atn::PredictionContextCache bfLexer::_sharedContextCache; + +// We own the ATN which in turn owns the ATN states. 
+atn::ATN bfLexer::_atn; +std::vector bfLexer::_serializedATN; + +std::vector bfLexer::_ruleNames = { + u8"COMMENT", u8"INPUT", u8"OUTPUT", u8"DEC", u8"INC", u8"LEFT", u8"RIGHT" +}; + +std::vector bfLexer::_channelNames = { + "DEFAULT_TOKEN_CHANNEL", "HIDDEN" +}; + +std::vector bfLexer::_modeNames = { + u8"DEFAULT_MODE" +}; + +std::vector bfLexer::_literalNames = { + "", u8"'\\\\.*?\\\\'", u8"'?'", u8"'.'", u8"'-'", u8"'+'", u8"'>'", u8"'<'" +}; + +std::vector bfLexer::_symbolicNames = { + "", u8"COMMENT", u8"INPUT", u8"OUTPUT", u8"DEC", u8"INC", u8"LEFT", u8"RIGHT" +}; + +dfa::Vocabulary bfLexer::_vocabulary(_literalNames, _symbolicNames); + +std::vector bfLexer::_tokenNames; + +bfLexer::Initializer::Initializer() { + // This code could be in a static initializer lambda, but VS doesn't allow access to private class members from there. + for (size_t i = 0; i < _symbolicNames.size(); ++i) { + std::string name = _vocabulary.getLiteralName(i); + if (name.empty()) { + name = _vocabulary.getSymbolicName(i); + } + + if (name.empty()) { + _tokenNames.push_back(""); + } else { + _tokenNames.push_back(name); + } + } + + _serializedATN = { + 0x3, 0x608b, 0xa72a, 0x8133, 0xb9ed, 0x417c, 0x3be7, 0x7786, 0x5964, + 0x2, 0x9, 0x27, 0x8, 0x1, 0x4, 0x2, 0x9, 0x2, 0x4, 0x3, 0x9, 0x3, 0x4, + 0x4, 0x9, 0x4, 0x4, 0x5, 0x9, 0x5, 0x4, 0x6, 0x9, 0x6, 0x4, 0x7, 0x9, + 0x7, 0x4, 0x8, 0x9, 0x8, 0x3, 0x2, 0x3, 0x2, 0x3, 0x2, 0x3, 0x2, 0x3, + 0x2, 0x3, 0x2, 0x3, 0x2, 0x3, 0x2, 0x3, 0x2, 0x3, 0x2, 0x3, 0x3, 0x3, + 0x3, 0x3, 0x4, 0x3, 0x4, 0x3, 0x5, 0x3, 0x5, 0x3, 0x6, 0x3, 0x6, 0x3, + 0x7, 0x3, 0x7, 0x3, 0x8, 0x3, 0x8, 0x2, 0x2, 0x9, 0x3, 0x3, 0x5, 0x4, + 0x7, 0x5, 0x9, 0x6, 0xb, 0x7, 0xd, 0x8, 0xf, 0x9, 0x3, 0x2, 0x2, 0x2, + 0x26, 0x2, 0x3, 0x3, 0x2, 0x2, 0x2, 0x2, 0x5, 0x3, 0x2, 0x2, 0x2, 0x2, + 0x7, 0x3, 0x2, 0x2, 0x2, 0x2, 0x9, 0x3, 0x2, 0x2, 0x2, 0x2, 0xb, 0x3, + 0x2, 0x2, 0x2, 0x2, 0xd, 0x3, 0x2, 0x2, 0x2, 0x2, 0xf, 0x3, 0x2, 0x2, + 0x2, 0x3, 0x11, 0x3, 0x2, 0x2, 0x2, 0x5, 0x1b, 0x3, 0x2, 
0x2, 0x2, 0x7, + 0x1d, 0x3, 0x2, 0x2, 0x2, 0x9, 0x1f, 0x3, 0x2, 0x2, 0x2, 0xb, 0x21, + 0x3, 0x2, 0x2, 0x2, 0xd, 0x23, 0x3, 0x2, 0x2, 0x2, 0xf, 0x25, 0x3, 0x2, + 0x2, 0x2, 0x11, 0x12, 0x7, 0x5e, 0x2, 0x2, 0x12, 0x13, 0x7, 0x5e, 0x2, + 0x2, 0x13, 0x14, 0x7, 0x30, 0x2, 0x2, 0x14, 0x15, 0x7, 0x2c, 0x2, 0x2, + 0x15, 0x16, 0x7, 0x41, 0x2, 0x2, 0x16, 0x17, 0x7, 0x5e, 0x2, 0x2, 0x17, + 0x18, 0x7, 0x5e, 0x2, 0x2, 0x18, 0x19, 0x3, 0x2, 0x2, 0x2, 0x19, 0x1a, + 0x8, 0x2, 0x2, 0x2, 0x1a, 0x4, 0x3, 0x2, 0x2, 0x2, 0x1b, 0x1c, 0x7, + 0x41, 0x2, 0x2, 0x1c, 0x6, 0x3, 0x2, 0x2, 0x2, 0x1d, 0x1e, 0x7, 0x30, + 0x2, 0x2, 0x1e, 0x8, 0x3, 0x2, 0x2, 0x2, 0x1f, 0x20, 0x7, 0x2f, 0x2, + 0x2, 0x20, 0xa, 0x3, 0x2, 0x2, 0x2, 0x21, 0x22, 0x7, 0x2d, 0x2, 0x2, + 0x22, 0xc, 0x3, 0x2, 0x2, 0x2, 0x23, 0x24, 0x7, 0x40, 0x2, 0x2, 0x24, + 0xe, 0x3, 0x2, 0x2, 0x2, 0x25, 0x26, 0x7, 0x3e, 0x2, 0x2, 0x26, 0x10, + 0x3, 0x2, 0x2, 0x2, 0x3, 0x2, 0x3, 0x8, 0x2, 0x2, + }; + + atn::ATNDeserializer deserializer; + _atn = deserializer.deserialize(_serializedATN); + + size_t count = _atn.getNumberOfDecisions(); + _decisionToDFA.reserve(count); + for (size_t i = 0; i < count; i++) { + _decisionToDFA.emplace_back(_atn.getDecisionState(i), i); + } +} + +bfLexer::Initializer bfLexer::_init; diff --git a/lib/generated/bfLexer.h b/lib/generated/bfLexer.h new file mode 100644 index 0000000..dd4f275 --- /dev/null +++ b/lib/generated/bfLexer.h @@ -0,0 +1,56 @@ + +// Generated from /home/atreya/Documents/Projects/Miscellaneous/bfc/grammar/bf.g4 by ANTLR 4.8 + +#pragma once + + +#include "antlr4-runtime.h" + + + + +class bfLexer : public antlr4::Lexer { +public: + enum { + COMMENT = 1, INPUT = 2, OUTPUT = 3, DEC = 4, INC = 5, LEFT = 6, RIGHT = 7 + }; + + bfLexer(antlr4::CharStream *input); + ~bfLexer(); + + virtual std::string getGrammarFileName() const override; + virtual const std::vector& getRuleNames() const override; + + virtual const std::vector& getChannelNames() const override; + virtual const std::vector& getModeNames() 
const override; + virtual const std::vector& getTokenNames() const override; // deprecated, use vocabulary instead + virtual antlr4::dfa::Vocabulary& getVocabulary() const override; + + virtual const std::vector getSerializedATN() const override; + virtual const antlr4::atn::ATN& getATN() const override; + +private: + static std::vector _decisionToDFA; + static antlr4::atn::PredictionContextCache _sharedContextCache; + static std::vector _ruleNames; + static std::vector _tokenNames; + static std::vector _channelNames; + static std::vector _modeNames; + + static std::vector _literalNames; + static std::vector _symbolicNames; + static antlr4::dfa::Vocabulary _vocabulary; + static antlr4::atn::ATN _atn; + static std::vector _serializedATN; + + + // Individual action functions triggered by action() above. + + // Individual semantic predicate functions triggered by sempred() above. + + struct Initializer { + Initializer(); + }; + static Initializer _init; +}; + diff --git a/lib/generated/bfLexer.tokens b/lib/generated/bfLexer.tokens new file mode 100644 index 0000000..bc770ec --- /dev/null +++ b/lib/generated/bfLexer.tokens @@ -0,0 +1,14 @@ +COMMENT=1 +INPUT=2 +OUTPUT=3 +DEC=4 +INC=5 +LEFT=6 +RIGHT=7 +'\\\\.*?\\\\'=1 +'?'=2 +'.'=3 +'-'=4 +'+'=5 +'>'=6 +'<'=7 diff --git a/lib/generated/bfListener.cpp b/lib/generated/bfListener.cpp new file mode 100644 index 0000000..f0f4686 --- /dev/null +++ b/lib/generated/bfListener.cpp @@ -0,0 +1,7 @@ + +// Generated from /home/atreya/Documents/Projects/Miscellaneous/bfc/grammar/bf.g4 by ANTLR 4.8 + + +#include "bfListener.h" + + diff --git a/lib/generated/bfListener.h b/lib/generated/bfListener.h new file mode 100644 index 0000000..f13ad17 --- /dev/null +++ b/lib/generated/bfListener.h @@ -0,0 +1,25 @@ + +// Generated from /home/atreya/Documents/Projects/Miscellaneous/bfc/grammar/bf.g4 by ANTLR 4.8 + +#pragma once + + +#include "antlr4-runtime.h" +#include "bfParser.h" + + +/** + * This interface defines an abstract listener for a 
parse tree produced by bfParser. + */ +class bfListener : public antlr4::tree::ParseTreeListener { +public: + + virtual void enterProgram(bfParser::ProgramContext *ctx) = 0; + virtual void exitProgram(bfParser::ProgramContext *ctx) = 0; + + virtual void enterStatements(bfParser::StatementsContext *ctx) = 0; + virtual void exitStatements(bfParser::StatementsContext *ctx) = 0; + + +}; + diff --git a/lib/generated/bfParser.cpp b/lib/generated/bfParser.cpp new file mode 100644 index 0000000..9c806ae --- /dev/null +++ b/lib/generated/bfParser.cpp @@ -0,0 +1,231 @@ + +// Generated from /home/atreya/Documents/Projects/Miscellaneous/bfc/grammar/bf.g4 by ANTLR 4.8 + + +#include "bfListener.h" +#include "bfVisitor.h" + +#include "bfParser.h" + + +using namespace antlrcpp; +using namespace antlr4; + +bfParser::bfParser(TokenStream *input) : Parser(input) { + _interpreter = new atn::ParserATNSimulator(this, _atn, _decisionToDFA, _sharedContextCache); +} + +bfParser::~bfParser() { + delete _interpreter; +} + +std::string bfParser::getGrammarFileName() const { + return "bf.g4"; +} + +const std::vector& bfParser::getRuleNames() const { + return _ruleNames; +} + +dfa::Vocabulary& bfParser::getVocabulary() const { + return _vocabulary; +} + + +//----------------- ProgramContext ------------------------------------------------------------------ + +bfParser::ProgramContext::ProgramContext(ParserRuleContext *parent, size_t invokingState) + : ParserRuleContext(parent, invokingState) { +} + +bfParser::StatementsContext* bfParser::ProgramContext::statements() { + return getRuleContext(0); +} + + +size_t bfParser::ProgramContext::getRuleIndex() const { + return bfParser::RuleProgram; +} + +void bfParser::ProgramContext::enterRule(tree::ParseTreeListener *listener) { + auto parserListener = dynamic_cast(listener); + if (parserListener != nullptr) + parserListener->enterProgram(this); +} + +void bfParser::ProgramContext::exitRule(tree::ParseTreeListener *listener) { + auto parserListener = 
dynamic_cast(listener); + if (parserListener != nullptr) + parserListener->exitProgram(this); +} + + +antlrcpp::Any bfParser::ProgramContext::accept(tree::ParseTreeVisitor *visitor) { + if (auto parserVisitor = dynamic_cast(visitor)) + return parserVisitor->visitProgram(this); + else + return visitor->visitChildren(this); +} + +bfParser::ProgramContext* bfParser::program() { + ProgramContext *_localctx = _tracker.createInstance(_ctx, getState()); + enterRule(_localctx, 0, bfParser::RuleProgram); + + auto onExit = finally([=] { + exitRule(); + }); + try { + enterOuterAlt(_localctx, 1); + setState(4); + statements(); + + } + catch (RecognitionException &e) { + _errHandler->reportError(this, e); + _localctx->exception = std::current_exception(); + _errHandler->recover(this, _localctx->exception); + } + + return _localctx; +} + +//----------------- StatementsContext ------------------------------------------------------------------ + +bfParser::StatementsContext::StatementsContext(ParserRuleContext *parent, size_t invokingState) + : ParserRuleContext(parent, invokingState) { +} + +tree::TerminalNode* bfParser::StatementsContext::INC() { + return getToken(bfParser::INC, 0); +} + +tree::TerminalNode* bfParser::StatementsContext::DEC() { + return getToken(bfParser::DEC, 0); +} + +tree::TerminalNode* bfParser::StatementsContext::INPUT() { + return getToken(bfParser::INPUT, 0); +} + +tree::TerminalNode* bfParser::StatementsContext::OUTPUT() { + return getToken(bfParser::OUTPUT, 0); +} + + +size_t bfParser::StatementsContext::getRuleIndex() const { + return bfParser::RuleStatements; +} + +void bfParser::StatementsContext::enterRule(tree::ParseTreeListener *listener) { + auto parserListener = dynamic_cast(listener); + if (parserListener != nullptr) + parserListener->enterStatements(this); +} + +void bfParser::StatementsContext::exitRule(tree::ParseTreeListener *listener) { + auto parserListener = dynamic_cast(listener); + if (parserListener != nullptr) + 
parserListener->exitStatements(this); +} + + +antlrcpp::Any bfParser::StatementsContext::accept(tree::ParseTreeVisitor *visitor) { + if (auto parserVisitor = dynamic_cast(visitor)) + return parserVisitor->visitStatements(this); + else + return visitor->visitChildren(this); +} + +bfParser::StatementsContext* bfParser::statements() { + StatementsContext *_localctx = _tracker.createInstance(_ctx, getState()); + enterRule(_localctx, 2, bfParser::RuleStatements); + size_t _la = 0; + + auto onExit = finally([=] { + exitRule(); + }); + try { + enterOuterAlt(_localctx, 1); + setState(6); + _la = _input->LA(1); + if (!((((_la & ~ 0x3fULL) == 0) && + ((1ULL << _la) & ((1ULL << bfParser::INPUT) + | (1ULL << bfParser::OUTPUT) + | (1ULL << bfParser::DEC) + | (1ULL << bfParser::INC))) != 0))) { + _errHandler->recoverInline(this); + } + else { + _errHandler->reportMatch(this); + consume(); + } + + } + catch (RecognitionException &e) { + _errHandler->reportError(this, e); + _localctx->exception = std::current_exception(); + _errHandler->recover(this, _localctx->exception); + } + + return _localctx; +} + +// Static vars and initialization. +std::vector bfParser::_decisionToDFA; +atn::PredictionContextCache bfParser::_sharedContextCache; + +// We own the ATN which in turn owns the ATN states. 
+atn::ATN bfParser::_atn; +std::vector bfParser::_serializedATN; + +std::vector bfParser::_ruleNames = { + "program", "statements" +}; + +std::vector bfParser::_literalNames = { + "", "'\\\\.*?\\\\'", "'?'", "'.'", "'-'", "'+'", "'>'", "'<'" +}; + +std::vector bfParser::_symbolicNames = { + "", "COMMENT", "INPUT", "OUTPUT", "DEC", "INC", "LEFT", "RIGHT" +}; + +dfa::Vocabulary bfParser::_vocabulary(_literalNames, _symbolicNames); + +std::vector bfParser::_tokenNames; + +bfParser::Initializer::Initializer() { + for (size_t i = 0; i < _symbolicNames.size(); ++i) { + std::string name = _vocabulary.getLiteralName(i); + if (name.empty()) { + name = _vocabulary.getSymbolicName(i); + } + + if (name.empty()) { + _tokenNames.push_back(""); + } else { + _tokenNames.push_back(name); + } + } + + _serializedATN = { + 0x3, 0x608b, 0xa72a, 0x8133, 0xb9ed, 0x417c, 0x3be7, 0x7786, 0x5964, + 0x3, 0x9, 0xb, 0x4, 0x2, 0x9, 0x2, 0x4, 0x3, 0x9, 0x3, 0x3, 0x2, 0x3, + 0x2, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x2, 0x2, 0x4, 0x2, 0x4, 0x2, 0x3, + 0x3, 0x2, 0x4, 0x7, 0x2, 0x8, 0x2, 0x6, 0x3, 0x2, 0x2, 0x2, 0x4, 0x8, + 0x3, 0x2, 0x2, 0x2, 0x6, 0x7, 0x5, 0x4, 0x3, 0x2, 0x7, 0x3, 0x3, 0x2, + 0x2, 0x2, 0x8, 0x9, 0x9, 0x2, 0x2, 0x2, 0x9, 0x5, 0x3, 0x2, 0x2, 0x2, + 0x2, + }; + + atn::ATNDeserializer deserializer; + _atn = deserializer.deserialize(_serializedATN); + + size_t count = _atn.getNumberOfDecisions(); + _decisionToDFA.reserve(count); + for (size_t i = 0; i < count; i++) { + _decisionToDFA.emplace_back(_atn.getDecisionState(i), i); + } +} + +bfParser::Initializer bfParser::_init; diff --git a/lib/generated/bfParser.h b/lib/generated/bfParser.h new file mode 100644 index 0000000..7c908c4 --- /dev/null +++ b/lib/generated/bfParser.h @@ -0,0 +1,87 @@ + +// Generated from /home/atreya/Documents/Projects/Miscellaneous/bfc/grammar/bf.g4 by ANTLR 4.8 + +#pragma once + + +#include "antlr4-runtime.h" + + + + +class bfParser : public antlr4::Parser { +public: + enum { + COMMENT = 1, INPUT = 2, OUTPUT = 3, 
DEC = 4, INC = 5, LEFT = 6, RIGHT = 7 + }; + + enum { + RuleProgram = 0, RuleStatements = 1 + }; + + bfParser(antlr4::TokenStream *input); + ~bfParser(); + + virtual std::string getGrammarFileName() const override; + virtual const antlr4::atn::ATN& getATN() const override { return _atn; }; + virtual const std::vector& getTokenNames() const override { return _tokenNames; }; // deprecated: use vocabulary instead. + virtual const std::vector& getRuleNames() const override; + virtual antlr4::dfa::Vocabulary& getVocabulary() const override; + + + class ProgramContext; + class StatementsContext; + + class ProgramContext : public antlr4::ParserRuleContext { + public: + ProgramContext(antlr4::ParserRuleContext *parent, size_t invokingState); + virtual size_t getRuleIndex() const override; + StatementsContext *statements(); + + virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override; + virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override; + + virtual antlrcpp::Any accept(antlr4::tree::ParseTreeVisitor *visitor) override; + + }; + + ProgramContext* program(); + + class StatementsContext : public antlr4::ParserRuleContext { + public: + StatementsContext(antlr4::ParserRuleContext *parent, size_t invokingState); + virtual size_t getRuleIndex() const override; + antlr4::tree::TerminalNode *INC(); + antlr4::tree::TerminalNode *DEC(); + antlr4::tree::TerminalNode *INPUT(); + antlr4::tree::TerminalNode *OUTPUT(); + + virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override; + virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override; + + virtual antlrcpp::Any accept(antlr4::tree::ParseTreeVisitor *visitor) override; + + }; + + StatementsContext* statements(); + + +private: + static std::vector _decisionToDFA; + static antlr4::atn::PredictionContextCache _sharedContextCache; + static std::vector _ruleNames; + static std::vector _tokenNames; + + static std::vector _literalNames; + static std::vector 
_symbolicNames; + static antlr4::dfa::Vocabulary _vocabulary; + static antlr4::atn::ATN _atn; + static std::vector _serializedATN; + + + struct Initializer { + Initializer(); + }; + static Initializer _init; +}; + diff --git a/lib/generated/bfVisitor.cpp b/lib/generated/bfVisitor.cpp new file mode 100644 index 0000000..54cefd9 --- /dev/null +++ b/lib/generated/bfVisitor.cpp @@ -0,0 +1,7 @@ + +// Generated from /home/atreya/Documents/Projects/Miscellaneous/bfc/grammar/bf.g4 by ANTLR 4.8 + + +#include "bfVisitor.h" + + diff --git a/lib/generated/bfVisitor.h b/lib/generated/bfVisitor.h new file mode 100644 index 0000000..eada3a7 --- /dev/null +++ b/lib/generated/bfVisitor.h @@ -0,0 +1,28 @@ + +// Generated from /home/atreya/Documents/Projects/Miscellaneous/bfc/grammar/bf.g4 by ANTLR 4.8 + +#pragma once + + +#include "antlr4-runtime.h" +#include "bfParser.h" + + + +/** + * This class defines an abstract visitor for a parse tree + * produced by bfParser. + */ +class bfVisitor : public antlr4::tree::AbstractParseTreeVisitor { +public: + + /** + * Visit parse trees produced by bfParser. + */ + virtual antlrcpp::Any visitProgram(bfParser::ProgramContext *context) = 0; + + virtual antlrcpp::Any visitStatements(bfParser::StatementsContext *context) = 0; + + +}; + diff --git a/sample.txt b/sample.txt new file mode 100644 index 0000000..4745939 --- /dev/null +++ b/sample.txt @@ -0,0 +1 @@ ++++--- diff --git a/src/main.cpp b/src/main.cpp index ac361c8..d4d8c4f 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,5 +1,33 @@ -#include +#include +#include +#include "main.hpp" +#include "bfLexer.h" +#include "bfParser.h" +#include "bfBaseListener.h" +// #include "bfBaseVisitor.h" -int main(){ - std::cout<<"Hello World"<getText(); + } +}; + +int main(int argc, const char* argv[]) +{ + // std::cout<<"Hello World "<