diff --git a/.gitignore b/.gitignore
index 307433c..4a785b7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,6 @@
+.antlr/
+build/
+
# Prerequisites
*.d
@@ -31,8 +34,6 @@
*.out
*.app
-build/
-
CMakeLists.txt.user
CMakeCache.txt
CMakeFiles
diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json
new file mode 100644
index 0000000..612616b
--- /dev/null
+++ b/.vscode/c_cpp_properties.json
@@ -0,0 +1,18 @@
+{
+ "configurations": [
+ {
+ "name": "Linux",
+ "includePath": [
+ "${workspaceFolder}/include",
+ "${workspaceFolder}/**"
+ ],
+ "defines": [],
+ "compilerPath": "/usr/lib64/ccache/clang",
+ "cStandard": "c11",
+ "cppStandard": "c++14",
+ "intelliSenseMode": "clang-x64",
+ "configurationProvider": "ms-vscode.cmake-tools"
+ }
+ ],
+ "version": 4
+}
\ No newline at end of file
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 0db5873..4544474 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,3 +1,10 @@
{
- "cmake.configureOnOpen": true
+ "cmake.configureOnOpen": true,
+ "antlr4.generation": {
+ "mode": "external",
+ "language": "Cpp",
+ "listeners": true,
+ "visitors": true,
+ "outputDir": "../lib/generated"
+ }
}
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 057fe38..b53171f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -9,11 +9,21 @@ set(CMAKE_CXX_STANDARD_REQUIRED True)
set(SOURCES
src/main.cpp
+ lib/generated/bfLexer.cpp
+ lib/generated/bfParser.cpp
+ lib/generated/bfBaseVisitor.cpp
+ lib/generated/bfVisitor.cpp
+ lib/generated/bfBaseListener.cpp
+ lib/generated/bfListener.cpp
)
-
add_executable(main.out ${SOURCES})
+target_link_libraries(main.out ${PROJECT_SOURCE_DIR}/lib/antlr4/lib/libantlr4-runtime.a)
+
target_include_directories(main.out
PRIVATE
${PROJECT_SOURCE_DIR}/include
+ ${PROJECT_SOURCE_DIR}/lib/antlr4/include
+ ${PROJECT_SOURCE_DIR}/lib/generated
+
)
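
For orientation, here is a minimal sketch of what a src/main.cpp driving this build could look like. This is a sketch only, not the project's actual main.cpp: the bfLexer/bfParser class names follow from the generated sources listed above, antlr4-runtime.h is the runtime's umbrella header, and the program() start-rule accessor is assumed from the grammar added below.

    // Minimal sketch: wire the generated lexer/parser to the vendored runtime.
    #include <iostream>

    #include "antlr4-runtime.h"   // umbrella header from lib/antlr4/include
    #include "bfLexer.h"          // generated into lib/generated from grammar/bf.g4
    #include "bfParser.h"

    int main() {
        antlr4::ANTLRInputStream input("+");       // a single instruction
        bfLexer lexer(&input);
        antlr4::CommonTokenStream tokens(&lexer);
        bfParser parser(&tokens);
        auto *tree = parser.program();             // 'program' is the grammar's start rule
        std::cout << tree->toStringTree(&parser) << std::endl;  // LISP-style parse tree
        return 0;
    }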
diff --git a/grammar/bf.g4 b/grammar/bf.g4
new file mode 100644
index 0000000..fc91c00
--- /dev/null
+++ b/grammar/bf.g4
@@ -0,0 +1,13 @@
+grammar bf;
+
+program: statements;
+
+statements: INC | DEC | INPUT | OUTPUT;
+
+COMMENT: '\\\\' .*? '\\\\' -> skip;
+INPUT: '?';
+OUTPUT: '.';
+DEC: '-';
+INC: '+';
+LEFT: '>';
+RIGHT: '<';
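
Generating from this grammar also produces a visitor base class (bfBaseVisitor, listed in CMakeLists.txt above) with one override point per parser rule. Below is a hedged sketch of a visitor for the statements rule; the antlrcpp::Any return type matches the vintage of the vendored runtime (newer runtimes use std::any), and PrintingVisitor itself is hypothetical, not project code.

    #include <iostream>

    #include "antlr4-runtime.h"
    #include "bfBaseVisitor.h"   // generated alongside bfParser from this grammar
    #include "bfLexer.h"
    #include "bfParser.h"

    // Hypothetical visitor that prints which instruction was parsed.
    class PrintingVisitor : public bfBaseVisitor {
    public:
        antlrcpp::Any visitStatements(bfParser::StatementsContext *ctx) override {
            if (ctx->INC())         std::cout << "increment current cell\n";
            else if (ctx->DEC())    std::cout << "decrement current cell\n";
            else if (ctx->INPUT())  std::cout << "read one byte of input\n";
            else if (ctx->OUTPUT()) std::cout << "write current cell\n";
            return visitChildren(ctx);
        }
    };

    int main() {
        antlr4::ANTLRInputStream input(".");
        bfLexer lexer(&input);
        antlr4::CommonTokenStream tokens(&lexer);
        bfParser parser(&tokens);
        PrintingVisitor visitor;
        visitor.visit(parser.program());   // prints "write current cell"
        return 0;
    }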
diff --git a/include/main.hpp b/include/main.hpp
new file mode 100644
index 0000000..b4243fa
--- /dev/null
+++ b/include/main.hpp
@@ -0,0 +1,4 @@
+#ifndef MAIN_HPP
+#define MAIN_HPP
+#define FIVE 5
+#endif
\ No newline at end of file
diff --git a/lib/antlr4/include/ANTLRErrorListener.h b/lib/antlr4/include/ANTLRErrorListener.h
new file mode 100644
index 0000000..d6efad1
--- /dev/null
+++ b/lib/antlr4/include/ANTLRErrorListener.h
@@ -0,0 +1,167 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "RecognitionException.h"
+
+namespace antlrcpp {
+ class BitSet;
+}
+
+namespace antlr4 {
+
+ /// How to emit recognition errors (an interface in Java).
+ class ANTLR4CPP_PUBLIC ANTLRErrorListener {
+ public:
+ virtual ~ANTLRErrorListener();
+
[Remainder of the diff omitted: it adds the vendored ANTLR4 C++ runtime headers as new files under lib/antlr4/include/, each carrying the upstream BSD-3-Clause copyright notice. The headers recoverable from the collapsed text are the rest of ANTLRErrorListener.h together with ANTLRErrorStrategy.h, BailErrorStrategy, BufferedTokenStream, CommonToken, CommonTokenFactory.h, CommonTokenStream, ConsoleErrorListener.h, DefaultErrorStrategy.h, InterpreterRuleContext, Lexer.h, Token, TokenStream, TokenStreamRewriter, and Vocabulary, among others.]