add antlr

2020-09-29 23:16:42 +05:30
parent 34754a7130
commit 2b36e74ff5
181 changed files with 14689 additions and 7 deletions
--- a/lib/antlr4/include/atn/LexerATNSimulator.h
+++ b/lib/antlr4/include/atn/LexerATNSimulator.h
@@ -0,0 +1,210 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "atn/ATNSimulator.h"
+#include "atn/LexerATNConfig.h"
+#include "atn/ATNConfigSet.h"
+
+namespace antlr4 {
+namespace atn {
+
+  /// "dup" of ParserInterpreter
+  class ANTLR4CPP_PUBLIC LexerATNSimulator : public ATNSimulator {
+  protected:
+    class SimState {
+    public:
+      virtual ~SimState();
+
+    protected:
+      size_t index;
+      size_t line;
+      size_t charPos;
+      dfa::DFAState *dfaState;
+      virtual void reset();
+      friend class LexerATNSimulator;
+
+    private:
+      void InitializeInstanceFields();
+
+    public:
+      SimState() {
+        InitializeInstanceFields();
+      }
+    };
+
+
+  public:
+    static const size_t MIN_DFA_EDGE = 0;
+    static const size_t MAX_DFA_EDGE = 127; // forces unicode to stay in ATN
+
+  protected:
+    /// <summary>
+    /// When we hit an accept state in either the DFA or the ATN, we
+    ///  have to notify the character stream to start buffering characters
+    ///  via <seealso cref="IntStream#mark"/> and record the current state. The current sim state
+    ///  includes the current index into the input, the current line,
+    ///  and current character position in that line. Note that the Lexer is
+    ///  tracking the starting line and characterization of the token. These
+    ///  variables track the "state" of the simulator when it hits an accept state.
+    /// <p/>
+    ///  We track these variables separately for the DFA and ATN simulation
+    ///  because the DFA simulation often has to fail over to the ATN
+    ///  simulation. If the ATN simulation fails, we need the DFA to fall
+    ///  back to its previously accepted state, if any. If the ATN succeeds,
+    ///  then the ATN does the accept and the DFA simulator that invoked it
+    ///  can simply return the predicted token type.
+    /// </summary>
+    Lexer *const _recog;
+
+    /// The current token's starting index into the character stream.
+    ///  Shared across DFA to ATN simulation in case the ATN fails and the
+    ///  DFA did not have a previous accept state. In this case, we use the
+    ///  ATN-generated exception object.
+    size_t _startIndex;
+
+    /// line number 1..n within the input.
+    size_t _line;
+
+    /// The index of the character relative to the beginning of the line 0..n-1.
+    size_t _charPositionInLine;
+
+  public:
+    std::vector<dfa::DFA> &_decisionToDFA;
+
+  protected:
+    size_t _mode;
+
+    /// Used during DFA/ATN exec to record the most recent accept configuration info.
+    SimState _prevAccept;
+
+  public:
+    static int match_calls;
+
+    LexerATNSimulator(const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, PredictionContextCache &sharedContextCache);
+    LexerATNSimulator(Lexer *recog, const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, PredictionContextCache &sharedContextCache);
+    virtual ~LexerATNSimulator () {}
+
+    virtual void copyState(LexerATNSimulator *simulator);
+    virtual size_t match(CharStream *input, size_t mode);
+    virtual void reset() override;
+
+    virtual void clearDFA() override;
+
+  protected:
+    virtual size_t matchATN(CharStream *input);
+    virtual size_t execATN(CharStream *input, dfa::DFAState *ds0);
+
+    /// <summary>
+    /// Get an existing target state for an edge in the DFA. If the target state
+    /// for the edge has not yet been computed or is otherwise not available,
+    /// this method returns {@code null}.
+    /// </summary>
+    /// <param name="s"> The current DFA state </param>
+    /// <param name="t"> The next input symbol </param>
+    /// <returns> The existing target DFA state for the given input symbol
+    /// {@code t}, or {@code null} if the target state for this edge is not
+    /// already cached </returns>
+    virtual dfa::DFAState *getExistingTargetState(dfa::DFAState *s, size_t t);
+
+    /// <summary>
+    /// Compute a target state for an edge in the DFA, and attempt to add the
+    /// computed state and corresponding edge to the DFA.
+    /// </summary>
+    /// <param name="input"> The input stream </param>
+    /// <param name="s"> The current DFA state </param>
+    /// <param name="t"> The next input symbol
+    /// </param>
+    /// <returns> The computed target DFA state for the given input symbol
+    /// {@code t}. If {@code t} does not lead to a valid DFA state, this method
+    /// returns <seealso cref="#ERROR"/>. </returns>
+    virtual dfa::DFAState *computeTargetState(CharStream *input, dfa::DFAState *s, size_t t);
+
+    virtual size_t failOrAccept(CharStream *input, ATNConfigSet *reach, size_t t);
+
+    /// <summary>
+    /// Given a starting configuration set, figure out all ATN configurations
+    ///  we can reach upon input {@code t}. Parameter {@code reach} is a return
+    ///  parameter.
+    /// </summary>
+    void getReachableConfigSet(CharStream *input, ATNConfigSet *closure_, // closure_ as we have a closure() already
+                               ATNConfigSet *reach, size_t t);
+
+    virtual void accept(CharStream *input, const Ref<LexerActionExecutor> &lexerActionExecutor, size_t startIndex, size_t index,
+                        size_t line, size_t charPos);
+
+    virtual ATNState *getReachableTarget(Transition *trans, size_t t);
+
+    virtual std::unique_ptr<ATNConfigSet> computeStartState(CharStream *input, ATNState *p);
+
+    /// <summary>
+    /// Since the alternatives within any lexer decision are ordered by
+    /// preference, this method stops pursuing the closure as soon as an accept
+    /// state is reached. After the first accept state is reached by depth-first
+    /// search from {@code config}, all other (potentially reachable) states for
+    /// this rule would have a lower priority.
+    /// </summary>
+    /// <returns> {@code true} if an accept state is reached, otherwise
+    /// {@code false}. </returns>
+    virtual bool closure(CharStream *input, const Ref<LexerATNConfig> &config, ATNConfigSet *configs,
+                         bool currentAltReachedAcceptState, bool speculative, bool treatEofAsEpsilon);
+
+    // side-effect: can alter configs.hasSemanticContext
+    virtual Ref<LexerATNConfig> getEpsilonTarget(CharStream *input, const Ref<LexerATNConfig> &config, Transition *t,
+      ATNConfigSet *configs, bool speculative, bool treatEofAsEpsilon);
+
+    /// <summary>
+    /// Evaluate a predicate specified in the lexer.
+    /// <p/>
+    /// If {@code speculative} is {@code true}, this method was called before
+    /// <seealso cref="#consume"/> for the matched character. This method should call
+    /// <seealso cref="#consume"/> before evaluating the predicate to ensure position
+    /// sensitive values, including <seealso cref="Lexer#getText"/>, <seealso cref="Lexer#getLine"/>,
+    /// and <seealso cref="Lexer#getCharPositionInLine"/>, properly reflect the current
+    /// lexer state. This method should restore {@code input} and the simulator
+    /// to the original state before returning (i.e. undo the actions made by the
+    /// call to <seealso cref="#consume"/>.
+    /// </summary>
+    /// <param name="input"> The input stream. </param>
+    /// <param name="ruleIndex"> The rule containing the predicate. </param>
+    /// <param name="predIndex"> The index of the predicate within the rule. </param>
+    /// <param name="speculative"> {@code true} if the current index in {@code input} is
+    /// one character before the predicate's location.
+    /// </param>
+    /// <returns> {@code true} if the specified predicate evaluates to
+    /// {@code true}. </returns>
+    virtual bool evaluatePredicate(CharStream *input, size_t ruleIndex, size_t predIndex, bool speculative);
+
+    virtual void captureSimState(CharStream *input, dfa::DFAState *dfaState);
+    virtual dfa::DFAState* addDFAEdge(dfa::DFAState *from, size_t t, ATNConfigSet *q);
+    virtual void addDFAEdge(dfa::DFAState *p, size_t t, dfa::DFAState *q);
+
+    /// <summary>
+    /// Add a new DFA state if there isn't one with this set of
+    /// configurations already. This method also detects the first
+    /// configuration containing an ATN rule stop state. Later, when
+    /// traversing the DFA, we will know which rule to accept.
+    /// </summary>
+    virtual dfa::DFAState *addDFAState(ATNConfigSet *configs);
+
+  public:
+    dfa::DFA& getDFA(size_t mode);
+
+    /// Get the text matched so far for the current token.
+    virtual std::string getText(CharStream *input);
+    virtual size_t getLine() const;
+    virtual void setLine(size_t line);
+    virtual size_t getCharPositionInLine();
+    virtual void setCharPositionInLine(size_t charPositionInLine);
+    virtual void consume(CharStream *input);
+    virtual std::string getTokenName(size_t t);
+
+  private:
+    void InitializeInstanceFields();
+  };
+
+} // namespace atn
+} // namespace antlr4