add antlr
This commit is contained in:
436
lib/antlr4/include/atn/PredictionMode.h
Normal file
436
lib/antlr4/include/atn/PredictionMode.h
Normal file
@@ -0,0 +1,436 @@
|
||||
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
|
||||
* Use of this file is governed by the BSD 3-clause license that
|
||||
* can be found in the LICENSE.txt file in the project root.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "support/BitSet.h"
|
||||
|
||||
namespace antlr4 {
|
||||
namespace atn {
|
||||
|
||||
/**
|
||||
* This enumeration defines the prediction modes available in ANTLR 4 along with
|
||||
* utility methods for analyzing configuration sets for conflicts and/or
|
||||
* ambiguities.
|
||||
*/
|
||||
enum class PredictionMode {
|
||||
/**
|
||||
* The SLL(*) prediction mode. This prediction mode ignores the current
|
||||
* parser context when making predictions. This is the fastest prediction
|
||||
* mode, and provides correct results for many grammars. This prediction
|
||||
* mode is more powerful than the prediction mode provided by ANTLR 3, but
|
||||
* may result in syntax errors for grammar and input combinations which are
|
||||
* not SLL.
|
||||
*
|
||||
* <p>
|
||||
* When using this prediction mode, the parser will either return a correct
|
||||
* parse tree (i.e. the same parse tree that would be returned with the
|
||||
* {@link #LL} prediction mode), or it will report a syntax error. If a
|
||||
* syntax error is encountered when using the {@link #SLL} prediction mode,
|
||||
* it may be due to either an actual syntax error in the input or indicate
|
||||
* that the particular combination of grammar and input requires the more
|
||||
* powerful {@link #LL} prediction abilities to complete successfully.</p>
|
||||
*
|
||||
* <p>
|
||||
* This prediction mode does not provide any guarantees for prediction
|
||||
* behavior for syntactically-incorrect inputs.</p>
|
||||
*/
|
||||
SLL,
|
||||
|
||||
/**
|
||||
* The LL(*) prediction mode. This prediction mode allows the current parser
|
||||
* context to be used for resolving SLL conflicts that occur during
|
||||
* prediction. This is the fastest prediction mode that guarantees correct
|
||||
* parse results for all combinations of grammars with syntactically correct
|
||||
* inputs.
|
||||
*
|
||||
* <p>
|
||||
* When using this prediction mode, the parser will make correct decisions
|
||||
* for all syntactically-correct grammar and input combinations. However, in
|
||||
* cases where the grammar is truly ambiguous this prediction mode might not
|
||||
* report a precise answer for <em>exactly which</em> alternatives are
|
||||
* ambiguous.</p>
|
||||
*
|
||||
* <p>
|
||||
* This prediction mode does not provide any guarantees for prediction
|
||||
* behavior for syntactically-incorrect inputs.</p>
|
||||
*/
|
||||
LL,
|
||||
|
||||
/**
|
||||
* The LL(*) prediction mode with exact ambiguity detection. In addition to
|
||||
* the correctness guarantees provided by the {@link #LL} prediction mode,
|
||||
* this prediction mode instructs the prediction algorithm to determine the
|
||||
* complete and exact set of ambiguous alternatives for every ambiguous
|
||||
* decision encountered while parsing.
|
||||
*
|
||||
* <p>
|
||||
* This prediction mode may be used for diagnosing ambiguities during
|
||||
* grammar development. Due to the performance overhead of calculating sets
|
||||
* of ambiguous alternatives, this prediction mode should be avoided when
|
||||
* the exact results are not necessary.</p>
|
||||
*
|
||||
* <p>
|
||||
* This prediction mode does not provide any guarantees for prediction
|
||||
* behavior for syntactically-incorrect inputs.</p>
|
||||
*/
|
||||
LL_EXACT_AMBIG_DETECTION
|
||||
};
|
||||
|
||||
class ANTLR4CPP_PUBLIC PredictionModeClass {
|
||||
public:
|
||||
/**
|
||||
* Computes the SLL prediction termination condition.
|
||||
*
|
||||
* <p>
|
||||
* This method computes the SLL prediction termination condition for both of
|
||||
* the following cases.</p>
|
||||
*
|
||||
* <ul>
|
||||
* <li>The usual SLL+LL fallback upon SLL conflict</li>
|
||||
* <li>Pure SLL without LL fallback</li>
|
||||
* </ul>
|
||||
*
|
||||
* <p><strong>COMBINED SLL+LL PARSING</strong></p>
|
||||
*
|
||||
* <p>When LL-fallback is enabled upon SLL conflict, correct predictions are
|
||||
* ensured regardless of how the termination condition is computed by this
|
||||
* method. Due to the substantially higher cost of LL prediction, the
|
||||
* prediction should only fall back to LL when the additional lookahead
|
||||
* cannot lead to a unique SLL prediction.</p>
|
||||
*
|
||||
* <p>Assuming combined SLL+LL parsing, an SLL configuration set with only
|
||||
* conflicting subsets should fall back to full LL, even if the
|
||||
* configuration sets don't resolve to the same alternative (e.g.
|
||||
* {@code {1,2}} and {@code {3,4}}. If there is at least one non-conflicting
|
||||
* configuration, SLL could continue with the hopes that more lookahead will
|
||||
* resolve via one of those non-conflicting configurations.</p>
|
||||
*
|
||||
* <p>Here's the prediction termination rule them: SLL (for SLL+LL parsing)
|
||||
* stops when it sees only conflicting configuration subsets. In contrast,
|
||||
* full LL keeps going when there is uncertainty.</p>
|
||||
*
|
||||
* <p><strong>HEURISTIC</strong></p>
|
||||
*
|
||||
* <p>As a heuristic, we stop prediction when we see any conflicting subset
|
||||
* unless we see a state that only has one alternative associated with it.
|
||||
* The single-alt-state thing lets prediction continue upon rules like
|
||||
* (otherwise, it would admit defeat too soon):</p>
|
||||
*
|
||||
* <p>{@code [12|1|[], 6|2|[], 12|2|[]]. s : (ID | ID ID?) ';' ;}</p>
|
||||
*
|
||||
* <p>When the ATN simulation reaches the state before {@code ';'}, it has a
|
||||
* DFA state that looks like: {@code [12|1|[], 6|2|[], 12|2|[]]}. Naturally
|
||||
* {@code 12|1|[]} and {@code 12|2|[]} conflict, but we cannot stop
|
||||
* processing this node because alternative to has another way to continue,
|
||||
* via {@code [6|2|[]]}.</p>
|
||||
*
|
||||
* <p>It also let's us continue for this rule:</p>
|
||||
*
|
||||
* <p>{@code [1|1|[], 1|2|[], 8|3|[]] a : A | A | A B ;}</p>
|
||||
*
|
||||
* <p>After matching input A, we reach the stop state for rule A, state 1.
|
||||
* State 8 is the state right before B. Clearly alternatives 1 and 2
|
||||
* conflict and no amount of further lookahead will separate the two.
|
||||
* However, alternative 3 will be able to continue and so we do not stop
|
||||
* working on this state. In the previous example, we're concerned with
|
||||
* states associated with the conflicting alternatives. Here alt 3 is not
|
||||
* associated with the conflicting configs, but since we can continue
|
||||
* looking for input reasonably, don't declare the state done.</p>
|
||||
*
|
||||
* <p><strong>PURE SLL PARSING</strong></p>
|
||||
*
|
||||
* <p>To handle pure SLL parsing, all we have to do is make sure that we
|
||||
* combine stack contexts for configurations that differ only by semantic
|
||||
* predicate. From there, we can do the usual SLL termination heuristic.</p>
|
||||
*
|
||||
* <p><strong>PREDICATES IN SLL+LL PARSING</strong></p>
|
||||
*
|
||||
* <p>SLL decisions don't evaluate predicates until after they reach DFA stop
|
||||
* states because they need to create the DFA cache that works in all
|
||||
* semantic situations. In contrast, full LL evaluates predicates collected
|
||||
* during start state computation so it can ignore predicates thereafter.
|
||||
* This means that SLL termination detection can totally ignore semantic
|
||||
* predicates.</p>
|
||||
*
|
||||
* <p>Implementation-wise, {@link ATNConfigSet} combines stack contexts but not
|
||||
* semantic predicate contexts so we might see two configurations like the
|
||||
* following.</p>
|
||||
*
|
||||
* <p>{@code (s, 1, x, {}), (s, 1, x', {p})}</p>
|
||||
*
|
||||
* <p>Before testing these configurations against others, we have to merge
|
||||
* {@code x} and {@code x'} (without modifying the existing configurations).
|
||||
* For example, we test {@code (x+x')==x''} when looking for conflicts in
|
||||
* the following configurations.</p>
|
||||
*
|
||||
* <p>{@code (s, 1, x, {}), (s, 1, x', {p}), (s, 2, x'', {})}</p>
|
||||
*
|
||||
* <p>If the configuration set has predicates (as indicated by
|
||||
* {@link ATNConfigSet#hasSemanticContext}), this algorithm makes a copy of
|
||||
* the configurations to strip out all of the predicates so that a standard
|
||||
* {@link ATNConfigSet} will merge everything ignoring predicates.</p>
|
||||
*/
|
||||
static bool hasSLLConflictTerminatingPrediction(PredictionMode mode, ATNConfigSet *configs);
|
||||
|
||||
/// <summary>
|
||||
/// Checks if any configuration in {@code configs} is in a
|
||||
/// <seealso cref="RuleStopState"/>. Configurations meeting this condition have
|
||||
/// reached
|
||||
/// the end of the decision rule (local context) or end of start rule (full
|
||||
/// context).
|
||||
/// </summary>
|
||||
/// <param name="configs"> the configuration set to test </param>
|
||||
/// <returns> {@code true} if any configuration in {@code configs} is in a
|
||||
/// <seealso cref="RuleStopState"/>, otherwise {@code false} </returns>
|
||||
static bool hasConfigInRuleStopState(ATNConfigSet *configs);
|
||||
|
||||
/// <summary>
|
||||
/// Checks if all configurations in {@code configs} are in a
|
||||
/// <seealso cref="RuleStopState"/>. Configurations meeting this condition have
|
||||
/// reached
|
||||
/// the end of the decision rule (local context) or end of start rule (full
|
||||
/// context).
|
||||
/// </summary>
|
||||
/// <param name="configs"> the configuration set to test </param>
|
||||
/// <returns> {@code true} if all configurations in {@code configs} are in a
|
||||
/// <seealso cref="RuleStopState"/>, otherwise {@code false} </returns>
|
||||
static bool allConfigsInRuleStopStates(ATNConfigSet *configs);
|
||||
|
||||
/**
|
||||
* Full LL prediction termination.
|
||||
*
|
||||
* <p>Can we stop looking ahead during ATN simulation or is there some
|
||||
* uncertainty as to which alternative we will ultimately pick, after
|
||||
* consuming more input? Even if there are partial conflicts, we might know
|
||||
* that everything is going to resolve to the same minimum alternative. That
|
||||
* means we can stop since no more lookahead will change that fact. On the
|
||||
* other hand, there might be multiple conflicts that resolve to different
|
||||
* minimums. That means we need more look ahead to decide which of those
|
||||
* alternatives we should predict.</p>
|
||||
*
|
||||
* <p>The basic idea is to split the set of configurations {@code C}, into
|
||||
* conflicting subsets {@code (s, _, ctx, _)} and singleton subsets with
|
||||
* non-conflicting configurations. Two configurations conflict if they have
|
||||
* identical {@link ATNConfig#state} and {@link ATNConfig#context} values
|
||||
* but different {@link ATNConfig#alt} value, e.g. {@code (s, i, ctx, _)}
|
||||
* and {@code (s, j, ctx, _)} for {@code i!=j}.</p>
|
||||
*
|
||||
* <p>Reduce these configuration subsets to the set of possible alternatives.
|
||||
* You can compute the alternative subsets in one pass as follows:</p>
|
||||
*
|
||||
* <p>{@code A_s,ctx = {i | (s, i, ctx, _)}} for each configuration in
|
||||
* {@code C} holding {@code s} and {@code ctx} fixed.</p>
|
||||
*
|
||||
* <p>Or in pseudo-code, for each configuration {@code c} in {@code C}:</p>
|
||||
*
|
||||
* <pre>
|
||||
* map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses s and x, not
|
||||
* alt and not pred
|
||||
* </pre>
|
||||
*
|
||||
* <p>The values in {@code map} are the set of {@code A_s,ctx} sets.</p>
|
||||
*
|
||||
* <p>If {@code |A_s,ctx|=1} then there is no conflict associated with
|
||||
* {@code s} and {@code ctx}.</p>
|
||||
*
|
||||
* <p>Reduce the subsets to singletons by choosing a minimum of each subset. If
|
||||
* the union of these alternative subsets is a singleton, then no amount of
|
||||
* more lookahead will help us. We will always pick that alternative. If,
|
||||
* however, there is more than one alternative, then we are uncertain which
|
||||
* alternative to predict and must continue looking for resolution. We may
|
||||
* or may not discover an ambiguity in the future, even if there are no
|
||||
* conflicting subsets this round.</p>
|
||||
*
|
||||
* <p>The biggest sin is to terminate early because it means we've made a
|
||||
* decision but were uncertain as to the eventual outcome. We haven't used
|
||||
* enough lookahead. On the other hand, announcing a conflict too late is no
|
||||
* big deal; you will still have the conflict. It's just inefficient. It
|
||||
* might even look until the end of file.</p>
|
||||
*
|
||||
* <p>No special consideration for semantic predicates is required because
|
||||
* predicates are evaluated on-the-fly for full LL prediction, ensuring that
|
||||
* no configuration contains a semantic context during the termination
|
||||
* check.</p>
|
||||
*
|
||||
* <p><strong>CONFLICTING CONFIGS</strong></p>
|
||||
*
|
||||
* <p>Two configurations {@code (s, i, x)} and {@code (s, j, x')}, conflict
|
||||
* when {@code i!=j} but {@code x=x'}. Because we merge all
|
||||
* {@code (s, i, _)} configurations together, that means that there are at
|
||||
* most {@code n} configurations associated with state {@code s} for
|
||||
* {@code n} possible alternatives in the decision. The merged stacks
|
||||
* complicate the comparison of configuration contexts {@code x} and
|
||||
* {@code x'}. Sam checks to see if one is a subset of the other by calling
|
||||
* merge and checking to see if the merged result is either {@code x} or
|
||||
* {@code x'}. If the {@code x} associated with lowest alternative {@code i}
|
||||
* is the superset, then {@code i} is the only possible prediction since the
|
||||
* others resolve to {@code min(i)} as well. However, if {@code x} is
|
||||
* associated with {@code j>i} then at least one stack configuration for
|
||||
* {@code j} is not in conflict with alternative {@code i}. The algorithm
|
||||
* should keep going, looking for more lookahead due to the uncertainty.</p>
|
||||
*
|
||||
* <p>For simplicity, I'm doing a equality check between {@code x} and
|
||||
* {@code x'} that lets the algorithm continue to consume lookahead longer
|
||||
* than necessary. The reason I like the equality is of course the
|
||||
* simplicity but also because that is the test you need to detect the
|
||||
* alternatives that are actually in conflict.</p>
|
||||
*
|
||||
* <p><strong>CONTINUE/STOP RULE</strong></p>
|
||||
*
|
||||
* <p>Continue if union of resolved alternative sets from non-conflicting and
|
||||
* conflicting alternative subsets has more than one alternative. We are
|
||||
* uncertain about which alternative to predict.</p>
|
||||
*
|
||||
* <p>The complete set of alternatives, {@code [i for (_,i,_)]}, tells us which
|
||||
* alternatives are still in the running for the amount of input we've
|
||||
* consumed at this point. The conflicting sets let us to strip away
|
||||
* configurations that won't lead to more states because we resolve
|
||||
* conflicts to the configuration with a minimum alternate for the
|
||||
* conflicting set.</p>
|
||||
*
|
||||
* <p><strong>CASES</strong></p>
|
||||
*
|
||||
* <ul>
|
||||
*
|
||||
* <li>no conflicts and more than 1 alternative in set => continue</li>
|
||||
*
|
||||
* <li> {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s, 3, z)},
|
||||
* {@code (s', 1, y)}, {@code (s', 2, y)} yields non-conflicting set
|
||||
* {@code {3}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} =
|
||||
* {@code {1,3}} => continue
|
||||
* </li>
|
||||
*
|
||||
* <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)},
|
||||
* {@code (s', 2, y)}, {@code (s'', 1, z)} yields non-conflicting set
|
||||
* {@code {1}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} =
|
||||
* {@code {1}} => stop and predict 1</li>
|
||||
*
|
||||
* <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)},
|
||||
* {@code (s', 2, y)} yields conflicting, reduced sets {@code {1}} U
|
||||
* {@code {1}} = {@code {1}} => stop and predict 1, can announce
|
||||
* ambiguity {@code {1,2}}</li>
|
||||
*
|
||||
* <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 2, y)},
|
||||
* {@code (s', 3, y)} yields conflicting, reduced sets {@code {1}} U
|
||||
* {@code {2}} = {@code {1,2}} => continue</li>
|
||||
*
|
||||
* <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 3, y)},
|
||||
* {@code (s', 4, y)} yields conflicting, reduced sets {@code {1}} U
|
||||
* {@code {3}} = {@code {1,3}} => continue</li>
|
||||
*
|
||||
* </ul>
|
||||
*
|
||||
* <p><strong>EXACT AMBIGUITY DETECTION</strong></p>
|
||||
*
|
||||
* <p>If all states report the same conflicting set of alternatives, then we
|
||||
* know we have the exact ambiguity set.</p>
|
||||
*
|
||||
* <p><code>|A_<em>i</em>|>1</code> and
|
||||
* <code>A_<em>i</em> = A_<em>j</em></code> for all <em>i</em>, <em>j</em>.</p>
|
||||
*
|
||||
* <p>In other words, we continue examining lookahead until all {@code A_i}
|
||||
* have more than one alternative and all {@code A_i} are the same. If
|
||||
* {@code A={{1,2}, {1,3}}}, then regular LL prediction would terminate
|
||||
* because the resolved set is {@code {1}}. To determine what the real
|
||||
* ambiguity is, we have to know whether the ambiguity is between one and
|
||||
* two or one and three so we keep going. We can only stop prediction when
|
||||
* we need exact ambiguity detection when the sets look like
|
||||
* {@code A={{1,2}}} or {@code {{1,2},{1,2}}}, etc...</p>
|
||||
*/
|
||||
static size_t resolvesToJustOneViableAlt(const std::vector<antlrcpp::BitSet> &altsets);
|
||||
|
||||
/// <summary>
|
||||
/// Determines if every alternative subset in {@code altsets} contains more
|
||||
/// than one alternative.
|
||||
/// </summary>
|
||||
/// <param name="altsets"> a collection of alternative subsets </param>
|
||||
/// <returns> {@code true} if every <seealso cref="BitSet"/> in {@code altsets}
|
||||
/// has
|
||||
/// <seealso cref="BitSet#cardinality cardinality"/> > 1, otherwise {@code
|
||||
/// false} </returns>
|
||||
static bool allSubsetsConflict(const std::vector<antlrcpp::BitSet> &altsets);
|
||||
|
||||
/// <summary>
|
||||
/// Determines if any single alternative subset in {@code altsets} contains
|
||||
/// exactly one alternative.
|
||||
/// </summary>
|
||||
/// <param name="altsets"> a collection of alternative subsets </param>
|
||||
/// <returns> {@code true} if {@code altsets} contains a <seealso
|
||||
/// cref="BitSet"/> with
|
||||
/// <seealso cref="BitSet#cardinality cardinality"/> 1, otherwise {@code false}
|
||||
/// </returns>
|
||||
static bool hasNonConflictingAltSet(const std::vector<antlrcpp::BitSet> &altsets);
|
||||
|
||||
/// <summary>
|
||||
/// Determines if any single alternative subset in {@code altsets} contains
|
||||
/// more than one alternative.
|
||||
/// </summary>
|
||||
/// <param name="altsets"> a collection of alternative subsets </param>
|
||||
/// <returns> {@code true} if {@code altsets} contains a <seealso
|
||||
/// cref="BitSet"/> with
|
||||
/// <seealso cref="BitSet#cardinality cardinality"/> > 1, otherwise {@code
|
||||
/// false} </returns>
|
||||
static bool hasConflictingAltSet(const std::vector<antlrcpp::BitSet> &altsets);
|
||||
|
||||
/// <summary>
|
||||
/// Determines if every alternative subset in {@code altsets} is equivalent.
|
||||
/// </summary>
|
||||
/// <param name="altsets"> a collection of alternative subsets </param>
|
||||
/// <returns> {@code true} if every member of {@code altsets} is equal to the
|
||||
/// others, otherwise {@code false} </returns>
|
||||
static bool allSubsetsEqual(const std::vector<antlrcpp::BitSet> &altsets);
|
||||
|
||||
/// <summary>
|
||||
/// Returns the unique alternative predicted by all alternative subsets in
|
||||
/// {@code altsets}. If no such alternative exists, this method returns
|
||||
/// <seealso cref="ATN#INVALID_ALT_NUMBER"/>.
|
||||
/// </summary>
|
||||
/// <param name="altsets"> a collection of alternative subsets </param>
|
||||
static size_t getUniqueAlt(const std::vector<antlrcpp::BitSet> &altsets);
|
||||
|
||||
/// <summary>
|
||||
/// Gets the complete set of represented alternatives for a collection of
|
||||
/// alternative subsets. This method returns the union of each <seealso
|
||||
/// cref="BitSet"/>
|
||||
/// in {@code altsets}.
|
||||
/// </summary>
|
||||
/// <param name="altsets"> a collection of alternative subsets </param>
|
||||
/// <returns> the set of represented alternatives in {@code altsets} </returns>
|
||||
static antlrcpp::BitSet getAlts(const std::vector<antlrcpp::BitSet> &altsets);
|
||||
|
||||
/** Get union of all alts from configs. @since 4.5.1 */
|
||||
static antlrcpp::BitSet getAlts(ATNConfigSet *configs);
|
||||
|
||||
/// <summary>
|
||||
/// This function gets the conflicting alt subsets from a configuration set.
|
||||
/// For each configuration {@code c} in {@code configs}:
|
||||
///
|
||||
/// <pre>
|
||||
/// map[c] U= c.<seealso cref="ATNConfig#alt alt"/> # map hash/equals uses s and
|
||||
/// x, not
|
||||
/// alt and not pred
|
||||
/// </pre>
|
||||
/// </summary>
|
||||
static std::vector<antlrcpp::BitSet> getConflictingAltSubsets(ATNConfigSet *configs);
|
||||
|
||||
/// <summary>
|
||||
/// Get a map from state to alt subset from a configuration set. For each
|
||||
/// configuration {@code c} in {@code configs}:
|
||||
///
|
||||
/// <pre>
|
||||
/// map[c.<seealso cref="ATNConfig#state state"/>] U= c.<seealso
|
||||
/// cref="ATNConfig#alt alt"/>
|
||||
/// </pre>
|
||||
/// </summary>
|
||||
static std::map<ATNState*, antlrcpp::BitSet> getStateToAltMap(ATNConfigSet *configs);
|
||||
|
||||
static bool hasStateAssociatedWithOneAlt(ATNConfigSet *configs);
|
||||
|
||||
static size_t getSingleViableAlt(const std::vector<antlrcpp::BitSet> &altsets);
|
||||
};
|
||||
|
||||
} // namespace atn
|
||||
} // namespace antlr4
|
Reference in New Issue
Block a user