Save new folder
This commit is contained in:
@ -0,0 +1,400 @@
|
||||
"""Classes representing state-machine concepts"""
|
||||
|
||||
class NFA:
|
||||
"""A non deterministic finite automata
|
||||
|
||||
A non deterministic automata is a form of a finite state
|
||||
machine. An NFA's rules are less restrictive than a DFA.
|
||||
The NFA rules are:
|
||||
|
||||
* A transition can be non-deterministic and can result in
|
||||
nothing, one, or two or more states.
|
||||
|
||||
* An epsilon transition consuming empty input is valid.
|
||||
Transitions consuming labeled symbols are also permitted.
|
||||
|
||||
This class assumes that there is only one starting state and one
|
||||
accepting (ending) state.
|
||||
|
||||
Attributes:
|
||||
name (str): The name of the rule the NFA is representing.
|
||||
start (NFAState): The starting state.
|
||||
end (NFAState): The ending state
|
||||
"""
|
||||
|
||||
def __init__(self, start, end):
|
||||
self.name = start.rule_name
|
||||
self.start = start
|
||||
self.end = end
|
||||
|
||||
def __repr__(self):
|
||||
return "NFA(start={}, end={})".format(self.start, self.end)
|
||||
|
||||
def dump(self, writer=print):
|
||||
"""Dump a graphical representation of the NFA"""
|
||||
todo = [self.start]
|
||||
for i, state in enumerate(todo):
|
||||
writer(" State", i, state is self.end and "(final)" or "")
|
||||
for arc in state.arcs:
|
||||
label = arc.label
|
||||
next = arc.target
|
||||
if next in todo:
|
||||
j = todo.index(next)
|
||||
else:
|
||||
j = len(todo)
|
||||
todo.append(next)
|
||||
if label is None:
|
||||
writer(" -> %d" % j)
|
||||
else:
|
||||
writer(" %s -> %d" % (label, j))
|
||||
|
||||
def dump_graph(self, writer):
|
||||
"""Dump a DOT representation of the NFA"""
|
||||
writer('digraph %s_nfa {\n' % self.name)
|
||||
todo = [self.start]
|
||||
for i, state in enumerate(todo):
|
||||
writer(' %d [label="State %d %s"];\n' % (i, i, state is self.end and "(final)" or ""))
|
||||
for arc in state.arcs:
|
||||
label = arc.label
|
||||
next = arc.target
|
||||
if next in todo:
|
||||
j = todo.index(next)
|
||||
else:
|
||||
j = len(todo)
|
||||
todo.append(next)
|
||||
if label is None:
|
||||
writer(" %d -> %d [style=dotted label=ε];\n" % (i, j))
|
||||
else:
|
||||
writer(" %d -> %d [label=%s];\n" % (i, j, label.replace("'", '"')))
|
||||
writer('}\n')
|
||||
|
||||
|
||||
class NFAArc:
|
||||
"""An arc representing a transition between two NFA states.
|
||||
|
||||
NFA states can be connected via two ways:
|
||||
|
||||
* A label transition: An input equal to the label must
|
||||
be consumed to perform the transition.
|
||||
* An epsilon transition: The transition can be taken without
|
||||
consuming any input symbol.
|
||||
|
||||
Attributes:
|
||||
target (NFAState): The ending state of the transition arc.
|
||||
label (Optional[str]): The label that must be consumed to make
|
||||
the transition. An epsilon transition is represented
|
||||
using `None`.
|
||||
"""
|
||||
|
||||
def __init__(self, target, label):
|
||||
self.target = target
|
||||
self.label = label
|
||||
|
||||
def __repr__(self):
|
||||
return "<%s: %s>" % (self.__class__.__name__, self.label)
|
||||
|
||||
|
||||
class NFAState:
|
||||
"""A state of a NFA, non deterministic finite automata.
|
||||
|
||||
Attributes:
|
||||
target (rule_name): The name of the rule used to represent the NFA's
|
||||
ending state after a transition.
|
||||
arcs (Dict[Optional[str], NFAState]): A mapping representing transitions
|
||||
between the current NFA state and another NFA state via following
|
||||
a label.
|
||||
"""
|
||||
|
||||
def __init__(self, rule_name):
|
||||
self.rule_name = rule_name
|
||||
self.arcs = []
|
||||
|
||||
def add_arc(self, target, label=None):
|
||||
"""Add a new arc to connect the state to a target state within the NFA
|
||||
|
||||
The method adds a new arc to the list of arcs available as transitions
|
||||
from the present state. An optional label indicates a named transition
|
||||
that consumes an input while the absence of a label represents an epsilon
|
||||
transition.
|
||||
|
||||
Attributes:
|
||||
target (NFAState): The end of the transition that the arc represents.
|
||||
label (Optional[str]): The label that must be consumed for making
|
||||
the transition. If the label is not provided the transition is assumed
|
||||
to be an epsilon-transition.
|
||||
"""
|
||||
assert label is None or isinstance(label, str)
|
||||
assert isinstance(target, NFAState)
|
||||
self.arcs.append(NFAArc(target, label))
|
||||
|
||||
def __repr__(self):
|
||||
return "<%s: from %s>" % (self.__class__.__name__, self.rule_name)
|
||||
|
||||
|
||||
class DFA:
|
||||
"""A deterministic finite automata
|
||||
|
||||
A deterministic finite automata is a form of a finite state machine
|
||||
that obeys the following rules:
|
||||
|
||||
* Each of the transitions is uniquely determined by
|
||||
the source state and input symbol
|
||||
* Reading an input symbol is required for each state
|
||||
transition (no epsilon transitions).
|
||||
|
||||
The finite-state machine will accept or reject a string of symbols
|
||||
and only produces a unique computation of the automaton for each input
|
||||
string. The DFA must have a unique starting state (represented as the first
|
||||
element in the list of states) but can have multiple final states.
|
||||
|
||||
Attributes:
|
||||
name (str): The name of the rule the DFA is representing.
|
||||
states (List[DFAState]): A collection of DFA states.
|
||||
"""
|
||||
|
||||
def __init__(self, name, states):
|
||||
self.name = name
|
||||
self.states = states
|
||||
|
||||
@classmethod
|
||||
def from_nfa(cls, nfa):
|
||||
"""Constructs a DFA from a NFA using the Rabin–Scott construction algorithm.
|
||||
|
||||
To simulate the operation of a DFA on a given input string, it's
|
||||
necessary to keep track of a single state at any time, or more precisely,
|
||||
the state that the automaton will reach after seeing a prefix of the
|
||||
input. In contrast, to simulate an NFA, it's necessary to keep track of
|
||||
a set of states: all of the states that the automaton could reach after
|
||||
seeing the same prefix of the input, according to the nondeterministic
|
||||
choices made by the automaton. There are two possible sources of
|
||||
non-determinism:
|
||||
|
||||
1) Multiple (one or more) transitions with the same label
|
||||
|
||||
'A' +-------+
|
||||
+----------->+ State +----------->+
|
||||
| | 2 |
|
||||
+-------+ +-------+
|
||||
| State |
|
||||
| 1 | +-------+
|
||||
+-------+ | State |
|
||||
+----------->+ 3 +----------->+
|
||||
'A' +-------+
|
||||
|
||||
2) Epsilon transitions (transitions that can be taken without consuming any input)
|
||||
|
||||
+-------+ +-------+
|
||||
| State | ε | State |
|
||||
| 1 +----------->+ 2 +----------->+
|
||||
+-------+ +-------+
|
||||
|
||||
Looking at the first case above, we can't determine which transition should be
|
||||
followed when given an input A. We could choose whether or not to follow the
|
||||
transition while in the second case the problem is that we can choose both to
|
||||
follow the transition or not doing it. To solve this problem we can imagine that
|
||||
we follow all possibilities at the same time and we construct new states from the
|
||||
set of all possible reachable states. For every case in the previous example:
|
||||
|
||||
|
||||
1) For multiple transitions with the same label we colapse all of the
|
||||
final states under the same one
|
||||
|
||||
+-------+ +-------+
|
||||
| State | 'A' | State |
|
||||
| 1 +----------->+ 2-3 +----------->+
|
||||
+-------+ +-------+
|
||||
|
||||
2) For epsilon transitions we collapse all epsilon-reachable states
|
||||
into the same one
|
||||
|
||||
+-------+
|
||||
| State |
|
||||
| 1-2 +----------->
|
||||
+-------+
|
||||
|
||||
Because the DFA states consist of sets of NFA states, an n-state NFA
|
||||
may be converted to a DFA with at most 2**n states. Notice that the
|
||||
constructed DFA is not minimal and can be simplified or reduced
|
||||
afterwards.
|
||||
|
||||
Parameters:
|
||||
name (NFA): The NFA to transform to DFA.
|
||||
"""
|
||||
assert isinstance(nfa, NFA)
|
||||
|
||||
def add_closure(nfa_state, base_nfa_set):
|
||||
"""Calculate the epsilon-closure of a given state
|
||||
|
||||
Add to the *base_nfa_set* all the states that are
|
||||
reachable from *nfa_state* via epsilon-transitions.
|
||||
"""
|
||||
assert isinstance(nfa_state, NFAState)
|
||||
if nfa_state in base_nfa_set:
|
||||
return
|
||||
base_nfa_set.add(nfa_state)
|
||||
for nfa_arc in nfa_state.arcs:
|
||||
if nfa_arc.label is None:
|
||||
add_closure(nfa_arc.target, base_nfa_set)
|
||||
|
||||
# Calculate the epsilon-closure of the starting state
|
||||
base_nfa_set = set()
|
||||
add_closure(nfa.start, base_nfa_set)
|
||||
|
||||
# Start by visiting the NFA starting state (there is only one).
|
||||
states = [DFAState(nfa.name, base_nfa_set, nfa.end)]
|
||||
|
||||
for state in states: # NB states grow while we're iterating
|
||||
|
||||
# Find transitions from the current state to other reachable states
|
||||
# and store them in mapping that correlates the label to all the
|
||||
# possible reachable states that can be obtained by consuming a
|
||||
# token equal to the label. Each set of all the states that can
|
||||
# be reached after following a label will be the a DFA state.
|
||||
arcs = {}
|
||||
for nfa_state in state.nfa_set:
|
||||
for nfa_arc in nfa_state.arcs:
|
||||
if nfa_arc.label is not None:
|
||||
nfa_set = arcs.setdefault(nfa_arc.label, set())
|
||||
# All states that can be reached by epsilon-transitions
|
||||
# are also included in the set of reachable states.
|
||||
add_closure(nfa_arc.target, nfa_set)
|
||||
|
||||
# Now create new DFAs by visiting all posible transitions between
|
||||
# the current DFA state and the new power-set states (each nfa_set)
|
||||
# via the different labels. As the nodes are appended to *states* this
|
||||
# is performing a breadth-first search traversal over the power-set of
|
||||
# the states of the original NFA.
|
||||
for label, nfa_set in sorted(arcs.items()):
|
||||
for exisisting_state in states:
|
||||
if exisisting_state.nfa_set == nfa_set:
|
||||
# The DFA state already exists for this rule.
|
||||
next_state = exisisting_state
|
||||
break
|
||||
else:
|
||||
next_state = DFAState(nfa.name, nfa_set, nfa.end)
|
||||
states.append(next_state)
|
||||
|
||||
# Add a transition between the current DFA state and the new
|
||||
# DFA state (the power-set state) via the current label.
|
||||
state.add_arc(next_state, label)
|
||||
|
||||
return cls(nfa.name, states)
|
||||
|
||||
def __iter__(self):
|
||||
return iter(self.states)
|
||||
|
||||
def simplify(self):
|
||||
"""Attempt to reduce the number of states of the DFA
|
||||
|
||||
Transform the DFA into an equivalent DFA that has fewer states. Two
|
||||
classes of states can be removed or merged from the original DFA without
|
||||
affecting the language it accepts to minimize it:
|
||||
|
||||
* Unreachable states can not be reached from the initial
|
||||
state of the DFA, for any input string.
|
||||
* Nondistinguishable states are those that cannot be distinguished
|
||||
from one another for any input string.
|
||||
|
||||
This algorithm does not achieve the optimal fully-reduced solution, but it
|
||||
works well enough for the particularities of the Python grammar. The
|
||||
algorithm repeatedly looks for two states that have the same set of
|
||||
arcs (same labels pointing to the same nodes) and unifies them, until
|
||||
things stop changing.
|
||||
"""
|
||||
changes = True
|
||||
while changes:
|
||||
changes = False
|
||||
for i, state_i in enumerate(self.states):
|
||||
for j in range(i + 1, len(self.states)):
|
||||
state_j = self.states[j]
|
||||
if state_i == state_j:
|
||||
del self.states[j]
|
||||
for state in self.states:
|
||||
state.unifystate(state_j, state_i)
|
||||
changes = True
|
||||
break
|
||||
|
||||
def dump(self, writer=print):
|
||||
"""Dump a graphical representation of the DFA"""
|
||||
for i, state in enumerate(self.states):
|
||||
writer(" State", i, state.is_final and "(final)" or "")
|
||||
for label, next in sorted(state.arcs.items()):
|
||||
writer(" %s -> %d" % (label, self.states.index(next)))
|
||||
|
||||
def dump_graph(self, writer):
|
||||
"""Dump a DOT representation of the DFA"""
|
||||
writer('digraph %s_dfa {\n' % self.name)
|
||||
for i, state in enumerate(self.states):
|
||||
writer(' %d [label="State %d %s"];\n' % (i, i, state.is_final and "(final)" or ""))
|
||||
for label, next in sorted(state.arcs.items()):
|
||||
writer(" %d -> %d [label=%s];\n" % (i, self.states.index(next), label.replace("'", '"')))
|
||||
writer('}\n')
|
||||
|
||||
|
||||
class DFAState(object):
|
||||
"""A state of a DFA
|
||||
|
||||
Attributes:
|
||||
rule_name (rule_name): The name of the DFA rule containing the represented state.
|
||||
nfa_set (Set[NFAState]): The set of NFA states used to create this state.
|
||||
final (bool): True if the state represents an accepting state of the DFA
|
||||
containing this state.
|
||||
arcs (Dict[label, DFAState]): A mapping representing transitions between
|
||||
the current DFA state and another DFA state via following a label.
|
||||
"""
|
||||
|
||||
def __init__(self, rule_name, nfa_set, final):
|
||||
assert isinstance(nfa_set, set)
|
||||
assert isinstance(next(iter(nfa_set)), NFAState)
|
||||
assert isinstance(final, NFAState)
|
||||
self.rule_name = rule_name
|
||||
self.nfa_set = nfa_set
|
||||
self.arcs = {} # map from terminals/nonterminals to DFAState
|
||||
self.is_final = final in nfa_set
|
||||
|
||||
def add_arc(self, target, label):
|
||||
"""Add a new arc to the current state.
|
||||
|
||||
Parameters:
|
||||
target (DFAState): The DFA state at the end of the arc.
|
||||
label (str): The label respresenting the token that must be consumed
|
||||
to perform this transition.
|
||||
"""
|
||||
assert isinstance(label, str)
|
||||
assert label not in self.arcs
|
||||
assert isinstance(target, DFAState)
|
||||
self.arcs[label] = target
|
||||
|
||||
def unifystate(self, old, new):
|
||||
"""Replace all arcs from the current node to *old* with *new*.
|
||||
|
||||
Parameters:
|
||||
old (DFAState): The DFA state to remove from all existing arcs.
|
||||
new (DFAState): The DFA state to replace in all existing arcs.
|
||||
"""
|
||||
for label, next_ in self.arcs.items():
|
||||
if next_ is old:
|
||||
self.arcs[label] = new
|
||||
|
||||
def __eq__(self, other):
|
||||
# The nfa_set does not matter for equality
|
||||
assert isinstance(other, DFAState)
|
||||
if self.is_final != other.is_final:
|
||||
return False
|
||||
# We cannot just return self.arcs == other.arcs because that
|
||||
# would invoke this method recursively if there are any cycles.
|
||||
if len(self.arcs) != len(other.arcs):
|
||||
return False
|
||||
for label, next_ in self.arcs.items():
|
||||
if next_ is not other.arcs.get(label):
|
||||
return False
|
||||
return True
|
||||
|
||||
__hash__ = None # For Py3 compatibility.
|
||||
|
||||
def __repr__(self):
|
||||
return "<%s: %s is_final=%s>" % (
|
||||
self.__class__.__name__,
|
||||
self.rule_name,
|
||||
self.is_final,
|
||||
)
|
||||
Reference in New Issue
Block a user