Fixed #20: I can parse simple concepts
This commit is contained in:
@@ -0,0 +1,332 @@
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Literal
|
||||
|
||||
from common.utils import str_concept
|
||||
from core.ExecutionContext import ExecutionContext
|
||||
from core.concept import ConceptMetadata
|
||||
from parsers.ParserInput import ParserInput
|
||||
from parsers.parser_utils import UnexpectedEof, UnexpectedToken, get_text_from_tokens
|
||||
from parsers.tokenizer import Token
|
||||
|
||||
|
||||
@dataclass
class MetadataToken:
    """
    A stretch of text that was recognized as a concept.

    Records the concept metadata together with the start/end positions of
    the recognized text, the attribute used to resolve the concept and the
    parser that produced the token.
    """
    metadata: ConceptMetadata
    start: int
    end: int
    resolution_method: Literal["name", "key", "id"]  # attribute used to resolve the concept
    parser: str  # name of the parser that produced this token

    def __repr__(self):
        described = str_concept(self.metadata, drop_name=True)
        return (f"(MetadataToken metadata={described}, "
                f"start={self.start}, end={self.end}, method={self.resolution_method}, origin={self.parser})")

    def __eq__(self, other):
        # Tokens match on concept id, span and parser; resolution_method is
        # not compared (consistent with __hash__ below).
        if not isinstance(other, MetadataToken):
            return False

        return (self.metadata.id, self.start, self.end, self.parser) == \
               (other.metadata.id, other.start, other.end, other.parser)

    def __hash__(self):
        return hash((self.metadata.id, self.start, self.end, self.parser))


@dataclass
class UnrecognizedToken:
    """
    Class that represents a text that is not recognized
    We keep track of the start and the end position
    """
    buffer: str  # the raw, unrecognized text
    start: int  # position of the first token of the text in the parser input
    end: int  # position of the last token of the text in the parser input


@dataclass
|
||||
class StateResult:
|
||||
next_state: str | None
|
||||
forks: list = None
|
||||
|
||||
|
||||
@dataclass
class ConceptToRecognize:
    """
    Holds information about the concept to recognize
    """
    metadata: ConceptMetadata  # metadata of the candidate concept
    expected_tokens: list  # token values that must follow to confirm the concept
    resolution_method: Literal["name", "key", "id"]  # which attribute was used to resolve the concept


@dataclass
class StateMachineContext:
    """
    Mutable context shared by the states of one execution path: the parser
    input, the token buffer, the concept currently being recognized and the
    accumulated results/errors.
    """
    context: ExecutionContext
    parser_input: ParserInput
    get_metadata_from_first_token: Any  # callable(context, token) -> candidate concepts
    buffer: list[Token] = field(default_factory=list)  # tokens read but not yet classified
    buffer_start_pos: int = -1  # input position of the first buffered token
    concept_to_recognize: ConceptToRecognize | None = None
    result: list = field(default_factory=list)  # recognized / unrecognized tokens so far
    errors: list = field(default_factory=list)

    def get_clones(self, concepts_to_recognize):
        """
        Return one independent copy of this context per candidate concept,
        each clone set up to recognize its own concept.
        """
        clones = []
        for candidate in concepts_to_recognize:
            clones.append(StateMachineContext(self.context,
                                              self.parser_input.clone(),
                                              self.get_metadata_from_first_token,
                                              self.buffer.copy(),
                                              self.buffer_start_pos,
                                              candidate,
                                              self.result.copy(),
                                              self.errors.copy()))
        return clones

    def to_debug(self):
        """Snapshot of the context used for traceability/debugging."""
        candidate = self.concept_to_recognize
        return {"pos": self.parser_input.pos,
                "token": self.parser_input.token,
                "buffer": [buffered.value for buffered in self.buffer],
                "concept": str_concept(candidate.metadata) if candidate else None,
                "result": self.result.copy()}


class State:
    """
    Base class for state-machine states.

    A state knows its own name and the names of the states that may follow
    it; concrete subclasses implement ``run``.
    """

    def __init__(self, name, next_states):
        self.name = name
        self.next_states = next_states

    def run(self, state_context: StateMachineContext) -> StateResult:
        # Overridden by concrete states; the base implementation does nothing.
        pass

    @staticmethod
    def get_forks(next_state, states_contexts: list[StateMachineContext]):
        """
        Create one fork item for every state context.

        :param next_state: state name each fork should transition to
        :param states_contexts: contexts to pair with ``next_state``
        :return: list of ``(next_state, state_context)`` tuples
        """
        forks = []
        for state_context in states_contexts:
            forks.append((next_state, state_context))
        return forks

    def __repr__(self):
        return f"(State '{self.name}' -> {self.next_states})"


class Start(State):
    """Entry state: does no work and hands over to its single next state."""

    def run(self, state_context) -> StateResult:
        # Nothing to prepare here; simply transition onward.
        return StateResult(self.next_states[0])

    def __repr__(self):
        return f"(StartState '{self.name}' -> '{self.next_states[0]}')"


class PrepareReadTokens(State):
    """Reset the token buffer before a new read cycle starts."""

    def run(self, state_context: StateMachineContext) -> StateResult:
        # Drop any previously buffered tokens; the next buffered token will
        # sit just after the current input position.
        del state_context.buffer[:]
        state_context.buffer_start_pos = state_context.parser_input.pos + 1
        return StateResult(self.next_states[0])


class ReadTokens(State):
    """Consume one token, buffering it and forking when it may start a concept."""

    def run(self, state_context) -> StateResult:
        parser_input = state_context.parser_input
        if not parser_input.next_token(False):
            return StateResult("eof")

        # Ask which concepts could start with the token we just read.
        candidates = state_context.get_metadata_from_first_token(state_context.context,
                                                                 parser_input.token)

        if candidates:
            # Spawn one forked execution path per candidate concept.
            forks = self.get_forks("concepts found", state_context.get_clones(candidates))
        else:
            forks = None

        state_context.buffer.append(parser_input.token)
        # This path stays in the same state; the forks try the concepts.
        return StateResult(self.name, forks)


class ManageUnrecognized(State):
    """Flush the buffered tokens into the result as unrecognized text."""

    def run(self, state_context) -> StateResult:
        if state_context.buffer:
            text = get_text_from_tokens(state_context.buffer)
            previous = state_context.result[-1] if state_context.result else None
            if isinstance(previous, UnrecognizedToken):
                # Merge with the previous unrecognized chunk instead of
                # emitting two adjacent ones.
                state_context.result[-1] = UnrecognizedToken(previous.buffer + text,
                                                             previous.start,
                                                             state_context.parser_input.pos - 1)
            else:
                state_context.result.append(UnrecognizedToken(text,
                                                              state_context.buffer_start_pos,
                                                              state_context.parser_input.pos - 1))

        return StateResult(self.next_states[0])


class ReadConcept(State):
    """Try to match the remaining expected tokens of the candidate concept."""

    def run(self, state_context) -> StateResult:
        start = state_context.parser_input.pos
        candidate = state_context.concept_to_recognize

        for expected in candidate.expected_tokens:
            if not state_context.parser_input.next_token(False):
                # Input ended before the whole concept was matched.
                state_context.errors.append(UnexpectedEof(expected, state_context.parser_input.token))
                return self._finish(state_context)

            token = state_context.parser_input.token
            if token.value != expected:
                # The next token does not belong to this concept.
                state_context.errors.append(UnexpectedToken(token, expected))
                return self._finish(state_context)

        # Every expected token matched: record the recognized concept.
        state_context.result.append(MetadataToken(candidate.metadata,
                                                  start,
                                                  state_context.parser_input.pos,
                                                  candidate.resolution_method,
                                                  "simple"))
        return self._finish(state_context)

    def _finish(self, state_context) -> StateResult:
        # Whether the match succeeded or failed, the candidate concept is
        # consumed and the machine moves to the single next state.
        state_context.concept_to_recognize = None
        return StateResult(self.next_states[0])


class End(State):
    """Terminal state: signals that this execution path has finished."""

    def run(self, state_context) -> StateResult:
        # Returning no next state marks the path as ended.
        return StateResult(None)

    def __repr__(self):
        return f"(EndState '{self.name}')"


@dataclass
|
||||
class ExecutionPathHistory:
|
||||
from_state: str
|
||||
execution_context_debug: dict
|
||||
to_state: str = ""
|
||||
forks: list[tuple] = None
|
||||
parents: list = None
|
||||
|
||||
def clone(self, parent_path_id):
|
||||
parents = self.parents.copy() if self.parents else []
|
||||
parents.append(parent_path_id)
|
||||
return ExecutionPathHistory(self.from_state,
|
||||
self.execution_context_debug.copy(),
|
||||
self.to_state,
|
||||
self.forks.copy() if self.forks else None,
|
||||
parents)
|
||||
|
||||
def __repr__(self):
|
||||
return "History(from '{0}', to '{1}', using {2}, forks={3}, parents={4}".format(
|
||||
self.from_state,
|
||||
self.to_state,
|
||||
self.execution_context_debug,
|
||||
len(self.forks) if self.forks else 0,
|
||||
self.parents)
|
||||
|
||||
|
||||
@dataclass
class ExecutionPath:
    """
    One concurrent execution of the state machine: its current position
    (workflow + state), its context and its traceability history.
    """
    path_id: int
    execution_context: Any  # context handed to each state's run(); must provide to_debug()
    current_workflow: str
    current_state: str

    history: list[ExecutionPathHistory]  # transitions taken so far
    ended: bool = False  # True once a state returned no next state

    def clone(self, path_id, new_execution_path, new_workflow, new_state):
        """
        Create a forked path positioned at *new_workflow*/*new_state*,
        carrying a copy of this path's history tagged with this path's id.
        """
        cloned_history = [record.clone(self.path_id) for record in self.history]
        return ExecutionPath(path_id,
                             new_execution_path,
                             new_workflow,
                             new_state,
                             cloned_history,
                             self.ended)

    def __repr__(self):
        return f"(Path id={self.path_id}, workflow='{self.current_workflow}', state='{self.current_state}')"

    def get_audit_trail(self):
        """Return the ordered list of states this path went through."""
        trail = []
        for record in self.history:
            trail.append(record.from_state)
        return trail


class StateMachine:
    """
    Forking state machine.

    Holds a set of named workflows (each a mapping of state name -> State)
    and runs them over one or more concurrent execution paths. A state may
    return forks, in which case additional paths are spawned and reviewed in
    later iterations of the main loop.
    """

    def __init__(self, workflows):
        # workflows: mapping workflow name -> {state name: State}
        self.workflows = workflows
        self.paths = None  # list[ExecutionPath], created by run()
        self.last_path_id = -1  # last path id handed out by _get_new_path_id()

    def run(self, workflow_name: str, state_name: str, execution_context):
        """
        Run the workflow from the state given in parameter.

        Creates a single initial execution path, then loops until every path
        (including paths spawned by forks) has ended. Each iteration runs the
        current state of every live path, records a traceability entry in the
        path's history, advances the path, and appends any forked paths to
        ``self.paths`` so they are picked up on the next iteration.

        :param workflow_name: name of the workflow to start in
        :param state_name: name of the state to start from
        :param execution_context: context object passed to every state's
                                  ``run``; must provide ``to_debug()``
        """
        self.last_path_id = -1  # reset the path ids
        self.paths = [ExecutionPath(self._get_new_path_id(),
                                    execution_context,
                                    workflow_name,
                                    state_name,
                                    [],
                                    False)]

        while True:
            # Only paths that have not ended are run this iteration; forks
            # appended below are reviewed on the next pass.
            to_review = [p for p in self.paths if not p.ended]
            if len(to_review) == 0:
                break

            for path in to_review:
                # add traceability
                history = ExecutionPathHistory(f"{path.current_workflow}:{path.current_state}",
                                               path.execution_context.to_debug())
                path.history.append(history)

                current_state = self.workflows[path.current_workflow][path.current_state]
                res = current_state.run(path.execution_context)

                if res.next_state is None:
                    path.ended = True
                    continue  # not possible to fork !

                path.current_workflow, path.current_state = self._compute_next_workflow_and_state(path.current_workflow,
                                                                                                 res.next_state)

                # update traceability
                history.to_state = f"{path.current_workflow}:{path.current_state}"

                # add forks
                if res.forks:
                    new_paths = []
                    for next_state, next_execution_context in res.forks:
                        # NOTE(review): fork targets are resolved against the
                        # path's already-advanced workflow — confirm intended.
                        next_workflow, next_state = self._compute_next_workflow_and_state(path.current_workflow,
                                                                                         next_state)
                        new_paths.append(path.clone(self._get_new_path_id(),
                                                    next_execution_context,
                                                    next_workflow,
                                                    next_state))

                    self.paths.extend(new_paths)
                    history.forks = [p.path_id for p in new_paths]

    def _get_new_path_id(self):
        # Hand out sequential path ids (0, 1, 2, ...).
        self.last_path_id += 1
        return self.last_path_id

    @staticmethod
    def _compute_next_workflow_and_state(workflow, state):
        """
        Resolve a next-state name: a name starting with "#" is a jump into
        another workflow (at its "start" state); otherwise it is a state of
        the current workflow.
        """
        if state.startswith("#"):
            return state, "start"
        else:
            return workflow, state
Reference in New Issue
Block a user