Working on #21 : Created classes
This commit is contained in:
@@ -0,0 +1,115 @@
|
||||
from core.concept import DefinitionType
|
||||
from evaluators.base_evaluator import MultipleChoices
|
||||
from parsers.state_machine import ConceptToRecognize, End, ManageUnrecognized, MetadataToken, PrepareReadTokens, \
|
||||
ReadConcept, ReadTokens, Start, StateMachine, StateMachineContext, UnrecognizedToken
|
||||
from parsers.tokenizer import Token, TokenKind, Tokenizer
|
||||
|
||||
|
||||
class SimpleConceptsParser:
    """
    Parse concepts that take no parameter.

    ex : def concept I am a new concept

    It parses a sequence of concepts by driving two cooperating state-machine
    workflows: one consumes raw tokens, the other recognizes a single concept;
    they hand control back and forth via "#tokens_wkf" / "#concept_wkf"
    cross-workflow references.
    """

    def __init__(self):
        # NOTE: these were set literals; tuples are used instead because the
        # states are an ordered workflow definition that is only iterated to
        # build the name->state dicts below — a set would needlessly require
        # the state objects to be hashable and would lose declaration order.
        tokens_wkf = (
            Start("start", next_states=["prepare read tokens"]),
            PrepareReadTokens("prepare read tokens", next_states=["read tokens"]),
            ReadTokens("read tokens", next_states=["read tokens", "eof", "concepts found"]),
            ManageUnrecognized("eof", next_states=["end"]),
            # Jumps into the concept workflow when a concept start is found.
            ManageUnrecognized("concepts found", next_states=["#concept_wkf"]),
            End("end", next_states=None),
        )

        concept_wkf = (
            Start("start", next_states=["read concept"]),
            # After one concept is read, control returns to the token workflow.
            ReadConcept("read concept", next_states=["#tokens_wkf"]),
        )

        # Index every state by name so the state machine can resolve the
        # "#tokens_wkf" / "#concept_wkf" references used in next_states.
        self.workflows = {
            "#tokens_wkf": {t.name: t for t in tokens_wkf},
            "#concept_wkf": {t.name: t for t in concept_wkf},
        }
        self.error_sink = []

    @staticmethod
    def get_metadata_from_first_token(context, token: Token):
        """
        Return the ConceptToRecognize candidates that could start with *token*.

        :param context: parsing context; must expose a ``sheerka`` registry
        :type context: object
        :param token: first token of a potential concept
        :type token: Token
        :return: candidate concepts (possibly empty list)
        :rtype: list[ConceptToRecognize]
        """
        def _get_expected_tokens(_metadata, attr):
            # Tokenize the metadata attribute and drop its first token,
            # which is the one already matched by the caller.
            return [t.strip_quote for t in Tokenizer(getattr(_metadata, attr), yield_eof=False)][1:]

        if token.type == TokenKind.CONCEPT:
            name, concept_id = token.value
            if concept_id:
                # An explicit id uniquely identifies the concept.
                return [ConceptToRecognize(context.sheerka.get_by_id(concept_id), [], "id")]
            else:
                # Lookup by name may be ambiguous: get_by_name can return a
                # single metadata or a list of metadatas.
                metadata = context.sheerka.get_by_name(name)
                return [ConceptToRecognize(metadata, [], "name")] if not isinstance(metadata, list) else \
                    [ConceptToRecognize(m, [], "name") for m in metadata]

        # Candidates whose key starts with this token; this parser only
        # handles default, parameterless definitions.
        concepts_by_key = [ConceptToRecognize(m, _get_expected_tokens(m, "key"), "key")
                           for m in context.sheerka.get_metadatas_from_first_token("key", token.value)
                           if m.definition_type == DefinitionType.DEFAULT and len(m.parameters) == 0]

        # Candidates whose name starts with this token (no filtering here).
        concepts_by_name = [ConceptToRecognize(m, _get_expected_tokens(m, "name"), "name")
                            for m in context.sheerka.get_metadatas_from_first_token("name", token.value)]

        return concepts_by_key + concepts_by_name

    def parse(self, context, parser_input):
        """
        Run the state machine over *parser_input* and return the best results.

        :param context: parsing context forwarded to the state machine
        :param parser_input: the input to parse
        :return: the best-scoring interpretations
        :rtype: MultipleChoices
        """
        sm = StateMachine(self.workflows)
        sm_context = StateMachineContext(context, parser_input, self.get_metadata_from_first_token)
        sm.run("#tokens_wkf", "start", sm_context)

        selected = self.select_best_paths(sm)

        return MultipleChoices(selected)

    def select_best_paths(self, sm):
        """
        Return the results of the error-free paths sharing the highest score.

        :param sm: the state machine after :meth:`parse` has run it
        :type sm: StateMachine
        :return: list of result sequences (one per best-scoring path)
        :rtype: list
        """
        selected = []
        # Starting the threshold at 1 means a path must score at least 1 to be
        # kept: paths that recognize nothing are discarded — presumably
        # intentional, TODO confirm.
        best_score = 1
        for path in sm.paths:
            if path.execution_context.errors:
                continue

            score = self._compute_path_score(path)

            if score > best_score:
                # Strictly better path: drop everything collected so far.
                selected.clear()
                selected.append(path.execution_context.result)
                best_score = score
            elif score == best_score:
                # Tie: keep all equally good interpretations.
                selected.append(path.execution_context.result)
        return selected

    @staticmethod
    def _compute_path_score(path):
        """
        Compute the score of a path.

        We look at the MetadataToken, that represent the concepts that are recognized.
        The first idea was to look at the concepts that use the maximum of token in a row.
        example :
            Concept("I am a concept") is better than Concept("I am") + Unrecognized(" a concept")

        but :
            Concept("one two") should be equivalent to Concept("one") followed by Concept("two")

        :param path: one execution path of the state machine
        :type path: object with an ``execution_context.result`` token sequence
        :return: the path score (higher is better)
        :rtype: int
        """
        score = 0
        for token in path.execution_context.result:
            if isinstance(token, MetadataToken):
                # A recognized concept scores the span it covers (inclusive).
                score += token.end - token.start + 1
            elif isinstance(token, UnrecognizedToken) and token.buffer.isspace():
                # Pure-whitespace gaps still count, so splitting two concepts
                # at a space is not penalized versus one longer concept.
                score += len(token.buffer)

        return score
|
||||
Reference in New Issue
Block a user