Working on #21: Created classes

This commit is contained in:
2023-07-09 19:02:56 +02:00
parent e66cdcce2d
commit a7043b1dd8
7 changed files with 84 additions and 9 deletions
+115
View File
@@ -0,0 +1,115 @@
from core.concept import DefinitionType
from evaluators.base_evaluator import MultipleChoices
from parsers.state_machine import ConceptToRecognize, End, ManageUnrecognized, MetadataToken, PrepareReadTokens, \
ReadConcept, ReadTokens, Start, StateMachine, StateMachineContext, UnrecognizedToken
from parsers.tokenizer import Token, TokenKind, Tokenizer
class SimpleConceptsParser:
    """
    Parser for concepts that take no parameter.

    Example::

        def concept I am a new concept

    The input is treated as a sequence of such concepts, explored through
    two cooperating state-machine workflows.
    """

    def __init__(self):
        # Workflow that walks the raw token stream, collecting unrecognized
        # tokens until a concept start is detected (or EOF is reached).
        token_states = [
            Start("start", next_states=["prepare read tokens"]),
            PrepareReadTokens("prepare read tokens", next_states=["read tokens"]),
            ReadTokens("read tokens", next_states=["read tokens", "eof", "concepts found"]),
            ManageUnrecognized("eof", next_states=["end"]),
            ManageUnrecognized("concepts found", next_states=["#concept_wkf"]),
            End("end", next_states=None),
        ]
        # Workflow that consumes one recognized concept, then hands control
        # back to the token workflow.
        concept_states = [
            Start("start", next_states=["read concept"]),
            ReadConcept("read concept", next_states=["#tokens_wkf"]),
        ]
        # Workflows are addressed by their "#name" key; each maps state
        # name -> state instance for the StateMachine to look up.
        self.workflows = {
            "#tokens_wkf": {state.name: state for state in token_states},
            "#concept_wkf": {state.name: state for state in concept_states},
        }
        # Parsing errors accumulated while running the workflows.
        self.error_sink = []

    @staticmethod
    def get_metadata_from_first_token(context, token: Token):
        """
        Return the concepts that could start with *token*.

        :param context: parsing context exposing the ``sheerka`` registry
        :param token: first token of a candidate concept
        :return: list of ConceptToRecognize candidates
        """
        def _expected(meta, attribute):
            # Re-tokenize the attribute's text and drop its first token,
            # which is the one already matched by *token*.
            pieces = Tokenizer(getattr(meta, attribute), yield_eof=False)
            return [piece.strip_quote for piece in pieces][1:]

        if token.type == TokenKind.CONCEPT:
            name, concept_id = token.value
            if concept_id:
                # An explicit id pins down exactly one concept.
                return [ConceptToRecognize(context.sheerka.get_by_id(concept_id), [], "id")]
            found = context.sheerka.get_by_name(name)
            # get_by_name may return either a single metadata or a list.
            if isinstance(found, list):
                return [ConceptToRecognize(meta, [], "name") for meta in found]
            return [ConceptToRecognize(found, [], "name")]

        # Candidates matched by key are restricted to default, parameterless
        # definitions; candidates matched by name are unrestricted.
        by_key = [
            ConceptToRecognize(meta, _expected(meta, "key"), "key")
            for meta in context.sheerka.get_metadatas_from_first_token("key", token.value)
            if meta.definition_type == DefinitionType.DEFAULT and not meta.parameters
        ]
        by_name = [
            ConceptToRecognize(meta, _expected(meta, "name"), "name")
            for meta in context.sheerka.get_metadatas_from_first_token("name", token.value)
        ]
        return by_key + by_name

    def parse(self, context, parser_input):
        """
        Parse *parser_input* and return the best interpretations.

        :param context: parsing context handed to the state machine
        :param parser_input: raw input to tokenize and recognize
        :return: a MultipleChoices wrapping the highest-scoring results
        """
        machine = StateMachine(self.workflows)
        machine_context = StateMachineContext(
            context, parser_input, self.get_metadata_from_first_token
        )
        machine.run("#tokens_wkf", "start", machine_context)
        return MultipleChoices(self.select_best_paths(machine))

    def select_best_paths(self, sm):
        """
        Collect the results of the highest-scoring, error-free paths.

        Paths whose execution context recorded errors are skipped, and a
        path must score at least 1 to be retained.

        :param sm: state machine whose explored paths are ranked
        :return: list of the result sequences sharing the best score
        """
        winners = []
        best = 1
        for path in sm.paths:
            if path.execution_context.errors:
                continue
            current = self._compute_path_score(path)
            if current < best:
                continue
            if current > best:
                winners = []
                best = current
            winners.append(path.execution_context.result)
        return winners

    @staticmethod
    def _compute_path_score(path):
        """
        Score a path by how much of the input its recognized concepts cover.

        MetadataToken instances represent recognized concepts; each one
        contributes the (inclusive) number of source tokens it spans, so
        Concept("one two") scores the same as Concept("one") + Concept("two").
        Whitespace-only unrecognized gaps also count, so splitting on
        whitespace does not penalise an otherwise equivalent path.

        :param path: explored path carrying its execution context
        :return: integer score (higher is better)
        """
        total = 0
        for item in path.execution_context.result:
            if isinstance(item, MetadataToken):
                # Inclusive token span covered by the recognized concept.
                total += item.end - item.start + 1
            elif isinstance(item, UnrecognizedToken) and item.buffer.isspace():
                total += len(item.buffer)
        return total