Refactored Caching, Refactored BnfNodeParser, Introduced Sphinx
This commit is contained in:
@@ -1,9 +1,9 @@
|
||||
import logging
|
||||
|
||||
from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts
|
||||
from parsers.BaseParser import BaseParser
|
||||
from core.tokenizer import Tokenizer, Keywords, TokenKind, LexerError
|
||||
from core.concept import VARIABLE_PREFIX
|
||||
from core.tokenizer import Keywords, TokenKind, LexerError
|
||||
from parsers.BaseParser import BaseParser
|
||||
|
||||
|
||||
class ExactConceptParser(BaseParser):
|
||||
@@ -11,10 +11,11 @@ class ExactConceptParser(BaseParser):
|
||||
Tries to recognize a single concept
|
||||
"""
|
||||
|
||||
MAX_WORDS_SIZE = 10
|
||||
MAX_WORDS_SIZE = 3
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
def __init__(self, max_word_size=None, **kwargs):
|
||||
BaseParser.__init__(self, "ExactConcept", 80)
|
||||
self.max_word_size = max_word_size
|
||||
|
||||
def parse(self, context, parser_input):
|
||||
"""
|
||||
@@ -33,11 +34,11 @@ class ExactConceptParser(BaseParser):
|
||||
context.log(f"Error found in tokenizer {e}", self.name)
|
||||
return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.ERROR, body=e))
|
||||
|
||||
if len(words) > self.MAX_WORDS_SIZE:
|
||||
if len(words) > (self.max_word_size or self.MAX_WORDS_SIZE):
|
||||
context.log(f"Max words reached. Stopping.", self.name)
|
||||
return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, body=parser_input))
|
||||
|
||||
recognized = False
|
||||
recognized = [] # keep track of the concepts found
|
||||
for combination in self.combinations(words):
|
||||
|
||||
concept_key = " ".join(combination)
|
||||
@@ -49,16 +50,23 @@ class ExactConceptParser(BaseParser):
|
||||
concepts = result if isinstance(result, list) else [result]
|
||||
|
||||
for concept in concepts:
|
||||
if concept.id in recognized:
|
||||
context.log(f"Recognized concept {concept} again. Skipping.", self.name)
|
||||
# example
|
||||
# if the input is foo a and a concept is defined as foo a
|
||||
# There will be two matches: one for 'foo a' and one for 'foo _var_0'
|
||||
# but it's the same concept foo a
|
||||
continue
|
||||
|
||||
context.log(f"Recognized concept {concept}.", self.name)
|
||||
# update the properties if needed
|
||||
need_validation = False
|
||||
for i, token in enumerate(combination):
|
||||
if token.startswith(VARIABLE_PREFIX):
|
||||
index = int(token[len(VARIABLE_PREFIX):])
|
||||
concept.def_prop_by_index(index, words[i])
|
||||
concept.def_var_by_index(index, words[i])
|
||||
concept.metadata.need_validation = True
|
||||
if self.verbose_log.isEnabledFor(logging.DEBUG):
|
||||
prop_name = list(concept.props.keys())[index]
|
||||
prop_name = concept.metadata.variables[index][0]
|
||||
context.log(
|
||||
f"Added property {index}: {prop_name}='{words[i]}'.",
|
||||
self.name)
|
||||
@@ -69,12 +77,13 @@ class ExactConceptParser(BaseParser):
|
||||
context.sheerka.new(
|
||||
BuiltinConcepts.PARSER_RESULT,
|
||||
parser=self,
|
||||
source=parser_input if isinstance(parser_input, str) else self.get_text_from_tokens(parser_input),
|
||||
source=parser_input if isinstance(parser_input, str) else self.get_text_from_tokens(
|
||||
parser_input),
|
||||
body=concept,
|
||||
try_parsed=concept)))
|
||||
recognized = True
|
||||
recognized.append(concept.id)
|
||||
|
||||
if recognized:
|
||||
if len(recognized) > 0:
|
||||
if len(res) == 1:
|
||||
self.log_result(context, parser_input, res[0])
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user