Added simple form of concept composition
@@ -1,8 +1,11 @@
import ast

from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import TokenKind
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import ConceptLexerParser, UnrecognizedTokensNode, ConceptNode
from parsers.ConceptLexerParser import ConceptLexerParser, UnrecognizedTokensNode, ConceptNode, SourceCodeNode
import core.utils
from parsers.PythonParser import PythonParser

concept_lexer_parser = ConceptLexerParser()

@@ -18,6 +21,25 @@ class MultipleConceptsParser(BaseParser):
    def __init__(self, **kwargs):
        BaseParser.__init__(self, "MultipleConcepts", 45)

    @staticmethod
    def finalize(nodes_found, unrecognized_tokens):
        if not unrecognized_tokens:
            return nodes_found, unrecognized_tokens

        unrecognized_tokens.fix_source()
        if unrecognized_tokens.not_whitespace():
            nodes_found = core.utils.product(nodes_found, [unrecognized_tokens])

        return nodes_found, None

    @staticmethod
    def create_or_add(unrecognized_tokens, token, index):
        if unrecognized_tokens:
            unrecognized_tokens.add_token(token, index)
        else:
            unrecognized_tokens = UnrecognizedTokensNode(index, index, [token])
        return unrecognized_tokens

    def parse(self, context, text):
        sheerka = context.sheerka
        if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
@@ -29,50 +51,42 @@ class MultipleConceptsParser(BaseParser):
        sheerka = context.sheerka
        nodes = text.value
        nodes_found = [[]]
        source = ""
        concepts_only = True

        for node in nodes:
            if isinstance(node, UnrecognizedTokensNode):
                unrecognized_tokens = None
                for i, token in enumerate(node.tokens):
                    index = node.start + i
                i = 0

                    if token.type == TokenKind.IDENTIFIER:
                        # it may be a concept
                        concept = context.new_concept(token.value)
                        if hasattr(concept, "__iter__") or not sheerka.is_unknown(concept):
                            # finish processing unrecognized_tokens
                            if unrecognized_tokens:
                                unrecognized_tokens.fix_source()
                                source += unrecognized_tokens.source
                                if unrecognized_tokens.not_whitespace():
                                    nodes_found = core.utils.product(nodes_found, [unrecognized_tokens])
                                unrecognized_tokens = None
                while i < len(node.tokens):

                            source += token.value
                            concepts = concept if hasattr(concept, "__iter__") else [concept]
                            concepts_nodes = [ConceptNode(c, index, index, [token], token.value) for c in concepts]
                            nodes_found = core.utils.product(nodes_found, concepts_nodes)
                            continue
                        else:
                            # it cannot be a concept
                            concepts_only &= token.type == TokenKind.WHITESPACE or token.type == TokenKind.NEWLINE
                    token_index = node.start + i
                    token = node.tokens[i]

                    if unrecognized_tokens:
                        unrecognized_tokens.add_token(token, index)
                    else:
                        unrecognized_tokens = UnrecognizedTokensNode(index, index, [token])
                    concepts_nodes = self.get_concepts_nodes(context, token_index, token)
                    if concepts_nodes is not None:
                        nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
                        nodes_found = core.utils.product(nodes_found, concepts_nodes)
                        i += 1
                        continue

                if unrecognized_tokens:
                    unrecognized_tokens.fix_source()
                    source += unrecognized_tokens.source
                    if unrecognized_tokens.not_whitespace():
                        nodes_found = core.utils.product(nodes_found, [unrecognized_tokens])
                    source_code_node = self.get_source_code_node(context, token_index, node.tokens[i:])
                    if source_code_node:
                        nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
                        nodes_found = core.utils.product(nodes_found, [source_code_node])
                        i += len(source_code_node.tokens)
                        continue

                    # not a concept nor some source code
                    unrecognized_tokens = self.create_or_add(unrecognized_tokens, token, token_index)
                    concepts_only &= token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE)
                    i += 1

                # finish processing if needed
                nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)

            else:
                nodes_found = core.utils.product(nodes_found, [node])
                source += node.source

        ret = []
        for choice in nodes_found:
@@ -83,14 +97,68 @@ class MultipleConceptsParser(BaseParser):
            sheerka.new(
                BuiltinConcepts.PARSER_RESULT,
                parser=self,
                source=source,
                source=text.source,
                body=choice,
                try_parsed=None))
            )

        if len(ret) == 1:
            self.log_result(context, source, ret[0])
            self.log_result(context, text.source, ret[0])
            return ret[0]
        else:
            self.log_multiple_results(context, source, ret)
            self.log_multiple_results(context, text.source, ret)
            return ret

    @staticmethod
    def get_concepts_nodes(context, index, token):
        """
        Tries to recognize a concept
        from the universe of all known concepts
        """

        if token.type != TokenKind.IDENTIFIER:
            return None

        concept = context.new_concept(token.value)
        if hasattr(concept, "__iter__") or context.sheerka.is_known(concept):
            concepts = concept if hasattr(concept, "__iter__") else [concept]
            concepts_nodes = [ConceptNode(c, index, index, [token], token.value) for c in concepts]
            return concepts_nodes

        return None

    @staticmethod
    def get_source_code_node(context, index, tokens):
        """
        Tries to recognize source code.
        For the time being, only Python is supported
        :param context: the parsing context
        :param index: start index of the first token
        :param tokens: the tokens to try to parse as source code
        :return: a SourceCodeNode, or None if no prefix parses as an evaluable expression
        """

        if len(tokens) == 0 or (len(tokens) == 1 and tokens[0].type == TokenKind.EOF):
            return None

        end_index = len(tokens)
        while end_index > 0:
            parser = PythonParser()
            tokens_to_parse = tokens[:end_index]
            res = parser.parse(context, tokens_to_parse)
            if res.status:
                # only expressions are accepted
                ast_ = res.value.value.ast_
                if not isinstance(ast_, ast.Expression):
                    return None
                try:
                    compiled = compile(ast_, "<string>", "eval")
                    eval(compiled, {}, {})
                except Exception:
                    return None

                source = BaseParser.get_text_from_tokens(tokens_to_parse)
                return SourceCodeNode(res.value.value, index, index + end_index - 1, tokens_to_parse, source)
            end_index -= 1

        return None
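
The expression check used above (require an ast.Expression, compile in "eval" mode, then evaluate in an empty environment) can be tried in isolation. A minimal standalone sketch using only the standard library; the helper name is illustrative and not part of this commit, and ast.parse in "eval" mode stands in for the PythonParser result plus the isinstance(ast_, ast.Expression) check:

import ast

def is_evaluable_expression(source: str) -> bool:
    # "eval" mode accepts only a single expression (statements raise SyntaxError),
    # then compile and evaluate with empty globals/locals, as in get_source_code_node.
    try:
        tree = ast.parse(source, mode="eval")
        code = compile(tree, "<string>", "eval")
        eval(code, {}, {})
        return True
    except Exception:
        return False

# is_evaluable_expression("1 + 2")          -> True
# is_evaluable_expression("x = 1")          -> False (a statement, not an expression)
# is_evaluable_expression("undefined_name") -> False (NameError when evaluated)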