Added simple form of concept composition

This commit is contained in:
2020-01-15 18:38:29 +01:00
parent 51fa9629d0
commit 8152f82c6b
22 changed files with 1105 additions and 544 deletions
+104 -36
View File
@@ -1,8 +1,11 @@
import ast
from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import TokenKind
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import ConceptLexerParser, UnrecognizedTokensNode, ConceptNode
from parsers.ConceptLexerParser import ConceptLexerParser, UnrecognizedTokensNode, ConceptNode, SourceCodeNode
import core.utils
from parsers.PythonParser import PythonParser
concept_lexer_parser = ConceptLexerParser()
@@ -18,6 +21,25 @@ class MultipleConceptsParser(BaseParser):
def __init__(self, **kwargs):
    # Register this parser under the name "MultipleConcepts" with priority 45.
    # NOTE(review): **kwargs are accepted but silently discarded — confirm
    # callers never rely on them being forwarded to BaseParser.
    BaseParser.__init__(self, "MultipleConcepts", 45)
@staticmethod
def finalize(nodes_found, unrecognized_tokens):
if not unrecognized_tokens:
return nodes_found, unrecognized_tokens
unrecognized_tokens.fix_source()
if unrecognized_tokens.not_whitespace():
nodes_found = core.utils.product(nodes_found, [unrecognized_tokens])
return nodes_found, None
@staticmethod
def create_or_add(unrecognized_tokens, token, index):
    """Append *token* to the pending accumulator, or start a fresh one.

    Returns the accumulator: a new single-token ``UnrecognizedTokensNode``
    anchored at *index* when none was pending, otherwise the existing one
    with *token* added.
    """
    if not unrecognized_tokens:
        return UnrecognizedTokensNode(index, index, [token])
    unrecognized_tokens.add_token(token, index)
    return unrecognized_tokens
# NOTE(review): everything from here to the end of parse() is a rendered
# unified diff whose +/- markers and indentation were stripped — the OLD and
# NEW implementations are interleaved and two hunk headers sit inside the
# body. Do NOT treat this span as runnable code; recover the real method from
# version control before editing. Comments below tag which fragments appear
# to belong to which version.
def parse(self, context, text):
sheerka = context.sheerka
# Guard: only PARSER_RESULT inputs are handled (continuation lost to the hunk gap below).
if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
@@ -29,50 +51,42 @@ class MultipleConceptsParser(BaseParser):
sheerka = context.sheerka
nodes = text.value
# nodes_found holds alternative interpretations; starts as one empty choice.
nodes_found = [[]]
# OLD version: accumulated the source text manually.
source = ""
concepts_only = True
for node in nodes:
if isinstance(node, UnrecognizedTokensNode):
unrecognized_tokens = None
# OLD loop head (enumerate) — replaced by the index-based while below.
for i, token in enumerate(node.tokens):
index = node.start + i
# NEW loop head starts here.
i = 0
# OLD: inline concept recognition (now in get_concepts_nodes).
if token.type == TokenKind.IDENTIFIER:
# it may be a concept
concept = context.new_concept(token.value)
if hasattr(concept, "__iter__") or not sheerka.is_unknown(concept):
# finish processing unrecognized_tokens
if unrecognized_tokens:
unrecognized_tokens.fix_source()
source += unrecognized_tokens.source
if unrecognized_tokens.not_whitespace():
nodes_found = core.utils.product(nodes_found, [unrecognized_tokens])
unrecognized_tokens = None
while i < len(node.tokens):
source += token.value
concepts = concept if hasattr(concept, "__iter__") else [concept]
concepts_nodes = [ConceptNode(c, index, index, [token], token.value) for c in concepts]
nodes_found = core.utils.product(nodes_found, concepts_nodes)
continue
else:
# it cannot be a concept
concepts_only &= token.type == TokenKind.WHITESPACE or token.type == TokenKind.NEWLINE
# NEW body: index-based token access.
token_index = node.start + i
token = node.tokens[i]
# OLD: inline accumulator update (now create_or_add).
if unrecognized_tokens:
unrecognized_tokens.add_token(token, index)
else:
unrecognized_tokens = UnrecognizedTokensNode(index, index, [token])
# NEW: delegate concept recognition to the helper.
concepts_nodes = self.get_concepts_nodes(context, token_index, token)
if concepts_nodes is not None:
nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
nodes_found = core.utils.product(nodes_found, concepts_nodes)
i += 1
continue
# OLD: inline finalize logic (now the finalize() helper).
if unrecognized_tokens:
unrecognized_tokens.fix_source()
source += unrecognized_tokens.source
if unrecognized_tokens.not_whitespace():
nodes_found = core.utils.product(nodes_found, [unrecognized_tokens])
# NEW: try to recognize a run of embedded source code.
source_code_node = self.get_source_code_node(context, token_index, node.tokens[i:])
if source_code_node:
nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
nodes_found = core.utils.product(nodes_found, [source_code_node])
i += len(source_code_node.tokens)
continue
# not a concept nor some source code
unrecognized_tokens = self.create_or_add(unrecognized_tokens, token, token_index)
concepts_only &= token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE)
i += 1
# finish processing if needed
nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
else:
# Already-recognized node: cross it into every alternative.
nodes_found = core.utils.product(nodes_found, [node])
# OLD: manual source accumulation (NEW uses text.source directly).
source += node.source
ret = []
for choice in nodes_found:
@@ -83,14 +97,68 @@ class MultipleConceptsParser(BaseParser):
sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=source,
source=text.source,
body=choice,
try_parsed=None))
)
if len(ret) == 1:
# OLD logging call (used the accumulated `source`):
self.log_result(context, source, ret[0])
# NEW logging call (uses text.source):
self.log_result(context, text.source, ret[0])
return ret[0]
else:
self.log_multiple_results(context, source, ret)
self.log_multiple_results(context, text.source, ret)
return ret
@staticmethod
def get_concepts_nodes(context, index, token):
    """Try to recognize *token* as one or more concepts.

    Looks the identifier up in the universe of all known concepts and
    returns a list of ``ConceptNode`` alternatives, one per matching
    concept, or ``None`` when the token is not an identifier or names no
    known concept.
    """
    if token.type != TokenKind.IDENTIFIER:
        return None
    concept = context.new_concept(token.value)
    is_multiple = hasattr(concept, "__iter__")
    if not is_multiple and not context.sheerka.is_known(concept):
        return None
    candidates = concept if is_multiple else [concept]
    return [ConceptNode(c, index, index, [token], token.value) for c in candidates]
@staticmethod
def get_source_code_node(context, index, tokens):
"""
Tries to recognize source code.
For the time being, only Python is supported
:param context:
:param tokens:
:param index:
:return:
"""
if len(tokens) == 0 or (len(tokens) == 1 and tokens[0].type == TokenKind.EOF):
return None
end_index = len(tokens)
while end_index > 0:
parser = PythonParser()
tokens_to_parse = tokens[:end_index]
res = parser.parse(context, tokens_to_parse)
if res.status:
# only expression are accepted
ast_ = res.value.value.ast_
if not isinstance(ast_, ast.Expression):
return None
try:
compiled = compile(ast_, "<string>", "eval")
eval(compiled, {}, {})
except Exception:
return None
source = BaseParser.get_text_from_tokens(tokens_to_parse)
return SourceCodeNode(res.value.value, index, index + end_index - 1, tokens_to_parse, source)
end_index -= 1
return None