From 6e343ba996941f0733ee9f9fd45a0166fc24bacb Mon Sep 17 00:00:00 2001 From: Kodjo Sossouvi Date: Tue, 12 May 2020 17:21:10 +0200 Subject: [PATCH] Refactored Caching, Refactored BnfNodeParser, Introduced Sphinx --- .gitignore | 2 + Makefile | 3 + conftest.py | 12 + docs/Makefile | 20 + docs/make.bat | 35 + docs/{ => source/blog}/blog.rst | 62 +- docs/source/blog/concepts.rst | 42 + docs/source/blog/parsers.rst | 0 docs/source/blog/persistence.rst | 92 + docs/source/blog_old.rst | 1094 ++++++++ docs/source/conf.py | 55 + docs/source/index.rst | 30 + docs/source/tech/tech.rst | 4 + src/cache/BaseCache.py | 241 ++ src/cache/Cache.py | 31 + src/cache/CacheManager.py | 261 ++ src/cache/DictionaryCache.py | 53 + src/cache/IncCache.py | 18 + src/cache/ListCache.py | 43 + src/cache/ListIfNeededCache.py | 56 + src/cache/SetCache.py | 45 + src/cache/__init__.py | 0 src/core/ast/nodes.py | 24 +- src/core/ast/visitors.py | 16 +- src/core/builtin_concepts.py | 113 +- src/core/builtin_helpers.py | 59 +- src/core/concept.py | 225 +- src/core/profiling.py | 26 + src/core/sheerka/ExecutionContext.py | 19 +- .../Services/SheerkaCreateNewConcept.py | 144 +- src/core/sheerka/Services/SheerkaDump.py | 17 +- .../Services/SheerkaEvaluateConcept.py | 52 +- src/core/sheerka/Services/SheerkaExecute.py | 10 +- .../sheerka/Services/SheerkaModifyConcept.py | 70 +- .../sheerka/Services/SheerkaSetsManager.py | 157 +- .../Services/SheerkaVariableManager.py | 28 +- src/core/sheerka/Sheerka.py | 523 ++-- src/core/utils.py | 6 +- src/evaluators/AddConceptEvaluator.py | 16 +- src/evaluators/AddConceptInSetEvaluator.py | 2 +- src/evaluators/ConceptEvaluator.py | 4 +- src/evaluators/PrepareEvalEvaluator.py | 2 +- src/evaluators/PythonEvaluator.py | 22 +- src/parsers/AtomNodeParser.py | 62 +- src/parsers/BaseNodeParser.py | 260 +- src/parsers/BnfNodeParser.py | 942 ++++--- src/parsers/BnfParser.py | 10 +- src/parsers/ConceptsWithConceptsParser.py | 109 - src/parsers/DefaultParser.py | 3 +- 
src/parsers/ExactConceptParser.py | 33 +- src/parsers/ExplainParser.py | 2 +- src/parsers/MultipleConceptsParser.py | 163 -- src/parsers/PythonParser.py | 10 +- src/parsers/PythonWithConceptsParser.py | 1 - src/parsers/SyaNodeParser.py | 284 +- src/parsers/UnrecognizedNodeParser.py | 11 +- src/parsers/_BnfNodeParser_Old.py | 912 +++++++ src/parsers/_ConceptsWithConceptsParser.py | 108 + src/parsers/_MultipleConceptsParser.py | 163 ++ src/printer/Formatter.py | 5 + src/printer/SheerkaPrinter.py | 5 + src/sdp/sheerkaDataProvider.py | 947 ++----- src/sdp/sheerkaDataProviderIO.py | 6 + src/sdp/sheerkaDataProvider_Old.py | 1087 ++++++++ src/sheerkapickle/sheerka_handlers.py | 42 +- tests/BaseTest.py | 49 +- tests/TestUsingFileBasedSheerka.py | 16 +- tests/TestUsingMemoryBasedSheerka.py | 24 +- tests/cache/__init__.py | 0 tests/cache/test_cache.py | 534 ++++ tests/cache/test_cache_manager.py | 111 + tests/core/test_SheerkaCreateNewConcept.py | 245 +- tests/core/test_SheerkaEvaluateConcept.py | 328 ++- tests/core/test_SheerkaHistoryManager.py | 3 +- tests/core/test_SheerkaModifyConcept.py | 131 +- tests/core/test_SheerkaSetsManager.py | 276 +- tests/core/test_SheerkaVariableManager.py | 84 +- tests/core/test_ast.py | 37 +- tests/core/test_builtin_helpers.py | 4 +- tests/core/test_concept.py | 71 +- tests/core/test_sheerka.py | 321 ++- tests/core/test_sheerka_call_evaluators.py | 6 + tests/core/test_sheerka_call_parsers.py | 5 + tests/core/test_sheerka_printer.py | 29 +- tests/core/test_utils.py | 6 +- tests/evaluators/test_AddConceptEvaluator.py | 32 +- .../test_AddConceptInSetEvaluator.py | 36 +- tests/evaluators/test_ConceptEvaluator.py | 24 +- tests/evaluators/test_EvalEvaluator.py | 2 +- tests/evaluators/test_LexerNodeEvaluator.py | 33 +- tests/evaluators/test_PythonEvaluator.py | 2 +- tests/non_reg/test_sheerka_non_reg.py | 290 +-- tests/parsers/parsers_utils.py | 77 +- tests/parsers/test_AtomsParser.py | 65 +- tests/parsers/test_BaseNodeParser.py | 257 ++ 
tests/parsers/test_BnfNodeParser.py | 1837 +++++-------- tests/parsers/test_BnfNodeParser_Old.py | 1305 ++++++++++ tests/parsers/test_BnfParser.py | 70 +- .../test_ConceptsWithConceptsParser.py | 387 ++- tests/parsers/test_DefaultParser.py | 97 +- tests/parsers/test_ExactConceptParser.py | 61 +- tests/parsers/test_MultipleConceptsParser.py | 432 +-- tests/parsers/test_PythonParser.py | 14 +- .../parsers/test_PythonWithConceptsParser.py | 2 +- ...ptLexerParser.py => test_SyaNodeParser.py} | 593 +++-- tests/parsers/test_UnrecognizedNodeParser.py | 63 +- tests/sdp/test_sheerkaDataProvider.py | 2151 ++------------- tests/sdp/test_sheerkaDataProvider_Old.py | 2314 +++++++++++++++++ tests/sdp/test_sheerkaSerializer.py | 4 +- tests/sheerkapickle/test_sheerka_handlers.py | 78 +- 110 files changed, 13865 insertions(+), 7540 deletions(-) create mode 100644 conftest.py create mode 100644 docs/Makefile create mode 100644 docs/make.bat rename docs/{ => source/blog}/blog.rst (95%) create mode 100644 docs/source/blog/concepts.rst create mode 100644 docs/source/blog/parsers.rst create mode 100644 docs/source/blog/persistence.rst create mode 100644 docs/source/blog_old.rst create mode 100644 docs/source/conf.py create mode 100644 docs/source/index.rst create mode 100644 docs/source/tech/tech.rst create mode 100644 src/cache/BaseCache.py create mode 100644 src/cache/Cache.py create mode 100644 src/cache/CacheManager.py create mode 100644 src/cache/DictionaryCache.py create mode 100644 src/cache/IncCache.py create mode 100644 src/cache/ListCache.py create mode 100644 src/cache/ListIfNeededCache.py create mode 100644 src/cache/SetCache.py create mode 100644 src/cache/__init__.py create mode 100644 src/core/profiling.py delete mode 100644 src/parsers/ConceptsWithConceptsParser.py delete mode 100644 src/parsers/MultipleConceptsParser.py create mode 100644 src/parsers/_BnfNodeParser_Old.py create mode 100644 src/parsers/_ConceptsWithConceptsParser.py create mode 100644 
src/parsers/_MultipleConceptsParser.py create mode 100644 src/sdp/sheerkaDataProvider_Old.py create mode 100644 tests/cache/__init__.py create mode 100644 tests/cache/test_cache.py create mode 100644 tests/cache/test_cache_manager.py create mode 100644 tests/parsers/test_BaseNodeParser.py create mode 100644 tests/parsers/test_BnfNodeParser_Old.py rename tests/parsers/{test_SyaConceptLexerParser.py => test_SyaNodeParser.py} (72%) create mode 100644 tests/sdp/test_sheerkaDataProvider_Old.py diff --git a/.gitignore b/.gitignore index 22028c9..6a68cec 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,10 @@ venv .pytest_cache .idea +.vscode __pycache__ build +_build prof tests/_concepts.txt tests/**/*result_test \ No newline at end of file diff --git a/Makefile b/Makefile index a9a1b7f..a14c952 100644 --- a/Makefile +++ b/Makefile @@ -6,6 +6,9 @@ test: clean clean: rm -rf build + rm -rf docs/build + rm -rf docs/source/_build rm -rf prof + rm -rf tests/prof find . -name '.pytest_cache' -exec rm -rf {} + find . -name '__pycache__' -exec rm -rf {} + diff --git a/conftest.py b/conftest.py new file mode 100644 index 0000000..838358d --- /dev/null +++ b/conftest.py @@ -0,0 +1,12 @@ +collect_ignore = [ + "setup.py" +] + +collect_ignore_glob = [ + # "tests/core/", + #"tests/evaluators/", + # "tests/non_reg/", + #"tests/parsers/", + #"tests/sdp/", + #"tests/sheerkapickle/", +] diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d0c3cbf --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". 
+help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..6247f7e --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/blog.rst b/docs/source/blog/blog.rst similarity index 95% rename from docs/blog.rst rename to docs/source/blog/blog.rst index 9985393..ee284be 100644 --- a/docs/blog.rst +++ b/docs/source/blog/blog.rst @@ -1,4 +1,10 @@ -.. contents:: +.. toctree:: + :maxdepth: 1 + + concepts + parsers + persistence + 2019-10-30 ********** @@ -55,35 +61,12 @@ An **history** is a triplet of .. _git: https://git-scm.com/ Personally, i have taken this way of tracking modification from how it's done on git_, -I guess Linux Torvarlds took it from somewhere. +I guess Linux Torvalds took it from somewhere. 
-2019-10-31 -********** - -More on Concepts -"""""""""""""""" -To define a new concept - -:: - - def concept hello a as "hello" + a -Note that the traditional quotes that would surround 'hello' and 'a' are not necessary. -In this example 'a' is a variable, as it appears as variable in the 'as' section (while hello -appears as a string) - -So, you could call the concept by - -:: - - hello kodjo - hello my friend - -They will produce the strings "hello kodjo" or "hello my friend" - -About versionning of the information +About versioning of the information """"""""""""""""""""""""""""""""""""" As I said previously, I mimic how git_ versions its objects. @@ -161,27 +144,7 @@ For example, for the exercise called "The descent" you will find It will be great if Sheerka is able to produce some code from these instructions :-) -Some words on data persistence -""""""""""""""""""""""""""""""""""""""""" -As I previously said (or not), the main difference between Sheerka and other languages, -is that Sheerka has a memory of its (her ? :-) previous interactions with the users. -The **Concepts**, as well as the **Events** or the **Rules** are persisted. Because of -that, I think that the more Sheerka is used, the more easier it will be to use it. - -So my first focus was to decide which database to use. - -There are tons of different databases already on the market. Unfortunately for me, I'm not -a database expert. But, I already know that I was not looking for a traditional -relational database (SGDB) as the structure will evolve and I didn't want to spend -my time on redesigning the schemas and the constraints. - -As I was learning Python, it could have been a good idea to also start looking at an -already existing NoSql database. I started to look at MongoDB, but I got lazy. I knew that -the top feature that I needed was that management of the history (the way git does it), -and it was not provided by Mongo, or I didn't notice it in my first readings on the subject. 
- -So I decided to design and implement my own database. SheerkaDataProvider (sdp) @@ -1130,3 +1093,10 @@ include <=, >=, = and != as well, once for all. Sorting things according to thes human naturally do. +2020-05-01 +********** + +Blog +"""""" +Hi, I have the feeling that I am almost there with the parsers part. I have + diff --git a/docs/source/blog/concepts.rst b/docs/source/blog/concepts.rst new file mode 100644 index 0000000..b87f007 --- /dev/null +++ b/docs/source/blog/concepts.rst @@ -0,0 +1,42 @@ +Concepts +======== + + + +Basic definition +**************** +To define a new concept + +:: + + > def concept hello a as "hello" + a + + +Note that, in the left part of the keyword 'as', the traditional quotes that would surround 'hello' and 'a' are not necessary. +In this example 'a' is a variable, as it appears as a variable in the 'as' section (while hello +appears as a string) + +You have just defined a concept named 'hello'. Sheerka can now understand + +>>> hello kodjo +>>> hello my friend + +When you do so, an instance of the concept will be created in memory. In the first case, +it will be a variable ``'a'`` filled with the value ``'kodjo'``, in the second case, the variable +will be filled with the value ``'my friend'`` + + +Another example: + +:: + + > def concept one as 1 + > def concept two as 2 + > def concept a plus b as a + b + +That's enough to define the addition + +>>> one plus two +3 + + diff --git a/docs/source/blog/parsers.rst b/docs/source/blog/parsers.rst new file mode 100644 index 0000000..e69de29 diff --git a/docs/source/blog/persistence.rst b/docs/source/blog/persistence.rst new file mode 100644 index 0000000..09ff634 --- /dev/null +++ b/docs/source/blog/persistence.rst @@ -0,0 +1,92 @@ +Data Persistence +================= + + +The basic idea +"""""""""""""" + +Everything starts with a basic and simple idea. + +My simple idea for the persistence is that **everything** should be persisted.
+The actual main difference between a human being and a computer is that we have the +ability to remember almost everything (at least everything that we have not forgotten). + +On the contrary, we only allow the computer to remember specific stuff that we think (as of +today) will be relevant in the future. + +There are two major issues with that: + +1. The obvious one is that we don't know what will be needed in the future +2. We prevent the computer from remembering things like a human being would do + +I think I will come back to the second point some day as it's more subtle than it looks +(at least to me). + +Anyway, I need + +1. A persistence mechanism that can save my main objects (**Concepts**, as well as the **Events** or the **Rules**), but also the current state of the system. +2. I also need to have the ability to go back in time, to see what were the values of these objects in the past. +3. And of course, I need traceability on these objects. E.g., the ability to prove that the data was not altered nor corrupted + + + +There are tons of different databases already on the market. Unfortunately for me, I'm not +a database expert. But, I already know that I was not looking for a traditional +relational database (SGDB) as the structure will evolve and I didn't want to spend +my time on redesigning the schemas and the constraints. + +.. _git: https://git-scm.com/ + +As I was learning Python, it could have been a good idea to also start looking at an +already existing NoSql database. I started to look at MongoDB, but I got lazy. I knew that +the top feature that I needed was the management of the history (the way git_ does it), +and it was not provided by Mongo, or I didn't notice it in my first readings on the subject. + +So I decided to design and implement my own database. + + +Versioning the information +""""""""""""""""""""""""""""""""""""" +As I said previously, I want a system that mimics how git_ versions its objects.
+ +:: + + Obj v0 : parents = [] + user name = + modification date = + digest = xxxxx + + Obj v1: parents = [xxxxx] + user name = + modification date = + digest = yyyyy + + Obj v2: parents = [yyyyy] + user name = + modification date = + digest = zzzzz + + and so on... + +I always keep a reference to the last version of the object, so I can navigate through +the versions using the ``parents`` attribute of the object + +In git_, there are basically two types of objects : + +- **content** (file content, or directory structure) +- **reference** to content (commit or tags) + +The hash of a **content** only depends on it, while the hash of a **reference** also depends +on the user name, the modification date and the parents. In both cases, the hash is +computed on the whole object. So the hash can also be used to check the integrity +of an object. + +For my objects, I need to decide how I compute the hash. + +**Concepts** have history; if I decide to include the history in the hash, +as the modification date is :code:`datetime.now()`, a new version will be created +even if the **Concept** has not changed. If I don't include it, the integrity of +what is saved is no longer guaranteed. + +I choose to value identity over integrity. The hash code of the **Concepts** does not depend +on its history. We will see what the future will say about this. \ No newline at end of file diff --git a/docs/source/blog_old.rst b/docs/source/blog_old.rst new file mode 100644 index 0000000..d544e3f --- /dev/null +++ b/docs/source/blog_old.rst @@ -0,0 +1,1094 @@ + +2019-10-30 +********** + +What is Sheerka ? +""""""""""""""""" + +Sheerka is a *communication* language, +as opposed to the traditional *programming* languages. Its +purpose is to ease the communication between the (wo)man and the machine, +ultimately using the voice. I will first use it to program faster, and maybe +more easily. + +.. _ulysse31: https://fr.wikipedia.org/wiki/Ulysse_31 + +Where does the name Sheerka came from ?
+""""""""""""""""""""""""""""""""""""""" +Sheerka is my misspell of Shyrka, from my childhood anime ulysse31_. +For those you don't know this old cartoon, it's the Odyssey story from Homer, +ported in the 31st century. Ulysses has a spacecraft with an AI named Shyrka + +I was a great fan of this cartoon when I was young. I thought that the idea of +bringing the ancient story of Ulysses in the future was bright. + +Ever since then, Sheerka was my reference for any sophisticated computer. Unfortunately +for me, at that time there was no wikipedia to tell the the correct spelling. + +Model v0 +"""""""" +In my view, the beginning of everything are the **Events**. Basically, they are the commands (ie requests) +entered by the users. + +The events are parsed, to understand what is required, so they produce a new **State**. +The state is a like a big dictionary that holds everything that is known by the system. + +Most of the elements saved in the **State** are the **Concepts**. In this first version, +it's a little bit complicated to define what is the **Concept** as it can have several +usages. To make it simple, I will say that a **Concept** is an idea that can be +manipulated by the rest of the system. +I am pretty sure that its form and usage will evolve as I will manipulate +them + +- Each **State** has a reference to the event(s) that trigger this state +- Each **State** has an **history** +- Each **Concept** has an **history** + + +An **history** is a triplet of + +- user name +- modification date +- digest of the parent + +.. _git: https://git-scm.com/ + +Personally, i have taken this way of tracking modification from how it's done on git_, +I guess Linux Torvarlds took it from somewhere. + + +2019-10-31 +********** + +More on Concepts +"""""""""""""""" +To define a new concept + +:: + + def concept hello a as "hello" + a + + +Note that the traditional quotes that would surround 'hello' and 'a' are not necessary. 
+In this example 'a' is a variable, as it appears as variable in the 'as' section (while hello +appears as a string) + +So, you could call the concept by + +:: + + hello kodjo + hello my friend + +They will produce the strings "hello kodjo" or "hello my friend" + + + +2019-11-01 +********** + +Inspired by CodinGames +"""""""""""""""""""""" + + +.. _codingame: https://www.codingame.com/home + +I am trying to teach my little kid how to code. He is 12 years old and it was his very +first time. + +Rather than trying a standard formal approach, we went on the codingame_ web site. There +are some pro and cons to use this platform, specially for the very beginners, but +I like the visual output of the programs. It's really like coding a game ! + +What I haven't noticed previously, is that (at least for the first programs), the solution +is given in human language. + +For example, for the exercise called "The descent" you will find + +:: + + For each round of play : + Reset the variables containing the index of the highest mountain and its height to 0 + For each mountain index (from 0 to 7 included) : + Read the height of the mountain (variable 'mountainH') from stdin + If it's higher than the highest known mountain, save its index and height + Returns the index of the highest mountain on stdout + +It will be great if Sheerka is able to produce some code from these instructions :-) + +Some words on data persistence +""""""""""""""""""""""""""""""""""""""""" +As I previously said (or not), the main difference between Sheerka and other languages, +is that Sheerka has a memory of its (her ? :-) previous interactions with the users. + +The **Concepts**, as well as the **Events** or the **Rules** are persisted. Because of +that, I think that the more Sheerka is used, the more easier it will be to use it. + +So my first focus was to decide which database to use. + +There are tons of different databases already on the market. Unfortunately for me, I'm not +a database expert. 
But, I already know that I was not looking for a traditional +relational database (SGDB) as the structure will evolve and I didn't want to spend +my time on redesigning the schemas and the constraints. + +As I was learning Python, it could have been a good idea to also start looking at an +already existing NoSql database. I started to look at MongoDB, but I got lazy. I knew that +the top feature that I needed was that management of the history (the way git does it), +and it was not provided by Mongo, or I didn't notice it in my first readings on the subject. + +So I decided to design and implement my own database. + + +SheerkaDataProvider (sdp) +""""""""""""""""""""""""" +Not I great name, I confess. But who care ? + +What are the main design constraints? + +:: + + 1. No adherence with the filesystem. + We must not care about where the data are stored. + The first implementation will be file based, but it has to be extensible. + The final target will be to have a decentralized persistence system + 2. CRUD operations are designed according to my needs + I don't want standard CRUD operations that I will have tweak. + The direct consequence is that sdp won't fit any other purpose + 3. History management for State and other objects for free. + + +sdp, like many modern database systems, is a dictionary. A big list of key-value pairs. +The key is a string, the value can be almost anything. Actually, for my needs, I guess +that I only need strings, numbers and list (of strings and numbers :-) + +Json also provide, true, false and null. So I guess that I will also need them. + +I need at least one level of categorization. That means that my objects can be grouped. +The basic signature to add a new element :code:`add(entry, obj)`. + +with + +:: + + entry : is the group / category where I want to put the object + object : object to persist + +With :code:`add("All_Concepts", "foo")` the database, let's call it **State** once for all, will be updated like this: + +.. 
code-block:: json + + {"All_Concepts" : "foo"} + +If I want to have another entry, I don't want to care about what was previously done. I +need the second call :code:`add("All_Concepts", "bar")` to produce + +.. code-block:: json + + {"All_Concepts" : ["foo", "bar"]} + + +So we are no longer in the usual way of implementing a CRUD. + + + +2019-11-06 +********** + +Input processing +""""""""""""""""" +The basic processing flow should be + +:: + + 1. parsers + 2. evaluators + 3. printers + +So, for each new input, all known parsers will try to recognize the input. Each parser will +return a triplet of :code:`(status, concept found (or node found), text message)` + +This list of triplet is given to the evaluators. In the same way, there should be multiple +types of evaluators. There will be the rules that will be introduced later. + +All evaluators will provide a list (a guess it will be triplets as well) to the printers. + +Python processing +""""""""""""""""" +Sheerka natively understand Python. So it will be able to execute Python code. +I will manage later on the issues caused by the different version of Python, or the fact +that some external modules must remain isolated (maybe using virtualenv) + +My first problem is to correctly implement the :code:`eval / exec` function. + +I don't know why, by Python has two similar function to do the same thing. One must use +eval to evaluate expression, or use exec to execute code. There must be an explanation but, +as for know, it seems to be a complication for nothing. + +The next issue that I will have to tackle is that Sheerka is not a REPL. After the execution +of the input, the system stops. Nothing is kept in memory (eg RAM). +The whole idea is to make Sheerka 'remember', even something that happened a long time ago. +So I should find a way to 'freeze the time' + +To better explain what I have in mind. let's say that I want to pretty print an object + +.. 
code-block:: python + + import pprint + pp = pprint.PrettyPrinter(indent=4) + pp.pprint(stuff) + +I need three line in oder to be able to pretty print. I will first try by dumping the +globals(), using pickle and load it back whenever needed. + +If it does not work as expected, I can find a way to save the commands a exec everything +when needed. (first time, I exec import... second time I exec import + pp == and the last +time I exec the three statements). + +2019-11-07 +********** + +Back on data persistence +""""""""""""""""""""""""" +Last time, I talked on how to add new entries in the **State**. I only need the name of +the category, on the object. If I add several objects under the same entry, +they don't override each other, they are kept as a list. + +.. code-block:: python + + add("All_Concepts", "foo") + add("All_Concepts", "bar") + +will produce something like + +.. code-block:: json + + {"All_Concepts" : ["foo", "bar"]} + +The reason behind this chose is that, in the human world, the same name can refer to +several concepts. The first obvious cases are the synonyms. Same word, but different +meaning. There are also some other case where the meaning of the world depend on the context. +Rather than forcing the user to spend some time to find another way to express the concept, +(as the name already exists), I prefer allow the storage under the same key. +The choice of the correct item to use in the list will be done on execution. + +I also need sdp to manage the key of my object. So 'entry' will be used to group object, +and the key will help to quick access to it. + +I don't want the signature :code:`add(entry, key, object)` because sometimes there is a key, +but keys are not mandatory. 
So I keep the signature :code:`add(entry, object)` + +To manage the key, the object either is a key/value entry :code:`{key: value}` (Python dict) or +has an attribute :code:`key`, or has a method :code:`get_key()` + +For example **Concepts** have a method :code:`get_key()`, so if the key of 'concept' is "foo", +the code + +.. code-block:: python + + add("All_Concepts", concept) + +will produce something like + +.. code-block:: python + + {"All_Concepts" : {"foo" : concept}} + +If I add another concept (concept2) which has tke key "bar", I will have + +.. code-block:: python + + {"All_Concepts" : {"foo" : concept, "bar": concept2}} + +and so on.. + +So under the 'All_Concepts' group, I have a quick access to the concept "foo" + +Note that, if for some reason, I end up with several concepts this the same key, they will +be just stack as list. I don't loose information. + +We will talk again about sdp later + +Status +"""""" +As of today, I have a first implementation of several main functionalities of the system + + +1. I have a good implementation of sdp + * When I say good, I talk about the coverage of the functionalities, not the efficiency of the code + * I can add object to the state + * The objects can be saved as reference (will be explained later) + * I manage events + * I manage history + * I manage several types of serialisation +2. I have two parsers + * DefaultParser : to detect sheerka specific language (like def concept) + * PythonParser : to parse Python code. + * There are called for every new event. +3. I have a first version of the evaluators + * These have piece of code that recognize a result and process it + * The current algo is not finished, but it works for simple cases + * I can create a new concept + * I can evaluate simple Python expression +4. 
I don't have the printers, but it's ok, I just dump the result of processing + +so I can type + +:: + + def concept hello name as "hello" + name + 1 + 1 + sheerka.test() + +I will now work on how to call an already defined concept. + + +2019-11-11 +********** + +Maintaining the blog +"""""""""""""""""""" +It's not very easy to maintain this blog. Every time I have some time to work on **Sheerka**, +I must choose between expressing my ideas in this blog and coding. + +I have plenty of ideas that I would like to express, sometimes just to put the idea down, +but I lack of time. It would be great if I can find a tool that will allow me to just to +dictate my words. I know that there are plenty out there, I need to spend some time to test +them and choose one. + +2019-11-15 +********** + +Managing concepts resolutions +""""""""""""""""""""""""""""" +I am a little stuck on the algorithm I must use to derive (resolve) concepts. This is +one of this day I strongly regret to have someone I can discuss with :-( + +Let's write the problem down, sometimes, it helps figure out the best approach. + +:: + + def concept one as 1 + one + +The concept is first define (it returns the number 1), and then it's called. +During the call + +1. During parsing, + Both Python parser and concept parser will recognize 'one' +2. During Evaluation, + * Python Evaluator will fail (one is not know by python) + * Concept Evaluator will success. My question is what should it return ? + +The two option are: +1. Python node, to let the Python Evaluator work and return one, in the next row +2. Returns '1' directly + +I as write it down, it is obvious that it must return 1, since the purpose of any +evaluation is to give a result, not the path to find the result. + +Plus, if don"t resolve the body in the Concept Evaluator, I will loose where the +'1' comes from. + +I don't know if I was clear. I don't even know if I will be able to re-read myself. +But I think that I have my solution. 
+ + +2019-11-16 +********** + +ExactConceptParser limitation +""""""""""""""""""""""""""""" + +From the beginning, my simplest example is to show that addition can be simply +explained to Sheerka + +:: + + def concept a plus b as a + b + def concept one as 1 + def concept two as 2 + one plus two + +The :code:`one plus two` is perfectly recognized, and the result is 3. +:code:`two plus one` also work (with the correct response). + +But I was quite surprised to see that :code:`one plus one` was not recognized !! + +Indeed, the **ExactConceptParser** looks for :code:`__var0__ plus __var1__`. So +the first operand and the second have to be different. + +It's unexpected :-( + +Do I need to enhance the parser to recognize it, or no I need to build another parser ? + +If I tell the parser that :code:`a plus b`, how do I handle the cases where 'a 'and 'b' +MUST be different ? How I handle when the explicitly have to be the same ? + +I seems that the purpose of the **ExactConceptParser** is to find exact match. +I need another way to express that 'a' and 'b' can be the same. + +2019-11-21 +********** + +MemoryFS, is it a joke ? +""""""""""""""""""""""""""""" + +I spent this day working on a improving the test performances. By default Sheerka +persists its data on the file system (even if I said that where the data is saved) +is not important for the sdp module. + +For each test, a folder in initialized to hold concepts information. And this folder +is destroyed after usage. For almost every single test ! + +So I decided to implement fs.MemoryFS. Information in memory is supposed to be +faster than on the disk ! + +I was very disappointed, after a afternoon of refactoring that it is actually slower +than the native io implementation. + +Even now that I am writing it, I just can't believe it. I must I have implemented +it wrong. But the profiling shows that the time is lost in the under layers of the +FS library. + +It's a shame ! 
+ +2019-12-01 +********** + +Using BNF to define concept +""""""""""""""""""""""""""""" + +I always knew that there would be several ways to define the body of a concept (same +goes for the 'pre', 'post' and 'where' parts). It can be defined as Python code, +or something that is related to concepts. It can even be a new language that I will +design. The important point is that, contrary to traditional development languages, +Sheerka must remain extensible. + +Same goes for the definition of the name. + +The traditional form is: + +:: + + def concept foo bar baz as ... + +So the concept is defined by the sequence 'foo', then 'bar' then 'baz'. In this order. + +Another way is + +:: + def concept a plus b where a,b as ... + +In this form, a and b are supposed to be variables. +It will be matched against :code:`one plus two`. + +The concept name is 'a plus b'. It is a quick way to declare a concept with variables, +but if someone defines another concept + +:: + + def concept number1 plus number2 where number1,number2 as ... + +This will produce another concept (with the same key though). I guess that, at +some point, Sheerka will be able to detect that the concepts are the same, but +the name of the concept includes its variables. Which may be annoying in some +situations. + +Plus, it's not possible to define rule precedences in this way. For example, + +:: + + def concept a plus b as ... + def concept a times b as ... + +How do you express that multiplications have a higher priority in, for example, +:code:`one plus two times three` ? + +The only right answer, at least to me, is to implement something that is inspired +by the BNF definition of a grammar. + +So the definition of the concept will look like + +:: + + def concept term as factor (('+' | '-') term)? + def concept factor as number (('*' | '/') factor)?
+ def number where number in ['one', 'two', 'three'] as match(body, 'one', 1, 'two', 2, 'three', 3) + +This form seems great, but in the definition of term and factor, there is no more +room for the real body. ie once the components are recognized, what do we do with them ? + +So we can try + +:: + + def concept factor (('+') factor)* as factor[0] + factor[i] + def concept number (('*') number)? as number[0] + number[i] + def number where number in ['one', 'two', 'three'] as match(body, 'one', 1, 'two', 2, 'three', 3) + +The body is defined, but the name of concept is to complicated ex: factor (('+') factor)* +It's quite impossible to reference a concept that is defined in this way. + +So my last proposal, with marry the two ideas, is to introduce the two keyword 'using' 'bnf' + +.. _bnf : https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form + +:: + + def concept term using bnf factor (('+' | '-') term)? as factor + (or -) term + def concept factor using bnf number (('*' | '/') factor)? as number * (or /) factor + def number where number in ['one', 'two', 'three'] as match(body, 'one', 1, 'two', 2, 'three', 3) + +In my implementation: + +* Terminals are between quotes +* Sequences are separated by whitespaces +* '|' (vertical bar) is used for alternatives + +Like in regular expressions, you will also find + +* '*' (star) is used to express zero or many +* '+' (plus) to express one or many +* '?' (question mark) to expression zero or one + +For those who doesn't know that BNF stands for, please have a look at the bnf_ +wikipedia page. + +I guess that I will need a complete chapter to explain how you retrieve what was parsed + +2019-12-21 +********** + +Implementing Inheritance +"""""""""""""""""""""""" + +Except that it is not inheritance, at least the way it is seen in modern programing languages. + +I think that I should first express what I am trying to do. I guess that it will help me +have a better understanding myself. 
+ +:: + + def concept one as 1 + def concept two as 2 + one is a number + two is a number + +When I enter :code:`one`, the result should be :code:`1` + +But I should be able to express other concepts by using + +:: + + def concept a plus b where a is a number and b is a number as a + b + +Just by reading what I have just written, we can see that 'is a' has two separate meanings. +In the first usage, it's an affirmation, in the latter one, it's a question. + +Should we consider them as the same concept, with two usages, or as two separate concepts, +which are somehow linked ? + +As of now, there is only one usage to all concepts, which is the property 'BODY', but I have +prepared the property 'PRE' which can be used for that. + +I am a little bit making a digression. The original subject was on how I can express that a +concept is an element of another concept. We may focus on the implementation later. + +So saying that 'one' is a 'number' means that there is a set called 'number' +in which 'one' belong. + +The simple implementation will be to create an entry 'all_number' in sdp and to add 'one' in it. +The two issue that I foresee are: + +* What about infinite sets ? (my set 'number' can never be completed if I put the item one by one) +* What if the same name refers to different set (I don't have any example in mind, but I guess that synonyms of sets do exist) + + +For the two questions, I will first try the simple implementations and see there I go from there. ie : + +* on the top of the entry all_numbers which lists the known numbers, you can define concepts :code:`is a number` + that can be also used to detect the the concept is part of the set +* the entry in sdp will not be all_number, but all_id_of_number. I will use the concept id instead of its name + + +2019-24-12 +********** + +Going back on BNF implementation. As it's Christmas eve today, I won't stay very long. 
+ +So, the implementation lies in the class BnfNodeParser, it's a lexer not for tokens, but for concepts. +The purpose of this class is to recognize a sequence of Concepts. + +So if we define the following concepts + +:: + + def concept foo from bnf one two three + def concept bar from bnf four five + +when you input + +:: + + one two three four five + +the list of :code:`[foo, bar]` will be returned by the BnfNodeParser (as return values) + +How does it work ? + +As explained in the code, my implementation is highly inspired by the Arpeggio project. To define your grammar, you +use **ParsingExpressions**. There are several types: + +* some are used to recognize tokens: StrMatch, ConceptExpression +* others are used to tell how to recognize: Sequence, OrderedChoice, Optional, OneOrMore, ZeroOrMore... + +Some examples : + +:: + + to recognize 'foo' -> StrMatch('foo') + to recognize 'foo bar' -> Sequence(StrMatch('foo'), StrMatch('bar')) + to recognize 'foo' or 'bar' -> OrderedChoice(StrMatch('foo'), StrMatch('bar')) + + and so on... + +So when a concept is defined using its bnf definition, I use the **BnfParser** to create the grammar, and then +I use the **BnfNodeParser** to recognize the concepts + +The current implementation to recognize a concept is not very efficient. All the definitions are in a dictionary +and I go through the whole dictionary to see if some concepts are recognized. Once a concept is found, I loop again +on the whole dictionary to find the next concept. + +| -> I need a btree to order the concepts +| -> I need a predictive algorithm to guess the next concept + +But it is for later. + +So once the parsing is effective, I return a **ConceptNode** object + +..
 code-block:: python + + class ConceptNode(LexerNode): + """ + Returned by the BnfNodeParser + It represents a recognized concept + """ + + def __init__(self, concept, start, end, tokens=None, source=None, underlying=None): + super().__init__(start, end, tokens, source) + self.concept = concept + self.underlying = underlying + + if self.source is None: + self.source = BaseParser.get_text_from_tokens(self.tokens) + + +concept + | Remember that all grammars are listed in a dictionary of concepts. + | So when a parsing expression is verified, it's easy to link it with the concept +start + position of the first token +end + position of the last token +tokens + list of tokens that are recognized +underlying + **NonTerminalNode** or **TerminalNode** that wraps the underlying **ParsingExpression** used to recognize the concept +source + | The source is deduced from the tokens + | But in the unit tests, they are directly given for speed and simplicity + +What is the difference between the **[Non]TerminalNode** and the **ParsingExpression** ? + +The ParsingExpression + defines how to recognize a concept + +The [Non]TerminalNode + represents what was found. So similarly to the ConceptNode, you will find the start, end and token attributes + +That's all for today ! + +2019-12-27 +********** + +How to manage variable resolutions +""""""""""""""""""""""""""""""""""" + +I have to admit that I am a little bit stuck with how to manage variable resolution with PythonEvaluator. +What is expected by the expression depends on the expression itself. + +Let's see an example + +:: + + def concept one as 1 + def concept two as 2 + + eval one + two + +In this situation, I expect PythonEvaluator to resolve the concepts 'one' and 'two' and to return 1 + 2, hence 3 + +In this other situation + +:: + + def concept one as 1 + def concept desc a as sheerka.desc(a) + desc one + +I expect Python evaluator NOT to resolve the concept one and to pass it straight to the function.
+ +Unfortunately for me, in the current implementation. 'a' is resolved to the concept 'one', which is resolved to its +body "1". So the call failed, as there is not concept 1 (moreover, 1 is an integer, it's not even the string "1"). + +There also be some cases where 'sheerka.desc()' expects the name of a concept (and the resolution of the concept +will be done inside the function). In this case, it's not the body nor the concept itself that is required, but the name +of the concept. + +So here are three cases where the behaviour of PythonEvaluator is required to be different. I cannot hard code theses +behaviours as they depend on the context. + +The global idea, to resolve this situation is to give to Sheerka a memory. What I am currently working on is the possibility +**to create** and **to recognize** concepts. As a recall : + +You can create simple concepts + +:: + + def concept one as 1 + +or concept using bnf + +:: + + def concept twenties from bnf twenty (one | two | three...)=unit as 20 + unit + + +Both can be recognised. +But if I define + +:: + + def a plus b as a + 1 + + +:code:`one + two` will be recognized but twenty two plus one is not correctly implemented yet. + +To go back on my issue with the variables resolutions with PythonEvaluator, the idea is to implement rules that will +recognize the concept, so you will tell Sheerka if the value, the concept or the name is expected. + +I am far from implementing the rules. To be honest, I don't even know now how they will look like. + +So I am going to introduce the keyword :code:`concept:name:` or :code:`c:name:` + +It will means that the concept is required. + +If the name is required, you can use :code:`"'name'"` or :code:`'"name"'`. +It's already working. There is nothing to do for this one. + +2020-07-01 +********** + +How do we perform the parsing ? 
+""""""""""""""""""""""""""""""" + +The basic flow of an execution is : + +* Parse the data -> Nodes +* Evaluate the nodes -> Concepts +* Display the results + +The theories says that there can exist as many parsers as necessary. Each one of them will +be specialized to recognize a specific pattern. They will then send there information to +the evaluators. + +As of now, I have implemented the following parsers: + +* EmptyStringParser + To recognize empty strings and react accordingly + +* PythonParser + To recognize Python source code + +* ExactConceptParser + To recognize simple form of concepts + +* DefaultParser (the name is not accurate) + To recognize builtin syntax (like 'def concept' or 'isa') + +* BnfNodeParser + To recognize concept defined with BNF language + +All theses parsers are executed in the row (the order in not very important) + +The first observation is that there is lot of CPU waste. Most of the time (at least as of +now, when a there is a match with one parser, the others fail). So there is no need to +execute them. + +The second point is that there is now way for a parser to use the result of another. +My idea is to have parsers that can be chained, each one of them will do the little thing +it is capable of before leaving the rest to some more powerful parser. + +I don't want to bring out the big guns for every single user input. And I certainly +don't want a massive and over complex parser that will be capable (in theory) of everything + +Why ? + +| First of all, monolithic code is bad :-) +| Then I have to keep in mind that the process will be somehow distributed +| And last, but not least. I don't have (and I certainly will never have) the full completion + of all possible parsing situation. So what I need is a plug and play system where I can add + and remove and chain parsers, depending of the input. 
+ +So, + +* I'll give all parsers a priority +* The parsers with the highest priority will be executed first +* The parsers with the same priority will be executed at the same time (The order does matter) +* If, for a given priority there is a match, the parser with a lower priority won't be executed +* A parser has access to the output of the parsers of higher priorities (which were executed before it) + +2020-01-11 +********** + +Status +"""""" + +Last status was back in October. At that time I could + +:: + + def concept hello name as "hello" + name + 1 + 1 + sheerka.test() + +1. I can evaluate concepts + +:: + + def concept hello a where a + hello kodjo + +2. I have worked on BNF definition of the concept + +:: + + def concept twenties from bnf 'twenty' (one | two | three)=unit as 20 + unit + twenty one + eval twenty one + +3. I can mix complex concepts (concepts with more than one word) and Python + +:: + + twenty one + twenty two + twenty one + one does not work :-( + + +4. I have a basic implementation for logging. With control of the verbosity + +5. The result of an user input evaluation is now persisted, alongside with the event +that was used for it. + + + +2020-04-18 +********** + +Blog +"""""" + +It's been a (very) long time since I have written in this blog. + +The main reason is that I found reStructured markup too complicated. I'm still not used to how directives are +supposed to work. There are so many way to do the same thing ! + +I guess that it's also because I don't have the proper tool to write this doc. +I use PyCharm and thought a have the basic rendering, I cannot easily navigate between +the articles + +In need to install Sphinx. I want it in a docker. 
For sure it's not mandatory, but I'm must practice my +docker skill if I don't want to forget everything + +Parsers +""""""" +As I keep repeating, parsing expression is a very big part of what I want to achieve (alongside with the +rule engine and the speech recognition) +It as to be very easy to expression a new concept + +:: + + def concept one as 1 + def concept two as 2 + +That's it ! +I should now can do + +:: + + one + one + one + two + +Now, I can decide that plus is also a concept + +:: + + def concept a plus b as a + b + +So basically, every time Sheerka will parse something 'plus' something else, it will recognize the concept a plus b + +:: + + one plus two + +worked, but + +:: + + one plus one + +doesn't. Because 'a' and 'b' are two different letters, so it was looking for two different values. That was +an unexpected side effect of my first naive implementation. + +Let's put that aside for the moment and keep on our exercise to model the world. + +After an addition, it will be good to have the multiplication. Easy + +:: + + def concept a mult b as a * b + +So I can try + +:: + + one plus two mult three + +Of course, this one does now work by magic. The precedence (priority ?) between addition and multiplication +was not respected. + +.. _bnf: https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form + +The first idea was the bnf_ parser in order to be able to write something like + +:: + + def concept plus from bnf mult ('plus' mult)* + def concept mult from bnf number ('mult' number)* + def concept number from bnf one|two + +Expressing recursive concepts was simple. I was proud of this implementation :code:`one plus two mult three` +was understood in the correct way. + + +But it started to become complicated when I wanted to define the body. In ':code:`mult (plus mult)*`' where +is the left part? where is the right part ? 
+ +Ok, let's try something like + +:: + + def concept plus from bnf mult=a ('plus' mult)*=b + def concept mult from bnf number=a ('mult' number)*=b + def concept number from bnf one|two + +We now have 'a' for the left part, and a potential list of 'b' for the right part. +The full definition of the concept :code:`plus` will look like + +:: + + def concept plus from bnf mult=a ('plus' mult)*=b as: + res = a + for value in b: + res += value + return res + +This should work fine. In my current implementation, 'a' is an instance of the concept 'mult', correctly +initialized with concept one or a concept two, and likewise 'b' is a list of concept 'mult'. + +So it should work. + +It's just that I have never been this far in the tests. I just couldn't. THIS IS WAY MORE TOO COMPLICATED +TO DEFINE A SIMPLE ADDITION !!! + +**Note** that you must have quote surrounding the 'plus' in the definition, to make the difference between +the concept and the literal. It's necessary, but when you start to do that, you start to narrow the usage +of your system to developers only. So, even if there is no other way, I didn't really liked that. + +.. _IronPython: https://ironpython.net/ +.. _parsec: https://github.com/jparsec/jparsec +.. _Holy Grail: //www.youtube.com/watch?v=YxG5mDItkGU +.. _one: https://en.wikipedia.org/wiki/Shunting-yard_algorithm + +So I am done ? Is this the end ? There should be another way to express the priority (precedence ?) between the concept. + +Luckily for me, I remembered that I have once seen a implementation of the Python parser (IronPython_ I think) were they +used numbers to evaluate the precedence between additions and multiplications. And there were also something +like that when I used parsec_ parser. + +So I went back on internet and found my `Holy Grail`_, well not this one, this one_. 
+ +**The Shunting Yard Algorithm** + +I took me a few days to understand it and implement it in its basic form (which a already too long), +but it took me one entire month to adapt it to the concepts. I know, I am not quick :-) + +As a matter of fact, the sya (Shunting Yard Algorithm) is designed for binary operators and functions where the number +of arguments is known. You can support unary operators, but there is nothing explained to ternary and more. +Dealing with concepts that can be expressed as :code:`'foo a b'` (suffixed concept) or :code:`'a b bar'` +(prefixed concept) was a interesting challenge! + +Anyway, I am now in position where I can simply define my addition and my multiplication + +:: + + > def concept a plus b as a + b + > def concept a mult b as a * b + > eval one plus two mult three + > 7 + +That's it ! + +At least in theory. The definition and the parsing of the concepts is done and fully tested when you +programmatically set the precedences, I now need a way to define/express the priorities + +What I surely don't want is to write something like: + +:: + + plus.precedence = 1 + mult.precedence = 2 + +or + +:: + + set_precedence(plus, 1) + set_precedence(mult, 2) + +Any solution where you have to give the actual value of the precedence is a bad solution. I would like to +have something like + +:: + + precedence mult > precedence plus + +or + +:: + + mult.precedence > plus.precedence + + +It means that I now have to implement a partitioning algorithm with simple constraints (<, >). I think that I will +include <=, >=, = and != as well, once for all. Sorting things according to these constraints is something +human naturally do. + + +2020-05-01 +********** + +Blog +"""""" +Hi, I have the feeling that I am almost there with the parsers part. 
I have + diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..74a67cb --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,55 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + + +# -- Project information ----------------------------------------------------- + +project = 'Sheerka' +copyright = '2020, Kodjo Sossouvi' +author = 'Kodjo Sossouvi' + +# The full version, including alpha/beta/rc tags +release = '0.0.1' + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = [] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'alabaster' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. 
They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] \ No newline at end of file diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..a10f22e --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,30 @@ +Sheerka's documentation! +=================================== + +Hi, welcome the the documentation page of Sheerka. +There will be two types of documentation + + * The first one will be more like a blog, where I will express my feelings ;-) + * A more standard Technical Design documentation (if it does not take too much time :-) + + + +.. toctree:: + :maxdepth: 1 + :caption: Blog: + + blog/blog + +.. toctree:: + :maxdepth: 1 + :caption: Technical Design: + + tech/tech + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/source/tech/tech.rst b/docs/source/tech/tech.rst new file mode 100644 index 0000000..a617196 --- /dev/null +++ b/docs/source/tech/tech.rst @@ -0,0 +1,4 @@ +Technical Design +================ + +I need to put some stuff here \ No newline at end of file diff --git a/src/cache/BaseCache.py b/src/cache/BaseCache.py new file mode 100644 index 0000000..d1bf48e --- /dev/null +++ b/src/cache/BaseCache.py @@ -0,0 +1,241 @@ +from threading import RLock + + +class BaseCache: + """ + An in memory FIFO cache object + When the max_size is reach the first element that was put is removed + When you put the same key twice, the previous element is overridden + """ + + def __init__(self, max_size=None, default=None, extend_exists=None): + self._cache = {} + self._max_size = max_size + self._default = default # default value to return when key is not found. 
It can be a callable of key + self._extend_exists = extend_exists # search in remote + self._lock = RLock() + self._current_size = 0 + self._initialized_keys = set() + + self.to_add = set() + self.to_remove = set() + + def __len__(self): + """ + Return the number of items in the cache + :return: + """ + with self._lock: + return self._current_size + + def __contains__(self, key): + with self._lock: + return key in self._cache + + def __iter__(self): + with self._lock: + keys = self._cache.copy() + yield from keys + + def __next__(self): + return next(iter(self._cache)) + + def __repr__(self): + return f"{self.__class__.__name__}(size={self._current_size}, #keys={len(self._cache)})" + + def configure(self, max_size=None, default=None, extend_exists=None): + if max_size is not None: + self._max_size = max_size + + if default is not None: + self._default = default + + if extend_exists is not None: + self._extend_exists = extend_exists + + def disable_default(self): + self._default = None + + def put(self, key, value): + """ + Add a new entry in cache + :param key: + :param value: + :return: + """ + with self._lock: + if self._max_size and self._current_size >= self._max_size: + self.evict(self._max_size - self._current_size + 1) + + if self._put(key, value): + self._current_size += 1 + + def get(self, key): + """ + Retrieve an entry from the cache + If the entry does not exist, will use the 'default' value or delegate + :param key: + :return: + """ + with self._lock: + self._initialized_keys.add(key) + return self._get(key) + + def inner_get(self, key): + return self._cache[key] + + def update(self, old_key, old_value, new_key, new_value): + """ + Update an entry in the cache + :param old_key: key of the previous version of the entry + :param old_value: previous version of the entry + :param new_key: key of the entry + :param new_value: new value + :return: + """ + with self._lock: + self._update(old_key, old_value, new_key, new_value) + + def delete(self, key, 
value=None): + with self._lock: + try: + self._delete(key, value) + except KeyError: + pass + + def has(self, key): + """ + Return True if the key is in the cache + Never use extend_exist + :param key: + :return: + """ + with self._lock: + return key in self._cache + + def exists(self, key): + """ + Return True if the key is in the cache + Can use extend_exist + :param key: + :return: + """ + with self._lock: + if key in self._cache: + return True + + return self._extend_exists(key) if self._extend_exists else False + + def evict(self, nb_items): + """ + Remove nb_items from the cache, using the replacement policy + :return: + """ + with self._lock: + nb_items = self._current_size if self._current_size < nb_items else nb_items + nb_to_delete = nb_items + while nb_items > 0: + key = next(iter(self._cache)) + del (self._cache[key]) + try: + self._initialized_keys.remove(key) + except KeyError: + pass + nb_items -= 1 + + self._current_size -= nb_to_delete + + return nb_to_delete + + def clear(self): + with self._lock: + self._cache.clear() + self._current_size = 0 + self._initialized_keys.clear() + self.to_add.clear() + self.to_remove.clear() + + def dump(self): + with self._lock: + return { + "current_size": self._current_size, + "cache": self._cache.copy() + } + + def copy(self): + with self._lock: + return self._cache.copy() + + def init_from(self, dump): + with self._lock: + self._current_size = dump["current_size"] + self._cache = dump["cache"].copy() + return self + + def reset_events(self): + with self._lock: + self.to_add.clear() + self.to_remove.clear() + + def _sync(self, *keys): + for key in keys: + if key not in self._initialized_keys and self._default: + # to keep sync with the remote repo is needed + self.get(key) + + def _add_to_add(self, key): + self.to_add.add(key) + try: + self.to_remove.remove(key) + except KeyError: + pass + + def _add_to_remove(self, key): + self.to_remove.add(key) + try: + self.to_add.remove(key) + except KeyError: + pass + + def 
_get(self, key): + try: + value = self._cache[key] + except KeyError: + if callable(self._default): + value = self._default(key) + if value is not None: + self._cache[key] = value + + # update _current_size + if isinstance(value, (list, set)): + self._current_size += len(value) + else: + self._current_size += 1 + else: + value = self._default + + return value + + def _put(self, key, value): + pass + + def _update(self, old_key, old_value, new_key, new_value): + pass + + def _delete(self, key, value): + raise NotImplementedError() + + # def _put(self, key, value): + # self._cache[key] = value + # self._add_to_add(key) + # return True + # + + # + # def _update(self, old_key, old_value, new_key, new_value): + # self._cache[new_key] = new_value + # self._add_to_add(new_key) + # + # if new_key != old_key: + # del (self._cache[old_key]) + # self._add_to_remove(old_key) diff --git a/src/cache/Cache.py b/src/cache/Cache.py new file mode 100644 index 0000000..423bee6 --- /dev/null +++ b/src/cache/Cache.py @@ -0,0 +1,31 @@ +from threading import RLock + +from cache.BaseCache import BaseCache + + +class Cache(BaseCache): + """ + An in memory FIFO cache object + When the max_size is reach the first element that was put is removed + When you put the same key twice, the previous element is overridden + """ + + def _put(self, key, value): + res = key not in self._cache + self._cache[key] = value + self._add_to_add(key) + return res + + def _update(self, old_key, old_value, new_key, new_value): + self._cache[new_key] = new_value + self._add_to_add(new_key) + + if new_key != old_key: + self._sync(old_key) + del (self._cache[old_key]) + self._add_to_remove(old_key) + + def _delete(self, key, value): + del(self._cache[key]) + self._add_to_remove(key) + diff --git a/src/cache/CacheManager.py b/src/cache/CacheManager.py new file mode 100644 index 0000000..ff9cfaf --- /dev/null +++ b/src/cache/CacheManager.py @@ -0,0 +1,261 @@ +from dataclasses import dataclass, field +from threading 
import RLock +from typing import Callable + +from cache.Cache import Cache +from core.concept import Concept + + +class MultipleEntryError(Exception): + """ + Exception raised when trying to alter an entry with multiple element + without giving the origin of the element + """ + + def __init__(self, key): + self.key = key + + +@dataclass +class CacheDefinition: + cache: Cache + use_ref: bool + get_key: Callable[[Concept], str] = field(repr=False) + persist: bool = True + + +class CacheManager: + """ + Single class to manage all the caches + """ + + def __init__(self, cache_only): + self.cache_only = cache_only # if true disable all remote access when key not found + self.caches = {} + self.concept_caches = [] + self.is_dirty = False # to indicate that the value of a cache has changed + + self._lock = RLock() + + def register_concept_cache(self, name, cache, get_key, use_ref): + """ + Define which type of cache along with how to compute the key + :param name: + :param cache: + :param get_key: + :param use_ref: + :return: + """ + with self._lock: + if self.cache_only: + cache.disable_default() + self.caches[name] = CacheDefinition(cache, use_ref, get_key) + self.concept_caches.append(name) + + def register_cache(self, name, cache, persist=True, use_ref=False): + """ + Define which type of cache along with how to compute the key + :param name: + :param cache: + :param persist: + :param use_ref: + :return: + """ + with self._lock: + if self.cache_only: + cache.disable_default() + self.caches[name] = CacheDefinition(cache, use_ref, None, persist) + + def add_concept(self, concept): + """ + We need multiple indexes to retrieve a concept + So the new concept is dispatched into multiple caches + :param concept: + :return: + """ + with self._lock: + for name in self.concept_caches: + cache_def = self.caches[name] + key = cache_def.get_key(concept) + cache_def.cache.put(key, concept) + + self.is_dirty = True + + def update_concept(self, old, new): + """ + Update a concept. 
+ :param old: old version of the concept + :param new: new version of the concept + :return: + """ + with self._lock: + for cache_name in self.concept_caches: + cache_def = self.caches[cache_name] + + old_key = cache_def.get_key(old) + new_key = cache_def.get_key(new) + + cache_def.cache.update(old_key, old, new_key, new) + + self.is_dirty = True + + # how can you update an entry it the key may have changed ? + # You need to have an invariant. By convention the keys in the first cache cannot change + # with self._lock: + # iter_cache_def = iter(self.caches) + # + # cache_def = next(iter_cache_def) + # old_key = cache_def.get_key(concept) + # + # try: + # while True: + # items = cache_def.cache[old_key] + # if isinstance(items, (list, set)): + # for item in items: + # if item.id == concept.id: + # break + # else: + # raise IndexError(f"{old_key=}, id={concept.id}") + # + # cache_def.cache.update(old_key, item, cache_def.get_key(concept), concept) + # + # else: + # cache_def.cache.update(old_key, items, cache_def.get_key(concept), concept) + # + # cache_def = next(iter_cache_def) + # except StopIteration: + # pass + # self.is_dirty = True + + def get(self, cache_name, key): + """ + From concept cache, get an entry + :param cache_name: + :param key: + :return: + """ + with self._lock: + return self.caches[cache_name].cache.get(key) + + def copy(self, cache_name): + """ + get a copy the content of the whole cache as a dictionary + :param self: + :param cache_name: + :return: + """ + return self.caches[cache_name].cache.copy() + + def put(self, cache_name, key, value): + """ + Add to a cache + :param cache_name: + :param key: + :param value: + :return: + """ + with self._lock: + self.caches[cache_name].cache.put(key, value) + self.is_dirty = True + + def delete(self, cache_name, key, value=None): + """ + Delete an entry from the cache + :param cache_name: + :param key: + :param value: + :return: + """ + with self._lock: + self.caches[cache_name].cache.delete(key, value) 
+ self.is_dirty = True + + def has(self, cache_name, key): + """ + True if the value is in cache only. Never try to look in a remote repository + :param cache_name: + :param key: + :return: + """ + with self._lock: + return self.caches[cache_name].cache.has(key) + + def exists(self, cache_name, key): + """ + True if the value is in cache. + If not found, may search in a remote repository + :param cache_name: + :param key: + :return: + """ + if self.cache_only: + return self.has(cache_name, key) + + with self._lock: + return self.caches[cache_name].cache.exists(key) + + def commit(self, context): + """ + Persist all the caches into a physical persistence storage + :param context: + :return: + """ + + def update_full_serialisation(items, value): + # Take care, infinite recursion is not handled !! + if isinstance(items, (list, set, tuple)): + for item in items: + update_full_serialisation(item, value) + elif isinstance(items, dict): + for values in items.values(): + update_full_serialisation(values, value) + elif isinstance(items, Concept): + items.metadata.full_serialization = value + + if self.cache_only: + return + + with self._lock: + with context.sheerka.sdp.get_transaction(context.event.get_digest()) as transaction: + for cache_name, cache_def in self.caches.items(): + if not cache_def.persist: + continue + + for key in cache_def.cache.to_remove: + transaction.remove(cache_name, key) + + for key in cache_def.cache.to_add: + if key == "*self*": + transaction.add(cache_name, None, cache_def.cache.dump()["cache"]) + else: + to_save = cache_def.cache.inner_get(key) + update_full_serialisation(to_save, True) + transaction.add(cache_name, key, to_save, cache_def.use_ref) + update_full_serialisation(to_save, False) + + cache_def.cache.reset_events() + self.is_dirty = False + + def clear(self, cache_name=None): + with self._lock: + if cache_name: + self.caches[cache_name].cache.clear() + else: + for cache_def in self.caches.values(): + cache_def.cache.clear() + + def 
dump(self): + with self._lock: + res = {} + for cache_name, cache_def in self.caches.items(): + res[cache_name] = cache_def.cache.dump() + + return res + + def init_from(self, dump): + with self._lock: + for cache_name, content in dump.items(): + if cache_name in self.caches: + self.caches[cache_name].cache.init_from(content) + + return self diff --git a/src/cache/DictionaryCache.py b/src/cache/DictionaryCache.py new file mode 100644 index 0000000..7ae79cd --- /dev/null +++ b/src/cache/DictionaryCache.py @@ -0,0 +1,53 @@ +from cache.BaseCache import BaseCache + + +class DictionaryCache(BaseCache): + def _get(self, key): + """ + Management of the default is different + :param key: + :return: + """ + try: + value = self._cache[key] + return value + except KeyError: + if callable(self._default): + self._cache = self._default(key) or {} + else: + self._cache = self._default.copy() if self._default else {} + + self._count_items() + return self._cache[key] if key in self._cache else None + + def _put(self, key, value): + """ + Adds a whole dictionary + :param key: True to append, false to reset + :param value: dictionary + :return: + """ + if not isinstance(key, bool): + raise KeyError + + if not isinstance(value, dict): + raise ValueError + + if key: + if self._cache is None: + self._cache = value.copy() + else: + self._cache.update(value) + else: + self._cache = value + + self._count_items() + + # special meaning for to_add + self._add_to_add("*self*") + return False + + def _count_items(self): + self._current_size = 0 + for v in self._cache.values(): + self._current_size += len(v) if hasattr(v, "__len__") and not isinstance(v, str) else 1 diff --git a/src/cache/IncCache.py b/src/cache/IncCache.py new file mode 100644 index 0000000..11ada16 --- /dev/null +++ b/src/cache/IncCache.py @@ -0,0 +1,18 @@ +from cache.Cache import Cache + + +class IncCache(Cache): + """ + Increment the value of the key every time it's accessed + """ + + def _get(self, key): + value = 
super()._get(key) or 0 + value += 1 + self._put(key, value) + return value + + def _put(self, key, value): + self._cache[key] = value + self._add_to_add(key) + return True diff --git a/src/cache/ListCache.py b/src/cache/ListCache.py new file mode 100644 index 0000000..0e874b8 --- /dev/null +++ b/src/cache/ListCache.py @@ -0,0 +1,43 @@ +from cache.Cache import BaseCache + + +class ListCache(BaseCache): + """ + An in memory FIFO cache object + When the max_size is reach the first element that was put is removed + Items of this cache are list + """ + + def _put(self, key, value): + if key in self._cache: + self._cache[key].append(value) + else: + self._sync(key) + + if key in self._cache: + self._cache[key].append(value) + else: + self._cache[key] = [value] + + self._add_to_add(key) + return True + + def _update(self, old_key, old_value, new_key, new_value): + self._sync(old_key, new_key) + + if old_key != new_key: + self._cache[old_key].remove(old_value) + if len(self._cache[old_key]) == 0: + del (self._cache[old_key]) + self._add_to_remove(old_key) + else: + self._add_to_add(old_key) + + self._put(new_key, new_value) + self._add_to_add(new_key) + else: + for i in range(len(self._cache[new_key])): + if self._cache[new_key][i] == old_value: + self._cache[new_key][i] = new_value # avoid add and remove in dict + break # only the first one is affected + self._add_to_add(new_key) diff --git a/src/cache/ListIfNeededCache.py b/src/cache/ListIfNeededCache.py new file mode 100644 index 0000000..3afbfec --- /dev/null +++ b/src/cache/ListIfNeededCache.py @@ -0,0 +1,56 @@ +from cache.Cache import BaseCache + + +class ListIfNeededCache(BaseCache): + """ + An in memory FIFO cache object + When the max_size is reach the first element that was put is removed + When you put the same key twice, you now have a list of two elements + """ + + def _put(self, key, value): + if key in self._cache: + if isinstance(self._cache[key], list): + self._cache[key].append(value) + else: + 
self._cache[key] = [self._cache[key], value] + else: + self._sync(key) + + if key in self._cache: + if isinstance(self._cache[key], list): + self._cache[key].append(value) + else: + self._cache[key] = [self._cache[key], value] + else: + self._cache[key] = value + self._add_to_add(key) + return True + + def _update(self, old_key, old_value, new_key, new_value): + + self._sync(old_key, new_key) + + if old_key != new_key: + if isinstance(self._cache[old_key], list): + self._cache[old_key].remove(old_value) + if len(self._cache[old_key]) == 0: + del (self._cache[old_key]) + self._add_to_remove(old_key) + else: + self._add_to_add(old_key) + else: + del (self._cache[old_key]) + self._add_to_remove(old_key) + + self._put(new_key, new_value) + self._add_to_add(new_key) + else: + if isinstance(self._cache[new_key], list): + for i in range(len(self._cache[new_key])): + if self._cache[new_key][i] == old_value: + self._cache[new_key][i] = new_value # avoid add and remove in dict + break + else: + self._cache[new_key] = new_value + self._add_to_add(new_key) diff --git a/src/cache/SetCache.py b/src/cache/SetCache.py new file mode 100644 index 0000000..3e3ba28 --- /dev/null +++ b/src/cache/SetCache.py @@ -0,0 +1,45 @@ +from cache.Cache import BaseCache + + +class SetCache(BaseCache): + """ + An in memory FIFO cache object + When the max_size is reach the first element that was put is removed + You can use the same key multiple times, but the elements under this key will be unique + When there are multiple elements, a python set is used + """ + + def _put(self, key, value): + if key in self._cache: + if value in self._cache[key]: + return False + self._cache[key].add(value) + else: + self._sync(key) + + if key in self._cache: + self._cache[key].add(value) + else: + self._cache[key] = {value} + + self._add_to_add(key) + return True + + def _update(self, old_key, old_value, new_key, new_value): + self._sync(old_key, new_key) + + if old_key != new_key: + if 
isinstance(self._cache[old_key], set): + self._cache[old_key].remove(old_value) + if len(self._cache[old_key]) == 0: + del (self._cache[old_key]) + self._add_to_remove(old_key) + else: + self._add_to_add(old_key) + + self._put(new_key, new_value) + self._add_to_add(new_key) + else: + self._cache[new_key].remove(old_value) + self._put(new_key, new_value) + self._add_to_add(new_key) diff --git a/src/cache/__init__.py b/src/cache/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/core/ast/nodes.py b/src/core/ast/nodes.py index 49d5420..71846fd 100644 --- a/src/core/ast/nodes.py +++ b/src/core/ast/nodes.py @@ -1,5 +1,5 @@ from core.builtin_concepts import BuiltinConcepts, ListConcept -from core.concept import Concept +from core.concept import Concept, ConceptParts import ast import core.utils @@ -65,12 +65,12 @@ class GenericNodeConcept(NodeConcept): def get_node_type(self): return self.node_type - def get_value(self): + def get_obj_value(self): if self.node_type == "Name": - return self.get_prop("id") + return self.get_value("id") if self.node_type == "arg": - return self.get_prop("arg") + return self.get_value("arg") return self.body @@ -78,7 +78,7 @@ class GenericNodeConcept(NodeConcept): class IdentifierNodeConcept(NodeConcept): def __init__(self, parent, name): super().__init__(BuiltinConcepts.IDENTIFIER_NODE, "Name", parent) - self.body = name + self.set_value(ConceptParts.BODY, name) class CallNodeConcept(NodeConcept): @@ -86,7 +86,7 @@ class CallNodeConcept(NodeConcept): super().__init__(BuiltinConcepts.IDENTIFIER_NODE, "Call", parent) def get_args_names(self, sheerka): - return sheerka.get_values(self.get_prop("args")) + return sheerka.objvalues(self.get_value("args")) def python_to_concept(python_node): @@ -105,16 +105,16 @@ def python_to_concept(python_node): continue value = getattr(node, field) - concept.def_prop(field) + concept.def_var(field) if isinstance(value, list): lst = ListConcept().init_key() for i in value: 
lst.append(_transform(i, NodeParent(concept, field))) - concept.set_prop(field, lst) + concept.set_value(field, lst) elif isinstance(value, ast.AST): - concept.set_prop(field, _transform(value, NodeParent(concept, field))) + concept.set_value(field, _transform(value, NodeParent(concept, field))) else: - concept.set_prop(field, value) + concept.set_value(field, value) concept.metadata.is_evaluated = True return concept @@ -132,11 +132,11 @@ def concept_to_python(concept_node): def _transform(node): node_type = node.get_node_type() ast_object = core.utils.new_object("_ast." + node_type) - for field in node.props: + for field in node.values: if field not in ast_object._fields: continue - value = node.get_prop(field) + value = node.get_value(field) if isinstance(value, list) or isinstance(value, Concept) and value.key == str(BuiltinConcepts.LIST): lst = [] for i in value.body: diff --git a/src/core/ast/visitors.py b/src/core/ast/visitors.py index d274261..415552c 100644 --- a/src/core/ast/visitors.py +++ b/src/core/ast/visitors.py @@ -29,7 +29,7 @@ class ConceptNodeVisitor: self.visit(value) def visit_Constant(self, node): - value = node.get_prop("value") + value = node.get_value("value") type_name = _const_node_type_names.get(type(value)) if type_name is None: for cls, name in _const_node_type_names.items(): @@ -66,10 +66,10 @@ class UnreferencedNamesVisitor(ConceptNodeVisitor): if ("Assign", "targets") in parents: # variable which is assigned return - if self.can_be_discarded(self.sheerka.value(node), parents): + if self.can_be_discarded(self.sheerka.objvalue(node), parents): return - self.names.add(self.sheerka.value(node)) + self.names.add(self.sheerka.objvalue(node)) def can_be_discarded(self, variable_name, parents): @@ -77,14 +77,14 @@ class UnreferencedNamesVisitor(ConceptNodeVisitor): if node is None: return False - if node.get_node_type() == "For" and self.sheerka.value(node.get_prop("target")) == variable_name: + if node.get_node_type() == "For" and 
self.sheerka.objvalue(node.get_value("target")) == variable_name: # variable used by the loop return True if node.get_node_type() == "FunctionDef": # variable defined as a function parameter - args = node.get_prop("args") - args_values = list(self.sheerka.get_values(args.get_prop("args"))) + args = node.get_value("args") + args_values = list(self.sheerka.objvalues(args.get_value("args"))) if variable_name in args_values: return True @@ -112,8 +112,8 @@ def get_parents(node): def iter_props(node): - for p in node.props: - yield p, node.props[p].value + for p in [p for p in node.values if isinstance(p, str)]: + yield p, node.get_value(p) _const_node_type_names = { diff --git a/src/core/builtin_concepts.py b/src/core/builtin_concepts.py index bc49fa0..112b88a 100644 --- a/src/core/builtin_concepts.py +++ b/src/core/builtin_concepts.py @@ -114,8 +114,8 @@ It's mainly to ease the usage class UserInputConcept(Concept): def __init__(self, text=None, user_name=None): super().__init__(BuiltinConcepts.USER_INPUT, True, False, BuiltinConcepts.USER_INPUT) - self.set_metadata_value(ConceptParts.BODY, text) - self.set_prop("user_name", user_name) + self.set_value(ConceptParts.BODY, text) + self.set_value("user_name", user_name) self.metadata.is_evaluated = True @property @@ -124,7 +124,7 @@ class UserInputConcept(Concept): @property def user_name(self): - return self.props["user_name"].value + return self.get_value("user_name") def __repr__(self): return f"({self.id}){self.name}: '{self.body}'" @@ -133,7 +133,7 @@ class UserInputConcept(Concept): class ErrorConcept(Concept): def __init__(self, error=None): super().__init__(BuiltinConcepts.ERROR, True, False, BuiltinConcepts.ERROR) - self.set_metadata_value(ConceptParts.BODY, error) + self.set_value(ConceptParts.BODY, error) self.metadata.is_evaluated = True def __repr__(self): @@ -143,7 +143,7 @@ class ErrorConcept(Concept): class UnknownConcept(Concept): def __init__(self, metadata=None): 
super().__init__(BuiltinConcepts.UNKNOWN_CONCEPT, True, False, BuiltinConcepts.UNKNOWN_CONCEPT) - self.set_metadata_value(ConceptParts.BODY, metadata) + self.set_value(ConceptParts.BODY, metadata) self.metadata.is_evaluated = True def __repr__(self): @@ -158,28 +158,28 @@ class ReturnValueConcept(Concept): def __init__(self, who=None, status=None, value=None, message=None, parents=None): super().__init__(BuiltinConcepts.RETURN_VALUE, True, False, BuiltinConcepts.RETURN_VALUE) - self.set_metadata_value(ConceptParts.BODY, value) - self.set_prop("who", who) - self.set_prop("status", status) - self.set_prop("message", message) - self.set_prop("parents", parents) + self.set_value(ConceptParts.BODY, value) + self.set_value("who", who) + self.set_value("status", status) + self.set_value("message", message) + self.set_value("parents", parents) self.metadata.is_evaluated = True @property def who(self): - return self.props["who"].value + return self.get_value("who") @who.setter def who(self, value): - self.set_prop("who", value) + self.set_value("who", value) @property def status(self): - return self.props["status"].value + return self.get_value("status") @status.setter def status(self, value): - self.set_prop("status", value) + self.set_value("status", value) @property def value(self): @@ -187,23 +187,23 @@ class ReturnValueConcept(Concept): @value.setter def value(self, value): - self.set_metadata_value(ConceptParts.BODY, value) + self.set_value(ConceptParts.BODY, value) @property def message(self): - return self.props["message"].value + return self.get_value("message") @message.setter def message(self, value): - self.set_prop("message", value) + self.set_value("message", value) @property def parents(self): - return self.props["parents"].value + return self.get_value("parents") @parents.setter def parents(self, value): - self.set_prop("parents", value) + self.set_value("parents", value) def __repr__(self): return f"ReturnValue(who={self.who}, status={self.status}, 
value={self.value}, message={self.message})" @@ -233,8 +233,8 @@ class UnknownPropertyConcept(Concept): def __init__(self, property_name=None, concept=None): super().__init__(BuiltinConcepts.UNKNOWN_PROPERTY, True, False, BuiltinConcepts.UNKNOWN_PROPERTY) - self.set_metadata_value(ConceptParts.BODY, property_name) - self.set_prop("concept", concept) + self.set_value(ConceptParts.BODY, property_name) + self.set_value("concept", concept) self.metadata.is_evaluated = True def __repr__(self): @@ -242,7 +242,7 @@ class UnknownPropertyConcept(Concept): @property def concept(self): - return self.props["concept"].value + return self.get_value("concept") @property def property_name(self): @@ -256,16 +256,16 @@ class ParserResultConcept(Concept): def __init__(self, parser=None, source=None, tokens=None, value=None, try_parsed=None): super().__init__(BuiltinConcepts.PARSER_RESULT, True, False, BuiltinConcepts.PARSER_RESULT) - self.set_metadata_value(ConceptParts.BODY, value) - self.set_prop("parser", parser) - self.set_prop("source", source) - self.set_prop("tokens", tokens) - self.set_prop("try_parsed", try_parsed) # in case of error, what was found before the error + self.set_value(ConceptParts.BODY, value) + self.set_value("parser", parser) + self.set_value("source", source) + self.set_value("tokens", tokens) + self.set_value("try_parsed", try_parsed) # in case of error, what was found before the error self.metadata.is_evaluated = True def __repr__(self): - text = f"ParserResult(parser={self.props['parser'].value}" - source = self.props['source'].value + text = f"ParserResult(parser={self.get_value('parser')}" + source = self.get_value('source') text += f", source='{source}')" if source else f", body='{self.body}')" return text @@ -287,15 +287,15 @@ class ParserResultConcept(Concept): @property def try_parsed(self): - return self.props["try_parsed"].value + return self.get_value("try_parsed") @property def source(self): - return self.props["source"].value + return 
self.get_value("source") @property def parser(self): - return self.props["parser"].value + return self.get_value("parser") class InvalidReturnValueConcept(Concept): @@ -311,8 +311,8 @@ class InvalidReturnValueConcept(Concept): True, False, BuiltinConcepts.INVALID_RETURN_VALUE) - self.set_metadata_value(ConceptParts.BODY, return_value) - self.set_prop("evaluator", evaluator) + self.set_value(ConceptParts.BODY, return_value) + self.set_value("evaluator", evaluator) self.metadata.is_evaluated = True @@ -322,9 +322,9 @@ class ConceptEvalError(Concept): True, False, BuiltinConcepts.CONCEPT_EVAL_ERROR) - self.set_metadata_value(ConceptParts.BODY, error) - self.set_prop("concept", concept) - self.set_prop("property_name", property_name) + self.set_value(ConceptParts.BODY, error) + self.set_value("concept", concept) + self.set_value("property_name", property_name) self.metadata.is_evaluated = True def __repr__(self): @@ -336,17 +336,17 @@ class ConceptEvalError(Concept): @property def concept(self): - return self.props["concept"].value + return self.get_value("concept") @property def property_name(self): - return self.props["property_name"].value + return self.get_value("property_name") class EnumerationConcept(Concept): def __init__(self, iteration=None): super().__init__(BuiltinConcepts.ENUMERATION, True, False, BuiltinConcepts.ENUMERATION) - self.set_metadata_value(ConceptParts.BODY, iteration) + self.set_value(ConceptParts.BODY, iteration) self.metadata.is_evaluated = True # def __iter__(self): @@ -356,7 +356,7 @@ class EnumerationConcept(Concept): class ListConcept(Concept): def __init__(self, items=None): super().__init__(BuiltinConcepts.LIST, True, False, BuiltinConcepts.LIST) - self.set_metadata_value(ConceptParts.BODY, items or []) + self.set_value(ConceptParts.BODY, items or []) self.metadata.is_evaluated = True def append(self, obj): @@ -381,9 +381,10 @@ class ListConcept(Concept): class FilteredConcept(Concept): def __init__(self, filtered=None, iterable=None, 
predicate=None): super().__init__(BuiltinConcepts.FILTERED, True, False, BuiltinConcepts.FILTERED) - self.set_metadata_value(ConceptParts.BODY, filtered) - self.def_prop("iterable", iterable) - self.def_prop("predicate", predicate) + self.set_value(ConceptParts.BODY, filtered) + self.set_value("iterable", iterable) + self.set_value("predicate", predicate) + self.metadata.is_evaluated = True class ConceptAlreadyInSet(Concept): @@ -392,8 +393,8 @@ class ConceptAlreadyInSet(Concept): True, False, BuiltinConcepts.CONCEPT_ALREADY_IN_SET) - self.set_metadata_value(ConceptParts.BODY, concept) - self.set_prop("concept_set", concept_set) + self.set_value(ConceptParts.BODY, concept) + self.set_value("concept_set", concept_set) self.metadata.is_evaluated = True def __repr__(self): @@ -405,7 +406,7 @@ class ConceptAlreadyInSet(Concept): @property def concept_set(self): - return self.props["concept_set"].value + return self.get_value("concept_set") class WhereClauseFailed(Concept): @@ -414,7 +415,7 @@ class WhereClauseFailed(Concept): True, False, BuiltinConcepts.WHERE_CLAUSE_FAILED) - self.set_metadata_value(ConceptParts.BODY, concept) + self.set_value(ConceptParts.BODY, concept) self.metadata.is_evaluated = True def __repr__(self): @@ -431,12 +432,12 @@ class NotForMeConcept(Concept): True, False, BuiltinConcepts.NOT_FOR_ME) - self.set_metadata_value(ConceptParts.BODY, source) - self.def_prop("reason", reason) + self.set_value(ConceptParts.BODY, source) + self.set_value("reason", reason) self.metadata.is_evaluated = True def __repr__(self): - return f"NotForMeConcept(source={self.body}, reason={self.get_prop('reason')})" + return f"NotForMeConcept(source={self.body}, reason={self.get_value('reason')})" class ExplanationConcept(Concept): @@ -445,9 +446,9 @@ class ExplanationConcept(Concept): True, False, BuiltinConcepts.EXPLANATION) - self.def_prop("digest", digest) # event digest - self.def_prop("command", command) # explain command parameters - self.def_prop("title", title) 
# a title to the explanation - self.def_prop("instructions", instructions) # instructions for SheerkaPrint - self.set_metadata_value(ConceptParts.BODY, execution_result) # list of results + self.set_value("digest", digest) # event digest + self.set_value("command", command) # explain command parameters + self.set_value("title", title) # a title to the explanation + self.set_value("instructions", instructions) # instructions for SheerkaPrint + self.set_value(ConceptParts.BODY, execution_result) # list of results self.metadata.is_evaluated = True diff --git a/src/core/builtin_helpers.py b/src/core/builtin_helpers.py index 2949518..68fd5c6 100644 --- a/src/core/builtin_helpers.py +++ b/src/core/builtin_helpers.py @@ -30,11 +30,11 @@ def is_same_success(context, return_values): evaluated = context.sheerka.evaluate_concept(sub_context, ret_val.body) if evaluated.key != ret_val.body.key: raise Exception("Failed to evaluate evaluate") - return context.sheerka.value(evaluated) + return context.sheerka.objvalue(evaluated) else: - return context.sheerka.value(ret_val.body) + return context.sheerka.objvalue(ret_val.body) else: - return context.sheerka.value(ret_val) + return context.sheerka.objvalue(ret_val) try: reference = _get_value(return_values[0]) @@ -280,8 +280,8 @@ def get_lexer_nodes(return_values, start, tokens): for ret_val in return_values: if ret_val.who == "parsers.Python": - if ret_val.body.source.strip().isalnum() and not ret_val.body.source.strip().isnumeric(): - # Discard SourceCodeNode which seems to be a concept + if ret_val.body.source.strip().isidentifier(): + # Discard SourceCodeNode which seems to be a concept name # It may be a wrong idea, so let's see continue @@ -309,6 +309,41 @@ def get_lexer_nodes(return_values, start, tokens): return lexer_nodes +def ensure_evaluated(context, concept): + """ + Evaluate a concept is not already evaluated + :param context: + :param concept: + :return: + """ + if concept.metadata.is_evaluated: + return concept + + 
with context.push(desc=f"Evaluating concept {concept}") as sub_context: + sub_context.local_hints.add(BuiltinConcepts.EVAL_BODY_REQUESTED) + evaluated = context.sheerka.evaluate_concept(sub_context, concept) + sub_context.add_values(return_values=evaluated) + + return evaluated + +def get_lexer_nodes_from_unrecognized(context, unrecognized_tokens_node, parsers): + """ + Using parsers, try to recognize concepts from source + :param context: + :param unrecognized_tokens_node: + :param parsers: + :return: + """ + + res = parse_unrecognized(context, unrecognized_tokens_node.source, parsers) + res = only_parsers_results(context, res) + + if not res.status: + return None + + return get_lexer_nodes(res.body.body, unrecognized_tokens_node.start, unrecognized_tokens_node.tokens) + + def get_names(sheerka, concept_node): """ Finds all the names referenced by the concept_node @@ -352,7 +387,7 @@ def extract_predicates(sheerka, expression, variables_to_include, variables_to_e return NotImplementedError() concept_node = core.ast.nodes.python_to_concept(node) - main_op = concept_node.get_prop("body") + main_op = concept_node.get_value("body") return _get_predicates(_extract_predicates(sheerka, main_op, variables_to_include, variables_to_exclude)) @@ -370,14 +405,14 @@ def _extract_predicates(sheerka, node, variables_to_include, variables_to_exclud return _res if node.node_type == "Compare": - if node.get_prop("left").node_type == "Name": + if node.get_value("left").node_type == "Name": """Simple case of one comparison""" - comparison_name = sheerka.value(node.get_prop("left")) + comparison_name = sheerka.objvalue(node.get_value("left")) if comparison_name in variables_to_include and comparison_name not in variables_to_exclude: predicates.append(node) else: """The left part is an expression""" - res = _extract_predicates(sheerka, node.get_prop("left"), variables_to_include, variables_to_exclude) + res = _extract_predicates(sheerka, node.get_value("left"), variables_to_include, 
variables_to_exclude) if len(res) > 0: predicates.append(node) elif node.node_type == "Call": @@ -386,9 +421,9 @@ def _extract_predicates(sheerka, node, variables_to_include, variables_to_exclud args = list(call_node.get_args_names(sheerka)) if _matches(args, variables_to_include, variables_to_exclude): predicates.append(node) - elif node.node_type == "UnaryOp" and node.get_prop("op").node_type == "Not": + elif node.node_type == "UnaryOp" and node.get_value("op").node_type == "Not": """Simple case of negation""" - res = _extract_predicates(sheerka, node.get_prop("operand"), variables_to_include, variables_to_exclude) + res = _extract_predicates(sheerka, node.get_value("operand"), variables_to_include, variables_to_exclude) if len(res) > 0: predicates.append(node) elif node.node_type == "BinOp": @@ -398,7 +433,7 @@ def _extract_predicates(sheerka, node, variables_to_include, variables_to_exclud elif node.node_type == "BoolOp": all_op = True temp_res = [] - for op in node.get_prop("values").body: + for op in node.get_value("values").body: res = _extract_predicates(sheerka, op, variables_to_include, variables_to_exclude) if len(res) == 0: all_op = False diff --git a/src/core/concept.py b/src/core/concept.py index 1f0b6b3..5364855 100644 --- a/src/core/concept.py +++ b/src/core/concept.py @@ -1,17 +1,19 @@ import hashlib from collections import namedtuple +from copy import deepcopy from dataclasses import dataclass from enum import Enum -from core.sheerka_logger import get_logger +from typing import Union import core.utils +from core.sheerka_logger import get_logger from core.tokenizer import Tokenizer, TokenKind PROPERTIES_FOR_DIGEST = ("name", "key", "definition", "definition_type", "is_builtin", "is_unique", "where", "pre", "post", "body", - "desc", "props") + "desc", "props", "variables") PROPERTIES_TO_SERIALIZE = PROPERTIES_FOR_DIGEST + tuple(["id"]) PROPERTIES_FOR_NEW = ("where", "pre", "post", "body", "desc") VARIABLE_PREFIX = "__var__" @@ -48,15 +50,13 @@ class 
ConceptMetadata: definition_type: str # definition can be done with something else than regex desc: str # possible description for the concept id: str # unique identifier for a concept. The id will never be modified (but the key can) - props: list # list properties, with their default values + props: dict # hashmap of properties, values + variables: list # list of concept variables, with their default values is_evaluated: bool = False # True is the concept is evaluated by sheerka.eval_concept() need_validation = False # True if the properties of the concept need to be validated full_serialization: bool = False # If True, the full object will be serialized, rather than just the diff -simplec = namedtuple("concept", "name body") # for simple concept (tests purposes only) - - class Concept: """ Default concept object @@ -76,7 +76,8 @@ class Concept: definition_type=None, desc=None, id=None, - props=None): + props=None, + variables=None): metadata = ConceptMetadata( str(name) if name else None, @@ -91,17 +92,17 @@ class Concept: definition_type, desc, id, - props or [] + props or {}, + variables or [] ) self.metadata = metadata - self.compiled = {} # cached ast for the where, pre, post and body parts - self.values = {} # values of metadata once resolved - self.props = {} # resolved properties of this concept - self.bnf = None + self.compiled = {} # cached ast for the where, pre, post and body parts and variables + self.values = {} # resolved values. As compiled, it's used both for metadata and variables + self.bnf = None # parsing expression self.log = get_logger("core." + self.__class__.__name__) self.init_log = get_logger("init.core." 
+ self.__class__.__name__) - self.original_definition_hash = None + self.original_definition_hash = None # concept hash before any alteration of the metadata def __repr__(self): return f"({self.metadata.id}){self.metadata.name}" @@ -117,6 +118,9 @@ class Concept: if isinstance(other, CC): return other == self + if isinstance(other, CB): + return other == self + if not isinstance(other, Concept): return False @@ -147,15 +151,8 @@ class Concept: if len(self.values) != len(other.values): return False - for metadata in self.values: - if self.get_metadata_value(metadata) != other.get_metadata_value(metadata): - return False - - if len(self.props) != len(other.props): - return False - - for prop in self.props: - if self.get_prop(prop) != other.get_prop(prop): + for name in self.values: + if self.get_value(name) != other.get_value(name): return False return True @@ -166,28 +163,36 @@ class Concept: def __getattr__(self, item): # I have this complicated implementation because of the usage of Pickle - if 'props' in vars(self) and item in self.props: - return self.props[item].value + if 'values' in vars(self) and item in self.values: + return self.get_value(item) name = self.name if 'metadata' in vars(self) else 'Concept' raise AttributeError(f"'{name}' concept has no attribute '{item}'") - def def_prop(self, prop_name, default_value=None): + def def_var(self, var_name, default_value=None): """ Adds a property to the metadata - :param prop_name: name or concept + :param var_name: name or concept :param default_value: :return: """ - assert default_value is None or isinstance(default_value, str) # default properties will have to be evaluated - self.metadata.props.append((prop_name, default_value)) - self.props[prop_name] = Property(prop_name, None) # do not set the default value - # why not setting props to the default values ? 
- # Because it may not be the real values, as metadata.props need to be evaluated + # this assert in not a functional requirement + # It's just to control what I put in the default value of properties + # You can allow more type if it's REALLY needed. + # - str are for standard definition + # - list of concepts is used by ISA + assert default_value is None or isinstance(default_value, str) + + self.metadata.variables.append((var_name, default_value)) + + self.set_value(var_name, None) # do not set the default value + # why not setting variables to the default values ? + # Because it may not be the real values, as metadata.variables need to be evaluated + return self - def def_prop_by_index(self, index: int, value): + def def_var_by_index(self, index: int, value): """ Re-assign a value to a property (mainly used by ExactConceptParser) :param index: @@ -195,8 +200,8 @@ class Concept: :return: """ assert value is None or isinstance(value, str) # default properties will have to be evaluated - prop = self.metadata.props[index] - self.metadata.props[index] = (prop[0], value) + var_name = self.metadata.variables[index] + self.metadata.variables[index] = (var_name[0], value) # change the default value return self @property @@ -229,7 +234,7 @@ class Concept: else: tokens = list(Tokenizer(self.metadata.name)) - variables = [p[0] for p in self.metadata.props] if len(core.utils.strip_tokens(tokens, True)) > 1 else [] + variables = [p[0] for p in self.metadata.variables] if len(core.utils.strip_tokens(tokens, True)) > 1 else [] key = "" first = True @@ -252,7 +257,7 @@ class Concept: @property def body(self): - return self.values[ConceptParts.BODY] if ConceptParts.BODY in self.values else None + return self.get_value(ConceptParts.BODY) def get_origin(self): """ @@ -284,8 +289,12 @@ class Concept: """ props_to_use = props_to_use or PROPERTIES_TO_SERIALIZE - - props_as_dict = dict((prop, getattr(self.metadata, prop)) for prop in props_to_use) + props_as_dict = {} + for prop in 
props_to_use: + if prop == "props": # no need to copy variables as the ref won't be used in from_dict + props_as_dict[prop] = deepcopy(getattr(self.metadata, prop)) + else: + props_as_dict[prop] = getattr(self.metadata, prop) return props_as_dict def from_dict(self, as_dict): @@ -296,19 +305,20 @@ class Concept: """ for prop in PROPERTIES_TO_SERIALIZE: if prop in as_dict: - if prop == "props": + if prop == "variables": for name, value in as_dict[prop]: - self.def_prop(name, value) + self.def_var(name, value) else: setattr(self.metadata, prop, as_dict[prop]) return self - def update_from(self, other): + def update_from(self, other, update_value=True): """ Update self using the properties of another concept This method is to mimic the class to instance pattern 'other' is the class, the template, and 'self' is a new instance :param other: + :param update_value: :return: """ if other is None: @@ -321,12 +331,9 @@ class Concept: self.from_dict(other.to_dict()) # update values - for k, v in other.values.items(): - self.values[k] = v - - # update properties - for k, v in other.props.items(): - self.set_prop(k, v.value) + if update_value: + for k in other.values: + self.set_value(k, other.get_value(k)) # origin from sdp.sheerkaSerializer import Serializer @@ -335,54 +342,53 @@ class Concept: return self - def set_prop(self, prop_name, prop_value): + def add_prop(self, concept_key, value): """ - Set the value of a property (not the metadata) - :param prop_name: Name the property or another concept - :param prop_value: - :return: - """ - self.props[prop_name] = Property(prop_name, prop_value) - return self - - def get_prop(self, prop_name: str): - """ - Gets the value of a property - :param prop_name: name or concept - :return: - """ - return self.props[prop_name].value - - def set_prop_by_index(self, index: int, value): - """ - Set the value of a property (not the metadata) using the index - :param index: Name the property or another concept + Set or add a behaviour to a 
concept + A behaviour is a value from another concept (ex BuiltinConcepts.ISA + :param concept_key: Concept key :param value: :return: """ - prop_name = list(self.props.keys())[index] - self.props[prop_name].value = value + if concept_key in self.metadata.props: + self.metadata.props[concept_key].add(value) + else: + self.metadata.props[concept_key] = {value} # a set return self - def set_metadata_value(self, metadata: ConceptParts, value): + def get_prop(self, concept_key): """ - Set the resolved value of a metadata (not the metadata itself) - :param metadata: + Gets a behaviour of a concept + :param concept_key: name of the behaviour + :return: + """ + return self.metadata.props[concept_key] if concept_key in self.metadata.props else None + + def set_value(self, name, value): + """ + Set the resolved value of a metadata or a variable (not the metadata itself) + :param name: :param value: :return: """ - self.values[metadata] = value + if name in self.values: + self.values[name].value = value + else: + self.values[name] = Property(name, value) return self - def get_metadata_value(self, metadata: ConceptParts): + def get_value(self, prop_name): """ Gets the resolved value of a metadata - :param metadata: + :param prop_name: :return: """ - if metadata not in self.values: + if prop_name not in self.values: return None - return self.values[metadata] + return self.values[prop_name].value + + def variables(self): + return dict([(k, v) for k, v in self.values.items() if isinstance(k, str)]) def auto_init(self): """ @@ -398,10 +404,10 @@ class Concept: for metadata in ConceptParts: value = getattr(self.metadata, metadata.value) if value is not None: - self.values[metadata] = value + self.set_value(metadata, value) - for prop, value in self.metadata.props: - self.set_prop(prop, value) + for var, value in self.metadata.variables: + self.set_value(var, value) self.metadata.is_evaluated = True return self @@ -419,9 +425,10 @@ class Concept: And it removes the visibility from 
the other attributes/methods """ bag = {} - for prop in self.props: - bag[prop] = self.get_prop(prop) - bag["prop." + prop] = self.get_prop(prop) + for var in self.values: + if isinstance(var, str): + bag[var] = self.get_value(var) + bag["var." + var] = self.get_value(var) for prop in ("id", "name", "key", "body"): bag[prop] = getattr(self, prop) return bag @@ -469,10 +476,17 @@ class InfiniteRecursionResolved: """This class is used to when we managed to break an infinite recursion concept definition""" value: object - def get_value(self): + def get_obj_value(self): return self.value +# ################################ +# +# Class created for tests purpose +# +# ################################ + + class CC: """ Concept class for test purpose @@ -484,13 +498,14 @@ class CC: # The other properties (concept, source, start and end) # are used in tests/parsers/parsers_utils.py to help creating helper objects - def __init__(self, concept, source=None, **kwargs): + def __init__(self, concept, source=None, exclude_body=False, **kwargs): self.concept_key = concept.key if isinstance(concept, Concept) else concept self.compiled = kwargs self.concept = concept if isinstance(concept, Concept) else None self.source = source # to use when the key is different from the sub str to search when filling start and stop self.start = None # for debug purpose, indicate where the concept starts self.end = None # for debug purpose, indicate where the concept ends + self.exclude_body = exclude_body def __eq__(self, other): if id(self) == id(other): @@ -499,13 +514,19 @@ class CC: if isinstance(other, Concept): if other.key != self.concept_key: return False - return self.compiled == other.compiled + if self.exclude_body: + to_compare = {k: v for k, v in other.compiled.items() if k != ConceptParts.BODY} + else: + to_compare = other.compiled + return self.compiled == to_compare if not isinstance(other, CC): return False - return self.concept_key == other.concept_key and \ - self.compiled == 
other.compiled + if self.concept_key != other.concept_key: + return False + + return self.compiled == other.compiled def __hash__(self): if self.concept: @@ -536,3 +557,29 @@ class CC: if self.end is None or end > self.end: self.end = end return self + + +@dataclass() +class CB: + """ + Concept with body only + Test class that test only the body of the concept + """ + concept: Union[str, Concept] + body: object + + def __eq__(self, other): + if isinstance(other, Concept): + key = self.concept if isinstance(self.concept, str) else self.concept.key + return key == other.key and self.body == other.body + + if not isinstance(other, CB): + return False + + return self.concept == other.concept and self.body == other.body + + def __hash__(self): + return hash((self.concept, self.body)) + + +simplec = namedtuple("concept", "name body") # for simple concept (tests purposes only) diff --git a/src/core/profiling.py b/src/core/profiling.py new file mode 100644 index 0000000..f4cdccc --- /dev/null +++ b/src/core/profiling.py @@ -0,0 +1,26 @@ +# ############################ +# from github: nealtodd/decorator.py +# ############################ + +import pstats +from cProfile import Profile + + +def profile(sort_args=None, print_args=None): + sort_args = sort_args or ['cumulative'] + print_args = print_args or [10] + profiler = Profile() + + def decorator(fn): + def inner(*args, **kwargs): + result = None + try: + result = profiler.runcall(fn, *args, **kwargs) + finally: + stats = pstats.Stats(profiler) + stats.strip_dirs().sort_stats(*sort_args).print_stats(*print_args) + return result + + return inner + + return decorator diff --git a/src/core/sheerka/ExecutionContext.py b/src/core/sheerka/ExecutionContext.py index 122c3c1..838314c 100644 --- a/src/core/sheerka/ExecutionContext.py +++ b/src/core/sheerka/ExecutionContext.py @@ -144,9 +144,9 @@ class ExecutionContext: def add_preprocess(self, name, **kwargs): preprocess = self.sheerka.new(BuiltinConcepts.EVALUATOR_PRE_PROCESS) - 
preprocess.set_prop("name", name) + preprocess.set_value("name", name) for k, v in kwargs.items(): - preprocess.set_prop(k, v) + preprocess.set_value(k, v) if not self.preprocess: self.preprocess = [] @@ -168,9 +168,9 @@ class ExecutionContext: if isinstance(self.obj, Concept): if self.obj.key == key: return self.obj - for prop in self.obj.props: - if prop == key: - value = self.obj.props[prop].value + for var_name in self.obj.values: + if var_name == key: + value = self.obj.get_value(var_name) if isinstance(value, Concept): return value @@ -180,16 +180,16 @@ class ExecutionContext: if k == key: return c - return self.sheerka.get(key) + return self.sheerka.get_by_key(key) def new_concept(self, key, **kwargs): # search in obj if self.obj: if self.obj.key == key: return self.sheerka.new_from_template(self.obj, key, **kwargs) - for prop in self.obj.props: - if prop == key: - value = self.obj.props[prop].value + for var_name in self.obj.values: + if var_name == key: + value = self.obj.get_value(var_name) if isinstance(value, Concept): return self.sheerka.new_from_template(value, key, **kwargs) else: @@ -327,6 +327,7 @@ class ExecutionContext: bag[prop] = getattr(self, prop) bag["status"] = self.get_status() bag["elapsed"] = self.elapsed + bag["elapsed_str"] = self.elapsed_str bag["digest"] = self.event.get_digest() if self.event else None return bag diff --git a/src/core/sheerka/Services/SheerkaCreateNewConcept.py b/src/core/sheerka/Services/SheerkaCreateNewConcept.py index 8b5ddf9..533d50f 100644 --- a/src/core/sheerka/Services/SheerkaCreateNewConcept.py +++ b/src/core/sheerka/Services/SheerkaCreateNewConcept.py @@ -1,9 +1,9 @@ +import core.utils from core.builtin_concepts import BuiltinConcepts, ErrorConcept from core.concept import Concept -from sdp.sheerkaDataProvider import SheerkaDataProviderDuplicateKeyError, SheerkaDataProviderRef -import core.utils +from sdp.sheerkaDataProvider_Old import SheerkaDataProviderDuplicateKeyError -BNF_NODE_PARSER_CLASS = 
"parsers.BnfNodeParser.BnfNodeParser" +BNF_NODE_PARSER_CLASS = "parsers.BnfNodeParser_Old.BnfNodeParser" BASE_NODE_PARSER_CLASS = "parsers.BaseNodeParser.BaseNodeParser" @@ -15,122 +15,80 @@ class SheerkaCreateNewConcept: def __init__(self, sheerka): self.sheerka = sheerka self.logger_name = self.create_new_concept.__name__ - self.base_lexer_parser = core.utils.get_class(BASE_NODE_PARSER_CLASS)("BaseNodeParser", 0) + self.bnp = core.utils.get_class(BASE_NODE_PARSER_CLASS) # BaseNodeParser def create_new_concept(self, context, concept: Concept): """ Adds a new concept to the system :param context: :param concept: DefConceptNode - :param logger :return: digest of the new concept """ + sheerka = self.sheerka + concept.init_key() - concepts_definitions = None init_bnf_ret_value = None - sdp = self.sheerka.sdp + cache_manager = sheerka.cache_manager - # checks for duplicate concepts - # TODO checks if it exists in cache first - - if sdp.exists(self.sheerka.CONCEPTS_BY_HASH_ENTRY, concept.get_definition_hash()): - error = SheerkaDataProviderDuplicateKeyError(self.sheerka.CONCEPTS_ENTRY + "." + concept.key, concept) - return self.sheerka.ret( + if cache_manager.exists(sheerka.CONCEPTS_BY_HASH_ENTRY, concept.get_definition_hash()): + error = SheerkaDataProviderDuplicateKeyError(sheerka.CONCEPTS_BY_KEY_ENTRY + "." 
+ concept.key, + concept) + return sheerka.ret( self.logger_name, False, - self.sheerka.new(BuiltinConcepts.CONCEPT_ALREADY_DEFINED, body=concept), + sheerka.new(BuiltinConcepts.CONCEPT_ALREADY_DEFINED, body=concept), error.args[0]) # set id before saving in db - self.sheerka.set_id_if_needed(concept, False) + sheerka.set_id_if_needed(concept, False) - # add the BNF if known - if concept.bnf: - concepts_definitions = self.sheerka.get_concepts_definitions(context) - concepts_definitions[concept] = concept.bnf + # update the dictionary of concepts by first key + init_ret_value = self.bnp.get_concepts_by_first_keyword(context, [concept], True) + if not init_ret_value.status: + return sheerka.ret(self.logger_name, False, ErrorConcept(init_ret_value.value)) + concepts_by_first_keyword = init_ret_value.body - # check if it's a valid BNF or whether it breaks the known rules - bnf_lexer_parser = self.sheerka.parsers[BNF_NODE_PARSER_CLASS]() - with context.push(self.sheerka.name, desc=f"Initializing concept definition for {concept}") as sub_context: - sub_context.concepts[concept.key] = concept # the concept is not in the real cache yet - init_bnf_ret_value = bnf_lexer_parser.initialize(sub_context, concepts_definitions) - sub_context.add_values(return_values=init_bnf_ret_value) - if not init_bnf_ret_value.status: - return self.sheerka.ret(self.logger_name, False, ErrorConcept(init_bnf_ret_value.value)) + # update resolved dictionary + init_ret_value = self.bnp.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword) + if not init_ret_value.status: + return sheerka.ret(self.logger_name, False, ErrorConcept(init_ret_value.value)) + resolved_concepts_by_first_keyword = init_ret_value.body # update concept definition by key - init_sya_ret_value = self.base_lexer_parser.initialize(context, [concept], use_sheerka=True) - if not init_sya_ret_value.status: - return self.sheerka.ret(self.logger_name, False, ErrorConcept(init_sya_ret_value.value)) - 
concepts_by_first_keyword = init_sya_ret_value.body + # init_sya_ret_value = self.bnp.initialize(context, [concept], use_sheerka=True) + # if not init_sya_ret_value.status: + # return sheerka.ret(self.logger_name, False, ErrorConcept(init_sya_ret_value.value)) + # concepts_by_first_keyword = init_sya_ret_value.body concept.freeze_definition_hash() - # save the new concept in sdp - try: - # TODO : needs to make these calls atomic (or at least one single call) - # save the new concept - concept.metadata.full_serialization = True - result = sdp.add( - context.event.get_digest(), - self.sheerka.CONCEPTS_ENTRY, - concept, - use_ref=True) - concept.metadata.full_serialization = False + cache_manager.add_concept(concept) + cache_manager.put(sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, concepts_by_first_keyword) + cache_manager.put(sheerka.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword) - # update the concept (I hope that it's enough) - concept.set_origin(result.digest) - - # save it by id - sdp.add( - context.event.get_digest(), - self.sheerka.CONCEPTS_BY_ID_ENTRY, - SheerkaDataProviderRef(concept.id, result.digest)) - - # save it by name - sdp.add( - context.event.get_digest(), - self.sheerka.CONCEPTS_BY_NAME_ENTRY, - SheerkaDataProviderRef(concept.name, result.digest)) - - # records the hash - sdp.add( - context.event.get_digest(), - self.sheerka.CONCEPTS_BY_HASH_ENTRY, - SheerkaDataProviderRef(concept.get_definition_hash(), result.digest)) - - # update the definition table - if concepts_definitions is not None: - sdp.set( - context.event.get_digest(), - self.sheerka.CONCEPTS_DEFINITIONS_ENTRY, - bnf_lexer_parser.encode_grammar(init_bnf_ret_value.body), - use_ref=True) - self.sheerka.concepts_definitions_cache = None # invalidate cache - - # update the concepts by first keyword - sdp.set(context.event.get_digest(), - self.sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, - concepts_by_first_keyword) - - except 
SheerkaDataProviderDuplicateKeyError as error: - context.log_error("Failed to create a new concept.", who=self.logger_name) - return self.sheerka.ret( - self.logger_name, - False, - self.sheerka.new(BuiltinConcepts.CONCEPT_ALREADY_DEFINED, body=concept), - error.args[0]) - - # Updates the caches - self.sheerka.cache_by_key[concept.key] = sdp.get_safe(self.sheerka.CONCEPTS_ENTRY, concept.key) - self.sheerka.cache_by_name[concept.name] = sdp.get_safe(self.sheerka.CONCEPTS_BY_NAME_ENTRY, concept.name) - self.sheerka.cache_by_id[concept.id] = concept - if init_bnf_ret_value is not None and init_bnf_ret_value.status: - self.sheerka.concepts_grammars = init_bnf_ret_value.body - self.sheerka.concepts_by_first_keyword = concepts_by_first_keyword + if concept.bnf and init_bnf_ret_value is not None and init_bnf_ret_value.status: + sheerka.cache_manager.clear(sheerka.CONCEPTS_GRAMMARS_ENTRY) # process the return if needed - ret = self.sheerka.ret(self.logger_name, True, self.sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=concept)) + ret = sheerka.ret(self.logger_name, True, sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=concept)) return ret + + # def load_concepts_nodes_definitions(self, context): + # """ + # Gets from sdp what is need to parse nodes + # :return: + # """ + # sdp = self.sheerka.sdp + # + # concepts_by_first_keyword = sdp.get( + # self.sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, + # load_origin=False) or {} + # + # init_ret_value = self.bnp.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword) + # if not init_ret_value.status: + # return self.sheerka.ret(self.logger_name, False, ErrorConcept(init_ret_value.value)) + # resolved_concepts_by_first_keyword = init_ret_value.body + # + # return concepts_by_first_keyword, resolved_concepts_by_first_keyword diff --git a/src/core/sheerka/Services/SheerkaDump.py b/src/core/sheerka/Services/SheerkaDump.py index 4636e6d..7d89c82 100644 --- a/src/core/sheerka/Services/SheerkaDump.py +++ 
b/src/core/sheerka/Services/SheerkaDump.py @@ -1,9 +1,10 @@ +import os +import pprint + from core.builtin_concepts import BuiltinConcepts from core.concept import Concept from core.sheerka.ExecutionContext import ExecutionContext from sdp.sheerkaDataProvider import SheerkaDataProvider, Event -import pprint -import os def get_pp(): @@ -17,7 +18,7 @@ class SheerkaDump: self.sheerka = sheerka def dump_concepts(self): - lst = self.sheerka.sdp.list(self.sheerka.CONCEPTS_ENTRY) + lst = self.sheerka.sdp.list(self.sheerka.CONCEPTS_BY_KEY_ENTRY) for item in lst: if hasattr(item, "__iter__"): for i in item: @@ -25,10 +26,6 @@ class SheerkaDump: else: self.sheerka.log.info(item) - def dump_definitions(self): - defs = self.sheerka.sdp.get(self.sheerka.CONCEPTS_DEFINITIONS_ENTRY) - self.sheerka.log.info(defs) - def dump_desc(self, *concept_names, eval=False): first = True event = Event(f"Dumping description", "") @@ -37,7 +34,7 @@ class SheerkaDump: if isinstance(concept_name, Concept): concepts = concept_name else: - concepts = self.sheerka.get(concept_name) + concepts = self.sheerka.get_by_key(concept_name) if self.sheerka.isinstance(concepts, BuiltinConcepts.UNKNOWN_CONCEPT): self.sheerka.log.error(f"Concept '{concept_name}' is unknown") return False @@ -59,8 +56,8 @@ class SheerkaDump: self.sheerka.log.info(f"where : {c.metadata.where}") if eval: self.sheerka.log.info(f"value : {value}") - for p in c.props: - self.sheerka.log.info(f"{p}: {c.get_prop(p)}") + for v in c.values: + self.sheerka.log.info(f"{v}: {c.get_value(v)}") else: self.sheerka.log.info("No property") diff --git a/src/core/sheerka/Services/SheerkaEvaluateConcept.py b/src/core/sheerka/Services/SheerkaEvaluateConcept.py index 222184f..9c4ad2e 100644 --- a/src/core/sheerka/Services/SheerkaEvaluateConcept.py +++ b/src/core/sheerka/Services/SheerkaEvaluateConcept.py @@ -1,6 +1,6 @@ from core.builtin_concepts import BuiltinConcepts -from core.concept import Concept, DoNotResolve, ConceptParts, 
InfiniteRecursionResolved from core.builtin_helpers import expect_one +from core.concept import Concept, DoNotResolve, ConceptParts, InfiniteRecursionResolved CONCEPT_EVALUATION_STEPS = [ BuiltinConcepts.BEFORE_EVALUATION, @@ -91,32 +91,27 @@ class SheerkaEvaluateConcept: concept.compiled[part_key] = res sub_context.add_values(return_values=res) - for prop, default_value in concept.metadata.props: - if prop in concept.compiled: + for var_name, default_value in concept.metadata.variables: + if var_name in concept.compiled: continue if default_value is None or not isinstance(default_value, str): continue if default_value.strip() == "": - concept.compiled[prop] = DoNotResolve(default_value) + concept.compiled[var_name] = DoNotResolve(default_value) else: - with context.push(desc=f"Initializing AST for property {prop}") as sub_context: + with context.push(desc=f"Initializing AST for property {var_name}") as sub_context: sub_context.add_inputs(source=default_value) to_parse = self.sheerka.ret(context.who, True, self.sheerka.new(BuiltinConcepts.USER_INPUT, body=default_value)) res = self.sheerka.execute(context, to_parse, steps) - concept.compiled[prop] = res + concept.compiled[var_name] = res sub_context.add_values(return_values=res) # Updates the cache of concepts when possible - if concept.key in self.sheerka.cache_by_key: - entry = self.sheerka.cache_by_key[concept.key] - if isinstance(entry, list): - # TODO : manage when there are multiple entries - pass - else: - self.sheerka.cache_by_key[concept.key].compiled = concept.compiled + if self.sheerka.has_id(concept.id): + self.sheerka.get_by_id(concept.id).compiled = concept.compiled def resolve(self, context, to_resolve, current_prop, current_concept, force_evaluation): if isinstance(to_resolve, DoNotResolve): @@ -198,7 +193,6 @@ class SheerkaEvaluateConcept: It means that if the where clause is True, will evaluate the body :param context: :param concept: - :param evaluate_body: If false, only evaluate body when 
necessary :return: value of the evaluation or error """ @@ -208,26 +202,26 @@ class SheerkaEvaluateConcept: self.initialize_concept_asts(context, concept) # to make sure of the order, it don't use ConceptParts.get_parts() - # props must be evaluated first, body must be evaluated before where + # variables must be evaluated first, body must be evaluated before where all_metadata_to_eval = self.choose_metadata_to_eval(context, concept) for metadata_to_eval in all_metadata_to_eval: - if metadata_to_eval == "props": - for prop_name in (p for p in concept.props if p in concept.compiled): - prop_ast = concept.compiled[prop_name] + if metadata_to_eval == "variables": + for var_name in (v for v in concept.variables() if v in concept.compiled): + prop_ast = concept.compiled[var_name] if isinstance(prop_ast, list): # Do not send the current concept for the properties - resolved = self.resolve_list(context, prop_ast, prop_name, None, True) + resolved = self.resolve_list(context, prop_ast, var_name, None, True) else: # Do not send the current concept for the properties - resolved = self.resolve(context, prop_ast, prop_name, None, True) + resolved = self.resolve(context, prop_ast, var_name, None, True) if isinstance(resolved, Concept) and not context.sheerka.is_success(resolved): - resolved.set_prop("concept", concept) # since current concept was not sent + resolved.set_value("concept", concept) # since current concept was not sent return resolved else: - concept.set_prop(prop_name, resolved) + concept.set_value(var_name, resolved) else: part_key = ConceptParts(metadata_to_eval) @@ -245,7 +239,7 @@ class SheerkaEvaluateConcept: if isinstance(resolved, Concept) and not context.sheerka.is_success(resolved): return resolved else: - concept.values[part_key] = self.get_infinite_recursion_resolution(resolved) or resolved + concept.set_value(part_key, self.get_infinite_recursion_resolution(resolved) or resolved) # # TODO : Validate the PRE condition @@ -253,8 +247,8 @@ class 
SheerkaEvaluateConcept: # validate where clause if ConceptParts.WHERE in concept.values: - where_value = concept.values[ConceptParts.WHERE] - if not (where_value is None or self.sheerka.value(where_value)): + where_value = concept.get_value(ConceptParts.WHERE) + if not (where_value is None or self.sheerka.objvalue(where_value)): return self.sheerka.new(BuiltinConcepts.WHERE_CLAUSE_FAILED, body=concept) # @@ -267,7 +261,7 @@ class SheerkaEvaluateConcept: def choose_metadata_to_eval(self, context, concept): if context.in_context(BuiltinConcepts.EVAL_BODY_REQUESTED): - return ["pre", "post", "props", "body", "where"] + return ["pre", "post", "variables", "body", "where"] metadata = ["pre", "post"] if context.in_context(BuiltinConcepts.EVAL_WHERE_REQUESTED) or concept.metadata.need_validation: @@ -310,9 +304,9 @@ class SheerkaEvaluateConcept: if not isinstance(return_value.body.source, str): continue - for prop_name in (p[0] for p in concept.metadata.props): - if prop_name in return_value.body.source: - needed.append("props") + for var_name in (p[0] for p in concept.metadata.variables): + if var_name in return_value.body.source: + needed.append("variables") break if "self" in return_value.body.source: diff --git a/src/core/sheerka/Services/SheerkaExecute.py b/src/core/sheerka/Services/SheerkaExecute.py index 7aecd8e..de8957e 100644 --- a/src/core/sheerka/Services/SheerkaExecute.py +++ b/src/core/sheerka/Services/SheerkaExecute.py @@ -230,12 +230,12 @@ class SheerkaExecute: for preprocess in context.preprocess: for e in parsers_or_evaluators: - if self.matches(e.name, preprocess.get_prop("name")): - for prop, value in preprocess.props.items(): - if prop == "name": + if self.matches(e.name, preprocess.get_value("name")): + for var_name in preprocess.values: + if var_name == "name": continue - if hasattr(e, prop): - setattr(e, prop, value.value) + if hasattr(e, var_name): + setattr(e, var_name, preprocess.get_value(var_name)) return parsers_or_evaluators[0] if single_one 
else parsers_or_evaluators @staticmethod diff --git a/src/core/sheerka/Services/SheerkaModifyConcept.py b/src/core/sheerka/Services/SheerkaModifyConcept.py index f9033bd..21938a9 100644 --- a/src/core/sheerka/Services/SheerkaModifyConcept.py +++ b/src/core/sheerka/Services/SheerkaModifyConcept.py @@ -1,5 +1,4 @@ from core.builtin_concepts import BuiltinConcepts -from sdp.sheerkaDataProvider import SheerkaDataProviderRef class SheerkaModifyConcept: @@ -8,52 +7,35 @@ class SheerkaModifyConcept: self.logger_name = self.modify_concept.__name__ def modify_concept(self, context, concept): + old_version = self.sheerka.get_by_id(concept.id) - sdp = self.sheerka.sdp - - try: - # modify the entry - concept.metadata.full_serialization = True - result = sdp.modify( - context.event.get_digest(), - self.sheerka.CONCEPTS_ENTRY, - concept.key, - concept) - concept.metadata.full_serialization = False - - # update reference entry - sdp.modify( - context.event.get_digest(), - self.sheerka.CONCEPTS_BY_ID_ENTRY, - concept.id, - SheerkaDataProviderRef(concept.id, result.digest, concept.get_origin())) - - # update name entry - sdp.modify( - context.event.get_digest(), - self.sheerka.CONCEPTS_BY_NAME_ENTRY, - concept.name, - SheerkaDataProviderRef(concept.name, result.digest, concept.get_origin())) - - # update the hash entry - sdp.modify( - context.event.get_digest(), - self.sheerka.CONCEPTS_BY_HASH_ENTRY, - concept.get_original_definition_hash(), - SheerkaDataProviderRef(concept.get_definition_hash(), result.digest, concept.get_origin())) - - except IndexError as error: - context.log_error(f"Failed to update concept '{concept}'.", who=self.logger_name) + if old_version is None: + # nothing found in cache return self.sheerka.ret( - self.logger_name, - False, - self.sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=concept), - error.args[0]) + self.logger_name, False, + self.sheerka.new( + BuiltinConcepts.UNKNOWN_CONCEPT, + body=[("key", concept.key), ("id", concept.id)])) - # update 
cache - self.sheerka.cache_by_key[concept.key] = sdp.get_safe(self.sheerka.CONCEPTS_ENTRY, concept.key) - self.sheerka.cache_by_name[concept.name] = sdp.get_safe(self.sheerka.CONCEPTS_BY_NAME_ENTRY, concept.name) - self.sheerka.cache_by_id[concept.id] = concept + if not self.sheerka.is_success(old_version) and concept.key != old_version.key: + # an error concept is returned + return self.sheerka.ret( + self.logger_name, False, + old_version) + + if old_version == concept: + # the concept is not modified + return self.sheerka.ret( + self.logger_name, False, + self.sheerka.new( + BuiltinConcepts.CONCEPT_ALREADY_DEFINED, + body=concept)) + + self.sheerka.cache_manager.update_concept(old_version, concept) + + # TODO : update concept by first keyword + # TODO : update resolved by first keyword + # TODO : update concets grammars ret = self.sheerka.ret(self.logger_name, True, self.sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=concept)) return ret diff --git a/src/core/sheerka/Services/SheerkaSetsManager.py b/src/core/sheerka/Services/SheerkaSetsManager.py index 84fd182..8d188a2 100644 --- a/src/core/sheerka/Services/SheerkaSetsManager.py +++ b/src/core/sheerka/Services/SheerkaSetsManager.py @@ -1,7 +1,7 @@ -from core.ast.nodes import python_to_concept -from core.builtin_concepts import BuiltinConcepts, ErrorConcept -from core.concept import Concept, ConceptParts import core.builtin_helpers +from core.ast.nodes import python_to_concept +from core.builtin_concepts import BuiltinConcepts +from core.concept import Concept, ConceptParts GROUP_PREFIX = 'All_' @@ -20,13 +20,16 @@ class SheerkaSetsManager: :return: """ - context.log(f"Setting that concept {concept} is a {concept_set}", who=self.logger_name) + context.log(f"Setting concept {concept} is a {concept_set}", who=self.logger_name) - isa = [] if BuiltinConcepts.ISA not in concept.props else concept.get_prop(BuiltinConcepts.ISA) - if concept_set not in isa: - isa.append(concept_set) + if BuiltinConcepts.ISA in 
concept.metadata.props and concept_set in concept.metadata.props[BuiltinConcepts.ISA]: + return self.sheerka.ret( + self.logger_name, + False, + self.sheerka.new(BuiltinConcepts.CONCEPT_ALREADY_IN_SET, body=concept, concept_set=concept_set)) + + concept.add_prop(BuiltinConcepts.ISA, concept_set) - concept.set_prop(BuiltinConcepts.ISA, isa) res = self.sheerka.modify_concept(context, concept) if not res.status: return res @@ -47,32 +50,34 @@ class SheerkaSetsManager: assert concept.id assert concept_set.id - try: - result = self.sheerka.sdp.add_unique(context.event.get_digest(), GROUP_PREFIX + concept_set.id, concept.id) - if result.already_exists: # concept already in set - return self.sheerka.ret( - self.logger_name, - False, - self.sheerka.new(BuiltinConcepts.CONCEPT_ALREADY_IN_SET, body=concept, concept_set=concept_set)) - else: - return self.sheerka.ret(self.logger_name, True, self.sheerka.new(BuiltinConcepts.SUCCESS)) - except Exception as error: - context.log_error("Failed to add to set.", who=self.logger_name) - return self.sheerka.ret(self.logger_name, False, ErrorConcept(error), error.args[0]) + set_elements = self.sheerka.cache_manager.get(self.sheerka.CONCEPTS_GROUPS_ENTRY, concept_set.id) + if set_elements and concept.id in set_elements: + return self.sheerka.ret( + self.logger_name, + False, + self.sheerka.new(BuiltinConcepts.CONCEPT_ALREADY_IN_SET, body=concept, concept_set=concept_set)) + + self.sheerka.cache_manager.put(self.sheerka.CONCEPTS_GROUPS_ENTRY, concept_set.id, concept.id) + return self.sheerka.ret(self.logger_name, True, self.sheerka.new(BuiltinConcepts.SUCCESS)) def add_concepts_to_set(self, context, concepts, concept_set): """Adding multiple concepts at the same time""" context.log(f"Adding concepts {concepts} to set {concept_set}", who=self.logger_name) - previous = self.sheerka.sdp.get_safe(GROUP_PREFIX + concept_set.id) + already_in_set = [] + for concept in concepts: + res = self.add_concept_to_set(context, concept, concept_set) + if 
self.sheerka.isinstance(res.body, BuiltinConcepts.CONCEPT_ALREADY_IN_SET): + already_in_set.append(res.body.body) - new_ids = [c.id for c in concepts] if previous is None else previous + [c.id for c in concepts] - try: - self.sheerka.sdp.set(context.event.get_digest(), GROUP_PREFIX + concept_set.id, new_ids) - return self.sheerka.ret(self.logger_name, True, self.sheerka.new(BuiltinConcepts.SUCCESS)) - except Exception as error: - context.log_error("Failed to add to set.", who=self.logger_name) - return self.sheerka.ret(self.logger_name, False, ErrorConcept(error), error.args[0]) + if already_in_set: + body = self.sheerka.new(BuiltinConcepts.CONCEPT_ALREADY_IN_SET, + body=already_in_set, + concept_set=concept_set) + else: + body = self.sheerka.new(BuiltinConcepts.SUCCESS) + + return self.sheerka.ret(self.logger_name, len(already_in_set) != len(concepts), body) def get_set_elements(self, context, concept): """ @@ -83,38 +88,41 @@ class SheerkaSetsManager: :return: """ - # noinspection PyShadowingNames - def _get_set_elements(context, concept, sub_concept): - if not (isinstance(sub_concept, Concept) and sub_concept.id): + def _get_set_elements(sub_concept): + if not self.isaset(context, sub_concept): return self.sheerka.new(BuiltinConcepts.NOT_A_SET, body=concept) - ids = self.sheerka.sdp.get_safe(GROUP_PREFIX + sub_concept.id) - if ids: - if concept.metadata.where: - new_condition = self._validate_where_clause(concept) - if not new_condition: - return self.sheerka.new(BuiltinConcepts.WHERE_CLAUSE_FAILED, body=concept) - else: - # This methods sucks, but I don't have enough tools (like proper AST manipulation functions) - # to do it properly now. 
It will be enhanced later - concepts = self._get_concepts(context, ids, True) - globals_ = {"xx__concepts__xx": concepts, "sheerka": self.sheerka} - locals_ = {} - exec(new_condition, globals_, locals_) - return locals_["result"] - else: - return self._get_concepts(context, ids, False) + # first, try to see if sub_context has it's own group entry + ids = self.sheerka.cache_manager.get(self.sheerka.CONCEPTS_GROUPS_ENTRY, sub_concept.id) + concepts = self._get_concepts(context, ids, True) - # it may be a concept that references a set - if not sub_concept.metadata.is_evaluated: - with context.push(desc=f"Evaluating concept {sub_concept}") as sub_context: - sub_context.local_hints.add(BuiltinConcepts.EVAL_BODY_REQUESTED) - evaluated = self.sheerka.evaluate_concept(sub_context, sub_concept) - if evaluated.key != concept.key: - return False - return _get_set_elements(context, concept, sub_concept.body) + # aggregate with en entries from its body + sub_concept = core.builtin_helpers.ensure_evaluated(context, sub_concept) + if not self.sheerka.is_success(sub_concept): + return sub_concept - return _get_set_elements(context, concept, concept) + if self.isaset(context, sub_concept.body): + other_concepts = _get_set_elements(sub_concept.body) + if not self.sheerka.is_success(other_concepts): + return other_concepts + concepts.extend(other_concepts) + + # apply the where clause if any + if sub_concept.metadata.where: + new_condition = self._validate_where_clause(sub_concept) + if not new_condition: + return self.sheerka.new(BuiltinConcepts.WHERE_CLAUSE_FAILED, body=sub_concept) + + # This methods sucks, but I don't have enough tools (like proper AST manipulation functions) + # to do it properly now. 
It will be enhanced later + globals_ = {"xx__concepts__xx": concepts, "sheerka": self.sheerka} + locals_ = {} + exec(new_condition, globals_, locals_) + concepts = locals_["result"] + + return concepts + + return _get_set_elements(concept) def isinset(self, a, b): """ @@ -135,17 +143,15 @@ class SheerkaSetsManager: if not (a.id and b.id): return False - if self.sheerka.sdp.exists(GROUP_PREFIX + b.id, a.id): - return True - - return False + group_elements = self.sheerka.cache_manager.get(self.sheerka.CONCEPTS_GROUPS_ENTRY, b.id) + return group_elements and a.id in group_elements def isa(self, a, b): - if BuiltinConcepts.ISA not in a.props: + if BuiltinConcepts.ISA not in a.metadata.props: return False - for c in a.get_prop(BuiltinConcepts.ISA): + for c in a.metadata.props[BuiltinConcepts.ISA]: if c == b: return True if self.isa(c, b): @@ -163,21 +169,19 @@ class SheerkaSetsManager: """""" if not (isinstance(concept, Concept) and concept.id): - return None + return False + + # check if it has a group + # TODO: use cache instead of directly requesting sdp + if self.sheerka.cache_manager.get(self.sheerka.CONCEPTS_GROUPS_ENTRY, concept.id): + return True # it may be a concept that references a set - if not concept.metadata.is_evaluated: - with context.push(desc=f"Evaluating concept {concept}") as sub_context: - sub_context.local_hints.add(BuiltinConcepts.EVAL_BODY_REQUESTED) - evaluated = self.sheerka.evaluate_concept(sub_context, concept) - if evaluated.key != concept.key: - return False + concept = core.builtin_helpers.ensure_evaluated(context, concept) + if not context.sheerka.is_success(concept): + return False - if concept.body: - return self.isaset(context, concept.body) - - res = self.sheerka.sdp.get_safe(GROUP_PREFIX + concept.id) - return res is not None + return self.isaset(context, concept.body) def _validate_where_clause(self, concept): python_parser_result = [r for r in concept.compiled[ConceptParts.WHERE] if r.who == "parsers.Python"] @@ -190,7 +194,7 @@ 
class SheerkaSetsManager: if len(names) != 1 or names[0] != concept.metadata.body: return None - condition = concept.metadata.where.replace(concept.metadata.body, "sheerka.value(x)") + condition = concept.metadata.where.replace(concept.metadata.body, "sheerka.objvalue(x)") expression = f""" result=[] for x in xx__concepts__xx: @@ -218,10 +222,11 @@ for x in xx__concepts__xx: result = [] with context.push(desc=f"Evaluating concepts of a set") as sub_context: + sub_context.add_inputs(ids=ids) sub_context.local_hints.add(BuiltinConcepts.EVAL_BODY_REQUESTED) for element_id in ids: concept = self.sheerka.get_by_id(element_id) evaluated = self.sheerka.evaluate_concept(sub_context, concept) result.append(evaluated) - + sub_context.add_inputs(return_value=result) return result diff --git a/src/core/sheerka/Services/SheerkaVariableManager.py b/src/core/sheerka/Services/SheerkaVariableManager.py index fabc033..558fc62 100644 --- a/src/core/sheerka/Services/SheerkaVariableManager.py +++ b/src/core/sheerka/Services/SheerkaVariableManager.py @@ -20,35 +20,29 @@ class Variable: class SheerkaVariableManager: - VARIABLES_ENTRY = "All_Variables" # to store all the concepts def __init__(self, sheerka): self.sheerka = sheerka def record(self, context, who, key, value): - """Persist a variable""" - # first check if there is a previous version of the variable - try: - old = self.sheerka.sdp.get(self.VARIABLES_ENTRY, who + "." 
+ key) - if old.value == value: - return + """ - parent = getattr(old, Serializer.ORIGIN) - except IndexError: - parent = None + :param context: + :param who: entity that owns the key (acts as a namespace) + :param key: + :param value: + :return: + """ - variable = Variable(context.event.get_digest(), who, key, value, [parent] if parent else None) - self.sheerka.sdp.set(context.event.get_digest(), self.VARIABLES_ENTRY, variable, use_ref=True) + variable = Variable(context.event.get_digest(), who, key, value, None) + self.sheerka.cache_manager.put(self.sheerka.VARIABLES_ENTRY, variable.get_key(), variable) def load(self, who, key): - variable = self.sheerka.sdp.get_safe(self.VARIABLES_ENTRY, who + "." + key) + variable = self.sheerka.cache_manager.get(self.sheerka.VARIABLES_ENTRY, who + "." + key) if variable is None: return None return variable.value def delete(self, context, who, key): - self.sheerka.sdp.remove( - context.event.get_digest(), - self.VARIABLES_ENTRY, - lambda _key, _var: _key == who + "." + key) + self.sheerka.cache_manager.delete(self.sheerka.VARIABLES_ENTRY, who + "." 
+ key) diff --git a/src/core/sheerka/Sheerka.py b/src/core/sheerka/Sheerka.py index d93d850..d0dafe2 100644 --- a/src/core/sheerka/Sheerka.py +++ b/src/core/sheerka/Sheerka.py @@ -2,6 +2,12 @@ import logging import core.builtin_helpers import core.utils +from cache.Cache import Cache +from cache.CacheManager import CacheManager +from cache.DictionaryCache import DictionaryCache +from cache.IncCache import IncCache +from cache.ListIfNeededCache import ListIfNeededCache +from cache.SetCache import SetCache from core.builtin_concepts import BuiltinConcepts, ErrorConcept, ReturnValueConcept, BuiltinErrors, BuiltinUnique, \ UnknownConcept from core.concept import Concept, ConceptParts, PROPERTIES_FOR_NEW @@ -18,8 +24,7 @@ from core.sheerka_logger import console_handler from printer.SheerkaPrinter import SheerkaPrinter from sdp.sheerkaDataProvider import SheerkaDataProvider, Event -CONCEPT_LEXER_PARSER_CLASS = "parsers.BnfNodeParser.BnfNodeParser" -BNF_PARSER_CLASS = "parsers.BnfParser.BnfParser" +BASE_NODE_PARSER_CLASS = "parsers.BaseNodeParser.BaseNodeParser" CONCEPTS_FILE = "_concepts.txt" @@ -28,45 +33,36 @@ class Sheerka(Concept): Main controller for the project """ - CONCEPTS_ENTRY = "All_Concepts" # to store all the concepts - CONCEPTS_BY_ID_ENTRY = "Concepts_By_ID" + CONCEPTS_BY_ID_ENTRY = "Concepts_By_ID" # to store all the concepts + CONCEPTS_BY_KEY_ENTRY = "Concepts_By_Key" CONCEPTS_BY_NAME_ENTRY = "Concepts_By_Name" CONCEPTS_BY_HASH_ENTRY = "Concepts_By_Hash" # store hash of concepts definitions (not values) - CONCEPTS_DEFINITIONS_ENTRY = "Concepts_Definitions" # to store definitions (bnf) of concepts - CONCEPTS_BY_FIRST_KEYWORD_ENTRY = "Concepts_By_First_Keyword" - CONCEPTS_SYA_DEFINITION_ENTRY = "Concepts_Sya_Definitions" + CONCEPTS_BY_FIRST_KEYWORD_ENTRY = "Concepts_By_First_Keyword" + RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY = "Resolved_Concepts_By_First_Keyword" + CONCEPTS_SYA_DEFINITION_ENTRY = "Concepts_Sya_Definitions" + 
RESOLVED_CONCEPTS_SYA_DEFINITION_ENTRY = "Resolved_Concepts_Sya_Definitions" + CONCEPTS_GRAMMARS_ENTRY = "Concepts_Grammars" + + CONCEPTS_GROUPS_ENTRY = "Concepts_Groups" + VARIABLES_ENTRY = "Variables" # entry for admin or internal variables + + CONCEPTS_KEYS_ENTRY = "Concepts_Keys" BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts" # sequential key for builtin concepts USER_CONCEPTS_KEYS = "User_Concepts" # sequential key for user defined concepts - def __init__(self, skip_builtins_in_db=False, debug=False, loggers=None): + def __init__(self, cache_only=False, debug=False, loggers=None): self.init_logging(debug, loggers) self.loggers = loggers super().__init__(BuiltinConcepts.SHEERKA, True, True, BuiltinConcepts.SHEERKA) self.log.debug("Starting Sheerka.") - # cache of the most used concepts - # Note that these are only templates - # They are used as a footprint for instantiation - # Except of source when the concept is supposed to be unique - # key is the key of the concept (not the name or the id) - self.cache_by_key = {} - self.cache_by_id = {} - self.cache_by_name = {} + self.bnp = None # reference to the BaseNodeParser class (to compute first keyword token) - # cache for concept definitions, - # Primarily used for unit test that does not have access to sdp - self.concepts_definitions_cache = {} - - # - # cache for concepts grammars - # a grammar is a resolved BNF - self.concepts_grammars = {} - - # cache for SYA concepts - self.concepts_by_first_keyword = {} - self.sya_definitions = {} + # # Cache for concepts grammars + # # To be shared between BNFNode parsers instances + # self.concepts_grammars = {} # a concept can be instantiated # ex: File is a concept, but File('foo.txt') is an instance @@ -78,6 +74,8 @@ class Sheerka(Concept): self.rules = [] self.sdp: SheerkaDataProvider = None # SheerkaDataProvider + self.cache_manager = CacheManager(cache_only) + self.builtin_cache = {} # cache for builtin concepts self.parsers = {} # cache for builtin parsers 
self.evaluators = [] # cache for builtin evaluators @@ -85,8 +83,6 @@ class Sheerka(Concept): self.evaluators_prefix: str = None self.parsers_prefix: str = None - self.skip_builtins_in_db = skip_builtins_in_db - self.execute_handler = SheerkaExecute(self) self.create_new_concept_handler = SheerkaCreateNewConcept(self) self.modify_concept_handler = SheerkaModifyConcept(self) @@ -100,36 +96,65 @@ class Sheerka(Concept): self.during_restore = False self._builtins_classes_cache = None - def initialize(self, root_folder: str = None): + self.save_execution_context = True + + @property + def resolved_concepts_by_first_keyword(self): + """ + We return the cache as we will be interested by statistics + :return: + """ + return self.cache_manager.caches[self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY].cache + + @property + def resolved_sya_def(self): + """ + + :return: + """ + return self.cache_manager.caches[self.RESOLVED_CONCEPTS_SYA_DEFINITION_ENTRY].cache + + @property + def concepts_grammars(self): + return self.cache_manager.caches[self.CONCEPTS_GRAMMARS_ENTRY].cache + + def initialize(self, root_folder: str = None, save_execution_context=True): """ Starting Sheerka Loads the current configuration Notes that when it's the first time, it also create the needed working folders :param root_folder: root configuration folder + :param save_execution_context: :return: ReturnValue(Success or Error) """ + self.save_execution_context = save_execution_context + try: from sheerkapickle.sheerka_handlers import initialize_pickle_handlers initialize_pickle_handlers() self.sdp = SheerkaDataProvider(root_folder, self) - if self.sdp.first_time: - self.sdp.set_key(self.USER_CONCEPTS_KEYS, 1000) + self.initialize_caching() - event = Event("Initializing Sheerka.", user=self.name) + event = Event("Initializing Sheerka.", user_id=self.name) self.sdp.save_event(event) with ExecutionContext(self.key, event, self, "Initializing Sheerka.", self.init_log) as exec_context: + if self.sdp.first_time: + 
self.first_time_initialisation(exec_context) - self.initialize_builtin_concepts() self.initialize_builtin_parsers() self.initialize_builtin_evaluators() - self.initialize_bnf_parsing(exec_context) - self.initialize_sya_parsing() + self.initialize_builtin_concepts() + self.initialize_concept_node_parsing(exec_context) res = ReturnValueConcept(self, True, self) exec_context.add_values(return_values=res) - if not self.skip_builtins_in_db: + + if self.cache_manager.is_dirty: + self.cache_manager.commit(exec_context) + + if save_execution_context: self.sdp.save_result(exec_context, is_admin=True) self.init_log.debug(f"Sheerka successfully initialized") @@ -138,6 +163,59 @@ class Sheerka(Concept): return res + def initialize_caching(self): + + def params(cache_name): + return { + 'default': lambda k: self.sdp.get(cache_name, k), + 'extend_exists': lambda k: self.sdp.exists(cache_name, k) + } + + cache = IncCache(default=lambda k: self.sdp.get(self.CONCEPTS_KEYS_ENTRY, k)) + self.cache_manager.register_cache(self.CONCEPTS_KEYS_ENTRY, cache) + + register_concept_cache = self.cache_manager.register_concept_cache + + cache = Cache(**params(self.CONCEPTS_BY_ID_ENTRY)) + register_concept_cache(self.CONCEPTS_BY_ID_ENTRY, cache, lambda c: c.id, True) + + cache = ListIfNeededCache(**params(self.CONCEPTS_BY_KEY_ENTRY)) + register_concept_cache(self.CONCEPTS_BY_KEY_ENTRY, cache, lambda c: c.key, True) + + cache = ListIfNeededCache(**params(self.CONCEPTS_BY_NAME_ENTRY)) + register_concept_cache(self.CONCEPTS_BY_NAME_ENTRY, cache, lambda c: c.name, True) + + cache = ListIfNeededCache(**params(self.CONCEPTS_BY_HASH_ENTRY)) + register_concept_cache(self.CONCEPTS_BY_HASH_ENTRY, cache, lambda c: c.get_definition_hash(), True) + + cache = DictionaryCache(default=lambda k: self.sdp.get(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, k)) + self.cache_manager.register_cache(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, cache) + self.cache_manager.get(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, None) # to init from 
sdp + + cache = DictionaryCache(default=lambda k: self.sdp.get(self.CONCEPTS_SYA_DEFINITION_ENTRY, k)) + self.cache_manager.register_cache(self.CONCEPTS_SYA_DEFINITION_ENTRY, cache) + self.cache_manager.get(self.CONCEPTS_SYA_DEFINITION_ENTRY, None) # to init from sdp + + cache = SetCache(default=lambda k: self.sdp.get(self.CONCEPTS_GROUPS_ENTRY, k)) + self.cache_manager.register_cache(self.CONCEPTS_GROUPS_ENTRY, cache) + + cache = Cache(default=lambda k: self.sdp.get(self.VARIABLES_ENTRY, k)) + self.cache_manager.register_cache(self.VARIABLES_ENTRY, cache, True, True) + + cache = DictionaryCache() + self.cache_manager.register_cache(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, cache, persist=False) + + cache = DictionaryCache() + self.cache_manager.register_cache(self.RESOLVED_CONCEPTS_SYA_DEFINITION_ENTRY, cache, persist=False) + + cache = Cache() + self.cache_manager.register_cache(self.CONCEPTS_GRAMMARS_ENTRY, cache, persist=False) + + def first_time_initialisation(self, context): + + self.cache_manager.put(self.CONCEPTS_KEYS_ENTRY, self.USER_CONCEPTS_KEYS, 1000) + self.variable_handler.record(context, self.name, "save_execution_context", True) + def initialize_builtin_concepts(self): """ Initializes the builtin concepts @@ -160,18 +238,16 @@ class Sheerka(Concept): if not concept.metadata.is_unique and str(key) in builtins_classes: self.builtin_cache[key] = builtins_classes[str(key)] - if not self.skip_builtins_in_db: - from_db = self.sdp.get_safe(self.CONCEPTS_ENTRY, concept.metadata.key) - if from_db is None: - self.init_log.debug(f"'{concept.name}' concept is not found in db. Adding.") - self.set_id_if_needed(concept, True) - concept.metadata.full_serialization = True - self.sdp.add("init", self.CONCEPTS_ENTRY, concept, use_ref=True) - else: - self.init_log.debug(f"Found concept '{from_db}' in db. 
Updating.") - concept.update_from(from_db) + from_db = self.cache_manager.get(self.CONCEPTS_BY_KEY_ENTRY, concept.metadata.key) + if from_db is None: + self.init_log.debug(f"'{concept.name}' concept is not found in db. Adding.") + self.set_id_if_needed(concept, True) + self.cache_manager.add_concept(concept) + else: + self.init_log.debug(f"Found concept '{from_db}' in db. Updating.") + concept.update_from(from_db) - self.add_in_cache(concept) + return def initialize_builtin_parsers(self): """ @@ -187,17 +263,23 @@ class Sheerka(Concept): if parser.__module__ == base_class.__module__: continue - if parser.__module__ in modules_to_skip: - continue - qualified_name = core.utils.get_full_qualified_name(parser) self.init_log.debug(f"Adding builtin parser '{qualified_name}'") temp_result[qualified_name] = parser + # keep a reference to base_node_parser + self.bnp = temp_result[BASE_NODE_PARSER_CLASS] + # Now we sort the parser by name. # It's not important for the logic of their usage as they have their priority anyway, # We do that for the unit tests. 
They are to complicated to write otherwise for name in sorted(temp_result.keys()): + parser = temp_result[name] + + if parser.__module__ in modules_to_skip: + # base node parser module does not contains any valid parser + continue + self.parsers[name] = temp_result[name] def initialize_builtin_evaluators(self): @@ -214,55 +296,39 @@ class Sheerka(Concept): self.init_log.debug(f"Adding builtin evaluator '{evaluator.__name__}'") self.evaluators.append(evaluator) - def initialize_bnf_parsing(self, execution_context): - self.init_log.debug("Initializing concepts grammars.") - definitions = self.get_concepts_definitions(execution_context) + def initialize_concept_node_parsing(self, context): + self.init_log.debug("Initializing concept node parsing.") - if definitions is None: - self.init_log.debug("No BNF defined") - return + concepts_by_first_keyword = self.cache_manager.copy(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY) + res = self.bnp.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword) + self.cache_manager.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, res.body) - lexer_parser = self.parsers[CONCEPT_LEXER_PARSER_CLASS]() - ret_val = lexer_parser.initialize(execution_context, definitions) - if not ret_val.status: - self.init_log.error("Failed to initialize concepts definitions " + str(ret_val.body)) - return + # sya = self.bnf.resolve_sya_associativity_and_precedence() + # self.cache_manager.put(self.RESOLVED_CONCEPTS_SYA_DEFINITION_ENTRY, sya) + # + # + # self.concepts_by_first_keyword, \ + # self.resolved_concepts_by_first_keyword = \ + # self.create_new_concept_handler.load_concepts_nodes_definitions(context) - self.concepts_grammars = lexer_parser.concepts_grammars + # self.concepts_by_first_keyword = self.sdp.get_safe( + # self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, + # load_origin=False) or {} + # + # self.sya_definitions = self.sdp.get_safe( + # self.CONCEPTS_SYA_DEFINITION_ENTRY, + # load_origin=False) or {} + # + # init_ret_value = 
self.bnp.resolve_concepts_by_first_keyword(self, self.concepts_by_first_keyword) + # if not init_ret_value.status: + # return self.sheerka.ret(self.logger_name, False, ErrorConcept(init_ret_value.value)) + # self.resolved_concepts_by_first_keyword = init_ret_value.body - def initialize_sya_parsing(self): - self.init_log.debug("Initializing sya definitions.") - - self.concepts_by_first_keyword = self.sdp.get_safe( - self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, - load_origin=False) or {} - - self.sya_definitions = self.sdp.get_safe( - self.CONCEPTS_SYA_DEFINITION_ENTRY, - load_origin=False) or {} - - def reset(self): - self.reset_cache() - self.concepts_by_first_keyword = {} - self.concepts_grammars = {} - self.sya_definitions = {} + def reset(self, cache_only=False): + self.cache_manager.clear() + self.cache_manager.cache_only = cache_only + self.printer_handler.reset() self.sdp.reset() - self.sdp.set_key(self.USER_CONCEPTS_KEYS, 1000) - - def reset_cache(self, filter_to_use=None): - """ - reset the different cache that exists - :param filter_to_use: - :return: - """ - if filter_to_use is None: - self.cache_by_key = {} - self.cache_by_id = {} - self.cache_by_name = {} - else: - raise NotImplementedError() - - return self def evaluate_user_input(self, text: str, user_name="kodjo"): """ @@ -294,7 +360,10 @@ class Sheerka(Concept): ret = self.execute(execution_context, [user_input, reduce_requested], steps) execution_context.add_values(return_values=ret) - if not self.skip_builtins_in_db: + if self.cache_manager.is_dirty: + self.cache_manager.commit(execution_context) + + if self.save_execution_context and self.variable_handler.load(self.name, "save_execution_context"): self.sdp.save_result(execution_context) # # hack to save valid concept definition @@ -302,6 +371,8 @@ class Sheerka(Concept): # if len(ret) == 1 and ret[0].status and self.isinstance(ret[0].value, BuiltinConcepts.NEW_CONCEPT): # with open(CONCEPTS_FILE, "a") as f: # f.write(text + "\n") + + 
self._last_execution = execution_context return ret def print(self, result, instructions=None): @@ -343,8 +414,8 @@ class Sheerka(Concept): if obj.metadata.id is not None: return - entry = self.BUILTIN_CONCEPTS_KEYS if is_builtin else self.USER_CONCEPTS_KEYS - obj.metadata.id = self.sdp.get_next_key(entry) + key = self.BUILTIN_CONCEPTS_KEYS if is_builtin else self.USER_CONCEPTS_KEYS + obj.metadata.id = str(self.cache_manager.get(self.CONCEPTS_KEYS_ENTRY, key)) self.log.debug(f"Setting id '{obj.metadata.id}' to concept '{obj.metadata.name}'.") def create_new_concept(self, context, concept: Concept): @@ -380,21 +451,25 @@ class Sheerka(Concept): """ return self.sets_handler.set_isa(context, concept, concept_set) - def set_sya_def(self, context, list_of_def): + def force_sya_def(self, context, list_of_def): """ Set the precedence and/or the associativity of a concept + FOR TESTS PURPOSE. TO REMOVE EVENTUALLY :param context: :param list_of_def list of tuple(concept_id, precedence (int), SyaAssociativity) :return: """ # validate the entries + # If one entry is an invalid concept, rollback everything for concept_id, precedence, associativity in list_of_def: if concept_id == BuiltinConcepts.UNKNOWN_CONCEPT: return self.ret(self.name, False, self.new(BuiltinConcepts.ERROR, body=f"Concept {concept_id} is not known")) + sya_def = self.cache_manager.copy(self.RESOLVED_CONCEPTS_SYA_DEFINITION_ENTRY) or {} + # update the definitions for concept_id, precedence, associativity in list_of_def: if precedence is None and associativity is None: @@ -403,12 +478,10 @@ class Sheerka(Concept): except KeyError: pass else: - self.sya_definitions[concept_id] = (precedence, associativity.value) + sya_def[concept_id] = (precedence, associativity) - # then save - self.sdp.set(context.event.get_digest(), - self.CONCEPTS_SYA_DEFINITION_ENTRY, - self.sya_definitions) + # put in cache + self.cache_manager.put(self.RESOLVED_CONCEPTS_SYA_DEFINITION_ENTRY, False, sya_def) return self.ret(self.name, 
True, self.new(BuiltinConcepts.SUCCESS)) @@ -448,122 +521,108 @@ class Sheerka(Concept): if concept.key is None: raise KeyError() - self.cache_by_key[concept.key] = concept - - if concept.id: - self.cache_by_id[concept.id] = concept + self.cache_manager.add_concept(concept) return concept - def get(self, concept_key, concept_id=None): + # + # def get(self, concept_key, concept_id=None): + # """ + # Tries to find a concept + # What is return must be used a template for another concept. + # You must not modify the returned concept + # :param concept_key: key of the concept + # :param concept_id: when multiple concepts with the same key, use the id + # :return: + # """ + # + # by_key = self.get_by_key(concept_key) + # if self.is_known(by_key): + # return by_key + # + # # else return by name + # by_name = self.get_by_name(concept_key) + # if self.is_known(by_name): + # return by_name + # + # return by_key # return not found for key + + def get_by_key(self, concept_key, concept_id=None): + concept_key = str(concept_key) if isinstance(concept_key, BuiltinConcepts) else concept_key + return self.internal_get("key", concept_key, self.CONCEPTS_BY_KEY_ENTRY, concept_id) + + def get_by_name(self, concept_name, concept_id=None): + return self.internal_get("name", concept_name, self.CONCEPTS_BY_NAME_ENTRY, concept_id) + + def get_by_hash(self, concept_hash, concept_id=None): + return self.internal_get("hash", concept_hash, self.CONCEPTS_BY_HASH_ENTRY, concept_id) + + def get_by_id(self, concept_id): + return self.internal_get("id", concept_id, self.CONCEPTS_BY_ID_ENTRY, None) + + def internal_get(self, index_name, key, cache_name, concept_id=None): """ - Tries to find a concept - What is return must be used a template for another concept. - You must not modify the returned concept - :param concept_key: key of the concept - :param concept_id: when multiple concepts with the same key, use the id + Tries to find an entry + :param index_name: name of the index (ex by_id, by_key...) 
+ :param key: index value + :param cache_name: name of the cache (ex Concepts_By_ID...) + :param concept_id: id of the concept if none, in case where there are multiple results :return: """ - by_key = self.internal_get("key", concept_key, self.cache_by_key, self.CONCEPTS_ENTRY, concept_id) - if self.is_known(by_key): - return by_key + if key is None: + return ErrorConcept(f"Concept '{key}' is undefined.") - # else return by name - by_name = self.internal_get("name", concept_key, self.cache_by_name, self.CONCEPTS_BY_NAME_ENTRY, concept_id) - if self.is_known(by_name): - return by_name + concepts = self.cache_manager.get(cache_name, key) + if concepts: + if concept_id is None: + return concepts - return by_key # return not found for key + if not hasattr(concepts, "__iter__"): + return concepts - def get_by_key(self, concept_key, concept_id=None): - return self.internal_get("key", concept_key, self.cache_by_key, self.CONCEPTS_ENTRY, concept_id) + for c in concepts: + if c.id == concept_id: + return c - def get_by_name(self, concept_name, concept_id=None): - return self.internal_get("name", concept_name, self.cache_by_name, self.CONCEPTS_BY_NAME_ENTRY, concept_id) + metadata = [(index_name, key), ("id", concept_id)] if concept_id else (index_name, key) + return self._get_unknown(metadata) - def get_by_id(self, concept_id): - if concept_id is None: - return ErrorConcept("Concept id is undefined.") - - # first search in cache - if concept_id in self.cache_by_id: - result = self.cache_by_id[concept_id] - else: - result = self.sdp.get_safe(self.CONCEPTS_BY_ID_ENTRY, concept_id) - if result is None: - result = self._get_unknown(('id', concept_id)) - else: - self.cache_by_id[concept_id] = result - - return result - - def internal_get(self, index_name, index_value, cache_to_use, sdp_entry, concept_id=None): + def has_id(self, concept_id): """ - Tries to find an entry - :param index_name: - :param index_value: - :param cache_to_use: - :param sdp_entry: + Returns True if a 
concept with this id exists in cache + It does not search in the remote repository :param concept_id: :return: """ + return self.cache_manager.has(self.CONCEPTS_BY_ID_ENTRY, concept_id) - if index_value is None: - return ErrorConcept(f"Concept {index_name} is undefined.") + def has_key(self, concept_key): + """ + Returns True if concept(s) with this key exist in cache + It does not search in the remote repository + :param concept_key: + :return: + """ + return self.cache_manager.has(self.CONCEPTS_BY_KEY_ENTRY, concept_key) - if isinstance(index_value, BuiltinConcepts): - index_value = str(index_value) + def has_name(self, concept_name): + """ + Returns True if concept(s) with this name exist in cache + It does not search in the remote repository + :param concept_name: + :return: + """ + return self.cache_manager.has(self.CONCEPTS_BY_NAME_ENTRY, concept_name) - # first search in cache - if index_value in cache_to_use: - result = cache_to_use[index_value] - else: - result = self.sdp.get_safe(sdp_entry, index_value) - if result is None: - metadata = [(index_name, index_value), ("id", concept_id)] if concept_id else (index_name, index_value) - result = self._get_unknown(metadata) - # Do not put in cache_by_key or cache_by_id unknown concept - # TODO: implement an MRU cache for them - else: - cache_to_use[index_value] = result - for r in (result if isinstance(result, list) else [result]): - if r.id: - self.cache_by_id[r.id] = r - - if not (isinstance(result, list) and concept_id): - return result - - # result is a list, but we have the concept_id to discriminate - for c in result: - if c.id == concept_id: - return c - - metadata = [(index_name, index_value), ("id", concept_id)] if concept_id else (index_name, index_value) - return self._get_unknown(metadata) - - def get_concepts_definitions(self, context): - - if self.concepts_definitions_cache: - return self.concepts_definitions_cache - - encoded_bnf = self.sdp.get_safe( - self.CONCEPTS_DEFINITIONS_ENTRY, - 
load_origin=False) or {} - - self.concepts_definitions_cache = {} - bnf_parser = self.parsers[BNF_PARSER_CLASS]() - for k, v in encoded_bnf.items(): - key, id_ = core.utils.unstr_concept(k) - concept = self.new((key, id_)) - context.log(f"Parsing BNF definition for {concept}", context.who) - rule_result = bnf_parser.parse(context, v) - if rule_result.status: - self.concepts_definitions_cache[concept] = rule_result.value.value - else: - self.log.error(f"Failed to load bnf rule for concept {key}") - - return self.concepts_definitions_cache + def has_hash(self, concept_hash): + """ + Returns True if concept(s) with this hash exist in cache + It does not search in the remote repository + :param concept_hash: + :return: + """ + return self.cache_manager.has(self.CONCEPTS_BY_HASH_ENTRY, concept_hash) def new(self, concept_key, **kwargs): """ @@ -578,7 +637,7 @@ class Sheerka(Concept): else: concept_id = None - template = self.get_by_id(concept_id) if not concept_key else self.get(concept_key, concept_id) + template = self.get_by_id(concept_id) if not concept_key else self.get_by_key(concept_key, concept_id) # manage concept not found if self.isinstance(template, BuiltinConcepts.UNKNOWN_CONCEPT) and \ @@ -599,7 +658,7 @@ class Sheerka(Concept): # otherwise, create another instance concept = self.builtin_cache[key]() if key in self.builtin_cache else Concept() - concept.update_from(template) + concept.update_from(template, update_value=False) concept.freeze_definition_hash() if len(kwargs) == 0: @@ -608,10 +667,10 @@ class Sheerka(Concept): # update the properties, values, attributes # Not quite sure that this is the correct process order for k, v in kwargs.items(): - if k in concept.props: - concept.set_prop(k, v) + if k in concept.values: + concept.set_value(k, v) elif k in PROPERTIES_FOR_NEW: - concept.values[ConceptParts(k)] = v + concept.set_value(ConceptParts(k), v) elif hasattr(concept, k): setattr(concept, k, v) else: @@ -639,12 +698,12 @@ class Sheerka(Concept): 
message=message, parents=parents) - def value(self, obj, reduce_simple_list=False): + def objvalue(self, obj, reduce_simple_list=False): if obj is None: return None - if hasattr(obj, "get_value"): - return obj.get_value() + if hasattr(obj, "get_obj_value"): + return obj.get_obj_value() if not isinstance(obj, Concept): return obj @@ -657,7 +716,18 @@ class Sheerka(Concept): else: body_to_use = obj.body - return self.value(body_to_use) + return self.objvalue(body_to_use) + + def objvalues(self, objs): + if not (isinstance(objs, list) or + self.isinstance(objs, BuiltinConcepts.LIST) or + self.isinstance(objs, BuiltinConcepts.ENUMERATION)): + objs = [objs] + + if isinstance(objs, list): + return (self.objvalue(obj) for obj in objs) + + return (self.objvalue(obj) for obj in objs.body) def value_by_concept(self, obj, concept): if obj is None: @@ -678,8 +748,8 @@ class Sheerka(Concept): if isinstance(obj, Concept) and obj.metadata.is_builtin and obj.key in BuiltinErrors: return obj - if isinstance(obj, list): - return obj + if isinstance(obj, (list, set, tuple)): + return [self.get_error(o) for o in obj] if self.isinstance(obj, BuiltinConcepts.RETURN_VALUE): if obj.status: @@ -687,19 +757,10 @@ class Sheerka(Concept): if self.isinstance(obj.body, BuiltinConcepts.PARSER_RESULT): return self.get_error(obj.body.body) + else: + return obj.body - return NotImplementedError() - - def get_values(self, objs): - if not (isinstance(objs, list) or - self.isinstance(objs, BuiltinConcepts.LIST) or - self.isinstance(objs, BuiltinConcepts.ENUMERATION)): - objs = [objs] - - if isinstance(objs, list): - return (self.value(obj) for obj in objs) - - return (self.value(obj) for obj in objs.body) + raise NotImplementedError() def is_success(self, obj): if isinstance(obj, bool): # quick win @@ -761,8 +822,12 @@ class Sheerka(Concept): return self.parsers_prefix + name def concepts(self): + """ + List of all known concepts (look up in sdp) + :return: + """ res = [] - lst = 
self.sdp.list(self.CONCEPTS_ENTRY) + lst = self.sdp.list(self.CONCEPTS_BY_ID_ENTRY) for item in lst: if isinstance(item, list): res.extend(item) @@ -818,10 +883,10 @@ class Sheerka(Concept): # the metadata can be a list, if several attributes where given # (key, 'not_found), (id, invalid_id) - unknown_concept = UnknownConcept() - unknown_concept.set_metadata_value(ConceptParts.BODY, metadata) + unknown_concept = UnknownConcept() # don't use new() for prevent circular reference + unknown_concept.set_value(ConceptParts.BODY, metadata) for meta in (metadata if isinstance(metadata, list) else [metadata]): - unknown_concept.set_prop(meta[0], meta[1]) + unknown_concept.set_value(meta[0], meta[1]) unknown_concept.metadata.is_evaluated = True return unknown_concept diff --git a/src/core/utils.py b/src/core/utils.py index 5c01970..1732e90 100644 --- a/src/core/utils.py +++ b/src/core/utils.py @@ -261,7 +261,7 @@ def decode_enum(enum_repr: str): return None -def str_concept(t): +def str_concept(t, skip_key=None): """ The key,id identifiers of a concept are stored in a tuple we want to return the key and the id, separated by a pipe @@ -272,7 +272,9 @@ def str_concept(t): >>> assert str_concept(("key", None)) == "c:key:" >>> assert str_concept((None, None)) == "" >>> assert str_concept(Concept(key="foo", id="bar")) == "c:foo|bar:" + >>> assert str_concept(Concept(key="foo", id="bar"), skip_key=True) == "c:|bar:" :param t: + :param skip_key: True if we only want the id (and not the key) :return: """ if isinstance(t, tuple): @@ -283,7 +285,7 @@ def str_concept(t): if key is None and id_ is None: return "" - result = 'c:' if key is None else "c:" + key + result = 'c:' if (key is None or skip_key) else "c:" + key if id_: result += "|" + id_ return result + ":" diff --git a/src/evaluators/AddConceptEvaluator.py b/src/evaluators/AddConceptEvaluator.py index e21c7a6..741300c 100644 --- a/src/evaluators/AddConceptEvaluator.py +++ b/src/evaluators/AddConceptEvaluator.py @@ -79,18 
+79,18 @@ class AddConceptEvaluator(OneReturnValueEvaluator): continue # try to find what can be a property - for p in self.get_props(sheerka, part_ret_val, name_to_use): + for p in self.get_variables(sheerka, part_ret_val, name_to_use): props_found.add(p) - # add props by order of appearance when possible + # add variables by order of appearance when possible for token in def_concept_node.name.tokens: if token.value in props_found: - concept.def_prop(token.value, None) + concept.def_var(token.value, None) # add the remaining properties for p in props_found: - if p not in concept.props: - concept.def_prop(p, None) + if p not in concept.values: + concept.def_var(p, None) # initialize the key key_source = def_concept_node.definition.tokens if \ @@ -105,7 +105,7 @@ class AddConceptEvaluator(OneReturnValueEvaluator): ret = sheerka.create_new_concept(context, concept) if not ret.status: - error_cause = sheerka.value(ret.body) + error_cause = sheerka.objvalue(ret.body) context.log(f"Failed to add concept '{concept.name}'. 
Reason: {error_cause}", self.name) return sheerka.ret(self.name, ret.status, ret.value, parents=[return_value]) @@ -115,7 +115,7 @@ class AddConceptEvaluator(OneReturnValueEvaluator): return [part.value for part in core.utils.strip_tokens(source.tokens, True)] @staticmethod - def get_props(sheerka, ret_value, concept_name): + def get_variables(sheerka, ret_value, concept_name): """ Try to find out the variables This function can only be a draft, as there may be tons of different situations @@ -146,7 +146,7 @@ class AddConceptEvaluator(OneReturnValueEvaluator): # case of concept # if isinstance(ret_value.value, ParserResultConcept) and isinstance(ret_value.value.value, Concept): - return list(ret_value.value.value.props.keys()) + return list(ret_value.value.value.values.keys()) # # case of BNF diff --git a/src/evaluators/AddConceptInSetEvaluator.py b/src/evaluators/AddConceptInSetEvaluator.py index 9e6186e..0e182dd 100644 --- a/src/evaluators/AddConceptInSetEvaluator.py +++ b/src/evaluators/AddConceptInSetEvaluator.py @@ -64,7 +64,7 @@ class AddConceptInSetEvaluator(OneReturnValueEvaluator): res = sheerka.set_isa(context, concept, concept_set) if not res.status: - context.log(f"Failed. Reason: {sheerka.value(res.body)}.", self.name) + context.log(f"Failed. Reason: {sheerka.objvalue(res.body)}.", self.name) else: context.log(f"Concept added.", self.name) diff --git a/src/evaluators/ConceptEvaluator.py b/src/evaluators/ConceptEvaluator.py index 046a862..78d65d2 100644 --- a/src/evaluators/ConceptEvaluator.py +++ b/src/evaluators/ConceptEvaluator.py @@ -41,8 +41,8 @@ class ConceptEvaluator(OneReturnValueEvaluator): # Why ? 
# If we evaluate Concept("foo", body="a").set_prop("a", "'property_a'") # The body should be 'property_a', and not a concept called 'a' - if context.obj and concept.name in context.obj.props: - value = context.obj.props[concept.name].value + if context.obj and concept.name in context.obj.values: + value = context.obj.get_value(concept.name) context.log(f"{concept.name} is a property. Returning value '{value}'.", self.name) return sheerka.ret(self.name, True, value, parents=[return_value]) diff --git a/src/evaluators/PrepareEvalEvaluator.py b/src/evaluators/PrepareEvalEvaluator.py index 614c42f..7ad82bd 100644 --- a/src/evaluators/PrepareEvalEvaluator.py +++ b/src/evaluators/PrepareEvalEvaluator.py @@ -31,7 +31,7 @@ class PrepareEvalEvaluator(OneReturnValueEvaluator): new_text_to_parse = sheerka.ret( self.name, - True, sheerka.new(BuiltinConcepts.USER_INPUT, body=self.text[5:], user_name=context.event.user)) + True, sheerka.new(BuiltinConcepts.USER_INPUT, body=self.text[5:], user_name=context.event.user_id)) context.global_hints.add(BuiltinConcepts.EVAL_BODY_REQUESTED) context.global_hints.add(BuiltinConcepts.CONCEPT_VALUE_REQUESTED) diff --git a/src/evaluators/PythonEvaluator.py b/src/evaluators/PythonEvaluator.py index 84ed070..d49074a 100644 --- a/src/evaluators/PythonEvaluator.py +++ b/src/evaluators/PythonEvaluator.py @@ -1,15 +1,14 @@ +import ast import copy import traceback -from enum import Enum +import core.ast.nodes +import core.utils from core.ast.visitors import UnreferencedNamesVisitor from core.builtin_concepts import BuiltinConcepts, ParserResultConcept from core.concept import ConceptParts, Concept from evaluators.BaseEvaluator import OneReturnValueEvaluator from parsers.PythonParser import PythonNode -import ast -import core.ast.nodes -import core.utils class PythonEvaluator(OneReturnValueEvaluator): @@ -36,7 +35,7 @@ class PythonEvaluator(OneReturnValueEvaluator): # Do not evaluate if the ast refers to a concept (leave it to ConceptEvaluator) if 
isinstance(node.ast_, ast.Expression) and isinstance(node.ast_.body, ast.Name): - c = context.sheerka.get(node.ast_.body.id) + c = context.sheerka.get_by_key(node.ast_.body.id) if not context.sheerka.isinstance(c, BuiltinConcepts.UNKNOWN_CONCEPT): context.log("It's a simple concept. Not for me.", self.name) not_for_me = context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=node) @@ -69,7 +68,6 @@ class PythonEvaluator(OneReturnValueEvaluator): "sheerka": context.sheerka, "desc": context.sheerka.dump_handler.dump_desc, "concepts": context.sheerka.dump_handler.dump_concepts, - "definitions": context.sheerka.dump_handler.dump_definitions, "history": context.sheerka.dump_handler.dump_history, "state": context.sheerka.dump_handler.dump_state, "Concept": core.concept.Concept @@ -77,11 +75,12 @@ class PythonEvaluator(OneReturnValueEvaluator): if context.obj: context.log(f"Concept '{context.obj}' is in context. Adding it and its properties to locals.", self.name) - for prop_name, prop_value in context.obj.props.items(): - if isinstance(prop_value.value, Concept): - my_locals[prop_name] = context.sheerka.value(prop_value.value) + for prop_name in context.obj.variables(): + prop_value = context.obj.get_value(prop_name) + if isinstance(prop_value, Concept): + my_locals[prop_name] = context.sheerka.objvalue(prop_value) else: - my_locals[prop_name] = prop_value.value + my_locals[prop_name] = prop_value my_locals["self"] = context.obj.body @@ -118,7 +117,7 @@ class PythonEvaluator(OneReturnValueEvaluator): sub_context.add_values(return_values=evaluated) if evaluated.key == concept.key: - my_locals[name] = evaluated if return_concept else context.sheerka.value(evaluated) + my_locals[name] = evaluated if return_concept else context.sheerka.objvalue(evaluated) if self.locals: # when exta values are given. 
Add them my_locals.update(self.locals) @@ -142,7 +141,6 @@ class PythonEvaluator(OneReturnValueEvaluator): else: return to_resolve, None, False - @staticmethod def expr_to_expression(expr): expr.lineno = 0 diff --git a/src/parsers/AtomNodeParser.py b/src/parsers/AtomNodeParser.py index 09c52f2..3285fed 100644 --- a/src/parsers/AtomNodeParser.py +++ b/src/parsers/AtomNodeParser.py @@ -1,12 +1,11 @@ -import copy from dataclasses import dataclass from core import builtin_helpers from core.builtin_concepts import BuiltinConcepts -from core.concept import Concept, DEFINITION_TYPE_BNF -from core.tokenizer import TokenKind, Tokenizer -from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, UnrecognizedTokensNode -from parsers.BaseParser import BaseParser, UnexpectedTokenErrorNode, ErrorNode +from core.concept import DEFINITION_TYPE_BNF +from core.tokenizer import Tokenizer +from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, UnrecognizedTokensNode, SourceCodeNode +from parsers.BaseParser import UnexpectedTokenErrorNode, ErrorNode PARSERS = ["BnfNode", "SyaNode", "Python"] @@ -141,7 +140,11 @@ class AtomConceptParserHelper: self.unrecognized_tokens.fix_source() # try to recognize concepts - nodes_sequences = self._get_lexer_nodes_from_unrecognized() + nodes_sequences = builtin_helpers.get_lexer_nodes_from_unrecognized( + self.context, + self.unrecognized_tokens, + PARSERS) + if nodes_sequences: instances = [self] for i in range(len(nodes_sequences) - 1): @@ -152,7 +155,7 @@ class AtomConceptParserHelper: for instance, node_sequence in zip(instances, nodes_sequences): for node in node_sequence: instance.sequence.append(node) - if isinstance(node, UnrecognizedTokensNode) or \ + if isinstance(node, (UnrecognizedTokensNode, SourceCodeNode)) or \ hasattr(node, "unrecognized_tokens") and node.unrecognized_tokens: instance.has_unrecognized = True instance.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, []) @@ -193,22 +196,22 @@ class 
AtomConceptParserHelper: clone.has_unrecognized = self.has_unrecognized return clone - def _get_lexer_nodes_from_unrecognized(self): - """ - Use the source of self.unrecognized_tokens gto find concepts or source code - :return: - """ - - res = builtin_helpers.parse_unrecognized(self.context, self.unrecognized_tokens.source, PARSERS) - only_parsers_results = builtin_helpers.only_parsers_results(self.context, res) - - if not only_parsers_results.status: - return None - - return builtin_helpers.get_lexer_nodes( - only_parsers_results.body.body, - self.unrecognized_tokens.start, - self.unrecognized_tokens.tokens) + # def _get_lexer_nodes_from_unrecognized(self): + # """ + # Use the source of self.unrecognized_tokens gto find concepts or source code + # :return: + # """ + # + # res = builtin_helpers.parse_unrecognized(self.context, self.unrecognized_tokens.source, PARSERS) + # only_parsers_results = builtin_helpers.only_parsers_results(self.context, res) + # + # if not only_parsers_results.status: + # return None + # + # return builtin_helpers.get_lexer_nodes( + # only_parsers_results.body.body, + # self.unrecognized_tokens.start, + # self.unrecognized_tokens.tokens) class AtomNodeParser(BaseNodeParser): @@ -230,7 +233,6 @@ class AtomNodeParser(BaseNodeParser): def __init__(self, **kwargs): super().__init__("AtomNode", 50, **kwargs) - self.enabled = False @staticmethod def _is_eligible(concept): @@ -239,7 +241,8 @@ class AtomNodeParser(BaseNodeParser): :param concept: :return: """ - return len(concept.metadata.props) == 0 or concept.metadata.definition_type == DEFINITION_TYPE_BNF + # return len(concept.metadata.props) == 0 or concept.metadata.definition_type == DEFINITION_TYPE_BNF + return len(concept.metadata.variables) == 0 and concept.metadata.definition_type != DEFINITION_TYPE_BNF def get_concepts_sequences(self): @@ -255,6 +258,13 @@ class AtomNodeParser(BaseNodeParser): concept_parser_helpers.extend(forked) forked.clear() + def _get_concepts_by_name(name): + 
other_concepts = self.sheerka.get_by_name(name) + if isinstance(other_concepts, list): + return other_concepts + + return [other_concepts] if self.sheerka.is_known(other_concepts) else [] + concept_parser_helpers = [AtomConceptParserHelper(self.context)] while self.next_token(False): @@ -268,7 +278,7 @@ class AtomNodeParser(BaseNodeParser): if concept_parser.eat_token(self.token, self.pos): concept_parser.lock() - concepts = self.get_concepts(token, self._is_eligible) + concepts = self.get_concepts(token, self._is_eligible, custom=_get_concepts_by_name) if not concepts: for concept_parser in concept_parser_helpers: concept_parser.eat_unrecognized(token, self.pos) diff --git a/src/parsers/BaseNodeParser.py b/src/parsers/BaseNodeParser.py index e995633..4b46dc0 100644 --- a/src/parsers/BaseNodeParser.py +++ b/src/parsers/BaseNodeParser.py @@ -2,8 +2,9 @@ from collections import namedtuple from dataclasses import dataclass from enum import Enum +import core.utils from core.builtin_concepts import BuiltinConcepts -from core.concept import VARIABLE_PREFIX, Concept +from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF, ConceptParts from core.sheerka.ExecutionContext import ExecutionContext from core.tokenizer import TokenKind, LexerError, Token from parsers.BaseParser import Node, BaseParser, ErrorNode @@ -187,6 +188,9 @@ class SourceCodeNode(LexerNode): self.end == other.end and \ self.source == other.source + if isinstance(other, SCN): + return other == self + if not isinstance(other, SourceCodeNode): return False @@ -352,6 +356,51 @@ class HelperWithPos: return self +class SCN(HelperWithPos): + """ + SourceCodeNode tester class + It matches with SourceCodeNode but with less constraints + + SCN == SourceCodeNode if source, start, end (start and end are not validated when None) + """ + + def __init__(self, source, start=None, end=None): + super().__init__(start, end) + self.source = source + + def __eq__(self, other): + if id(self) == id(other): + 
return True + + if isinstance(other, SourceCodeNode): + if self.source != other.source: + return False + if self.start is not None and self.start != other.start: + return False + if self.end is not None and self.end != other.end: + return False + + return True + + if not isinstance(other, CN): + return False + + return self.source == other.source and \ + self.start == other.start and \ + self.end == other.end + + def __hash__(self): + return hash((self.source, self.start, self.end)) + + def __repr__(self): + txt = f"SCN(source='{self.source}'" + if self.start is not None: + txt += f", start={self.start}" + if self.end is not None: + txt += f", end={self.end}" + return txt + ")" + + class CN(HelperWithPos): """ ConceptNode tester class @@ -390,6 +439,8 @@ class CN(HelperWithPos): return False if self.end is not None and self.end != other.end: return False + if self.source is not None and self.source != other.source: + return False return True if not isinstance(other, CN): @@ -425,9 +476,10 @@ class CNC(CN): CNC == ConceptNode if CNC.compiled == ConceptNode.concept.compiled """ - def __init__(self, concept_key, start=None, end=None, source=None, **kwargs): + def __init__(self, concept_key, start=None, end=None, source=None, exclude_body=False, **kwargs): super().__init__(concept_key, start, end, source) self.compiled = kwargs + self.exclude_body = exclude_body def __eq__(self, other): if id(self) == id(other): @@ -442,7 +494,13 @@ class CNC(CN): return False if self.end is not None and self.end != other.end: return False - return self.compiled == other.concept.compiled # assert instead of return to help debugging tests + if self.source is not None and self.source != other.source: + return False + if self.exclude_body: + to_compare = {k: v for k, v in other.concept.compiled.items() if k != ConceptParts.BODY} + else: + to_compare = other.concept.compiled + return self.compiled == to_compare if not isinstance(other, CNC): return False @@ -518,11 +576,10 @@ class 
BaseNodeParser(BaseParser): super().__init__(name, priority) if 'sheerka' in kwargs: sheerka = kwargs.get("sheerka") - self.init_from_sheerka(sheerka) + self.concepts_by_first_keyword = sheerka.resolved_concepts_by_first_keyword else: self.concepts_by_first_keyword = None - self.sya_definitions = None self.token = None self.pos = -1 @@ -532,17 +589,16 @@ class BaseNodeParser(BaseParser): self.text = None self.sheerka = None - def init_from_sheerka(self, sheerka): + def init_from_concepts(self, context, concepts, **kwargs): """ - Use the definitons from Sheerka to initialize - :param sheerka: + Initialize the parser with a list of concepts + For unit tests convenience + :param context + :param concepts :return: """ - self.concepts_by_first_keyword = sheerka.concepts_by_first_keyword - if sheerka.sya_definitions: - self.sya_definitions = {} - for k, v in sheerka.sya_definitions.items(): - self.sya_definitions[k] = (v[0], SyaAssociativity(v[1])) + concepts_by_first_keyword = self.get_concepts_by_first_keyword(context, concepts).body + self.concepts_by_first_keyword = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword).body def reset_parser(self, context, text): self.context = context @@ -582,82 +638,43 @@ class BaseNodeParser(BaseParser): return self.token.type != TokenKind.EOF - def initialize(self, context, concepts, sya_definitions=None, use_sheerka=False): - """ - To quickly find a concept, we store them in an hash where the key is the first token of the concept - example : - Concept("foo a").def_prop("a"), "foo" is a token, "a" is a variable - So the key to use will be "foo" - - Concept("a foo").def_prop("a") -> first token is "foo" - - Concept("Hello my dear a").def_prop("a") -> first token is "Hello" - Note that under the same key, there will be multiple entry - a B-Tree may be a better implementation in the future - - We also store sya_definition which a is tuple (concept_precedence:int, concept_associativity:SyaAssociativity) - :param 
context: - :param concepts: list[Concept] - :param sya_definitions: hash[concept_id, tuple(precedence:int, associativity:SyaAssociativity)] - :param use_sheerka: first init with the definitions from Sheerka - :return: - """ - self.context = context - self.sheerka = context.sheerka - - if use_sheerka: - self.init_from_sheerka(self.sheerka) - - if sya_definitions: - if self.sya_definitions: - self.sya_definitions.update(sya_definitions) - else: - self.sya_definitions = sya_definitions - - if self.concepts_by_first_keyword is None: - self.concepts_by_first_keyword = {} - - for concept in concepts: - keywords = concept.key.split() - for keyword in keywords: - if keyword.startswith(VARIABLE_PREFIX): - continue - - self.concepts_by_first_keyword.setdefault(keyword, []).append(concept.id) - break - - return self.sheerka.ret(self.name, True, self.concepts_by_first_keyword) - - def get_concepts(self, token, to_keep, to_map=None): + def get_concepts(self, token, to_keep, custom=None, to_map=None, strip_quotes=False): """ Tries to find if there are concepts that match the value of the token :param token: :param to_keep: predicate to tell if the concept is eligible + :param custom: lambda name -> List[Concepts] that gives extra concepts, according to the name :param to_map: + :param strip_quotes: Remove quotes from strings :return: """ + if token.type == TokenKind.WHITESPACE: + return None + if token.type == TokenKind.STRING: - name = token.value[1:-1] + name = token.value[1:-1] if strip_quotes else token.value elif token.type == TokenKind.KEYWORD: name = token.value.value else: name = token.value + custom_concepts = custom(name) if custom else [] + result = [] if name in self.concepts_by_first_keyword: - for concept_id in self.concepts_by_first_keyword[name]: + for concept_id in self.concepts_by_first_keyword.get(name): concept = self.sheerka.get_by_id(concept_id) if not to_keep(concept): continue - concept = to_map(concept) if to_map else concept + concept = to_map(self, 
concept) if to_map else concept result.append(concept) - return result + return result + custom_concepts - return None + return custom_concepts if custom else None @staticmethod def get_token_value(token): @@ -667,3 +684,116 @@ class BaseNodeParser(BaseParser): return token.value.value else: return token.value + + @staticmethod + def get_concepts_by_first_keyword(context, concepts, use_sheerka=False): + """ + Create the map describing the first token expected by a concept + :param context: + :param concepts: lists of concepts to parse + :param use_sheerka: if True, update concepts_by_first_keyword from sheerka + :return: + """ + sheerka = context.sheerka + res = sheerka.cache_manager.copy(sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY) if use_sheerka else {} + for concept in concepts: + keywords = BaseNodeParser.get_first_tokens(sheerka, concept) + + if keywords is None: + # no first token found for a concept ? + return sheerka.ret(sheerka.name, False, concept) + + for keyword in keywords: + res.setdefault(keyword, []).append(concept.id) + + return sheerka.ret("BaseNodeParser", True, res) + + @staticmethod + def resolve_concepts_by_first_keyword(context, concepts_by_first_keyword): + sheerka = context.sheerka + + def _make_unique(elements): + keys = {} + for e in elements: + keys[e] = 1 + return list(keys.keys()) + + def _resolve_concepts(concept_str): + resolved = [] + to_resolve = [] + concept = sheerka.get_by_id(core.utils.unstr_concept(concept_str)[1]) + if sheerka.isaset(context, concept): + concepts = sheerka.get_set_elements(context, concept) + else: + concepts = [concept] + + for concept in concepts: + BaseNodeParser.ensure_bnf(context, concept) # need to make sure that it cannot fail + keywords = BaseNodeParser.get_first_tokens(sheerka, concept) + for keyword in keywords: + (to_resolve if keyword.startswith("c:|") else resolved).append(keyword) + + for concept_to_resolve_str in to_resolve: + resolved += _resolve_concepts(concept_to_resolve_str) + + return 
resolved + + res = {} + for k, v in concepts_by_first_keyword.items(): + if k.startswith("c:|"): + resolved_keywords = _resolve_concepts(k) + for resolved in resolved_keywords: + res.setdefault(resolved, []).extend(v) + else: + res.setdefault(k, []).extend(v) + + # 'uniquify' the lists + for k, v in res.items(): + res[k] = _make_unique(v) + + return sheerka.ret("BaseNodeParser", True, res) + + @staticmethod + def resolve_sya_associativity_and_precedence(context, sya): + pass + + @staticmethod + def get_first_tokens(sheerka, concept): + """ + + :param sheerka: + :param concept: + :return: + """ + if concept.bnf: + from parsers.BnfNodeParser import BnfNodeFirstTokenVisitor + bnf_visitor = BnfNodeFirstTokenVisitor(sheerka) + bnf_visitor.visit(concept.bnf) + return bnf_visitor.first_tokens + else: + keywords = concept.key.split() + for keyword in keywords: + if keyword.startswith(VARIABLE_PREFIX): + continue + + return [keyword] + + return None + + @staticmethod + def ensure_bnf(context, concept, parser_name="BaseNodeParser"): + if concept.metadata.definition_type == DEFINITION_TYPE_BNF and not concept.bnf: + from parsers.BnfParser import BnfParser + regex_parser = BnfParser() + desc = f"Resolving BNF {concept.metadata.definition}" + with context.push(parser_name, obj=concept, desc=desc) as sub_context: + sub_context.add_inputs(parser_input=concept.metadata.definition) + bnf_parsing_ret_val = regex_parser.parse(sub_context, concept.metadata.definition) + sub_context.add_values(return_values=bnf_parsing_ret_val) + + if not bnf_parsing_ret_val.status: + raise Exception(bnf_parsing_ret_val.value) + + concept.bnf = bnf_parsing_ret_val.body.body + if concept.id: + context.sheerka.get_by_id(concept.id).bnf = concept.bnf # update bnf in cache diff --git a/src/parsers/BnfNodeParser.py b/src/parsers/BnfNodeParser.py index 31cbeaa..f990b25 100644 --- a/src/parsers/BnfNodeParser.py +++ b/src/parsers/BnfNodeParser.py @@ -6,15 +6,24 @@ # Arpeggio: A flexible PEG parser for Python, 
# Knowledge-Based Systems, 2016, 95, 71 - 74, doi:10.1016/j.knosys.2015.12.004 ##################################################################################################### -from collections import namedtuple -from dataclasses import dataclass from collections import defaultdict -from core.builtin_concepts import BuiltinConcepts, ParserResultConcept -from core.concept import Concept, ConceptParts, DoNotResolve -from core.tokenizer import TokenKind, Tokenizer, Token -from parsers.BaseNodeParser import LexerNode, GrammarErrorNode, ConceptNode, UnrecognizedTokensNode -from parsers.BaseParser import BaseParser, ErrorNode +from dataclasses import dataclass + import core.utils +from cache.Cache import Cache +from core import builtin_helpers +from core.builtin_concepts import BuiltinConcepts +from core.concept import Concept, DEFINITION_TYPE_BNF, DoNotResolve, ConceptParts +from core.tokenizer import Tokenizer, Token, TokenKind +from parsers.BaseNodeParser import BaseNodeParser, LexerNode, UnrecognizedTokensNode, ConceptNode, GrammarErrorNode +from parsers.BaseParser import ErrorNode + +PARSERS = ["AtomNode", "SyaNode", "Python"] + + +@dataclass +class ConceptParsingError(ErrorNode): + concept: Concept class NonTerminalNode(LexerNode): @@ -75,16 +84,6 @@ class TerminalNode(LexerNode): return hash((self.parsing_expression, self.start, self.end, self.value)) -@dataclass() -class UnknownConceptNode(ErrorNode): - concept_key: str - - -@dataclass() -class TooManyConceptNode(ErrorNode): - concept_key: str - - class ParsingExpression: def __init__(self, *args, **kwargs): self.elements = args @@ -125,7 +124,7 @@ class ConceptExpression(ParsingExpression): self.concept = concept def __repr__(self): - return self.add_rule_name_if_needed(f"{self.concept}") + return self.add_rule_name_if_needed(f"{self.concept}") def __eq__(self, other): if not super().__eq__(other): @@ -143,58 +142,27 @@ class ConceptExpression(ParsingExpression): def __hash__(self): return 
hash((self.concept, self.rule_name)) - @staticmethod - def get_parsing_expression_from_name(name): - tokens = Tokenizer(name) - nodes = [StrMatch(core.utils.strip_quotes(token.value)) for token in list(tokens)[:-1]] - if len(nodes) == 1: - return nodes[0] - else: - sequence = Sequence(nodes) - sequence.nodes = nodes - return sequence - - def _parse(self, parser): - to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept - if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT): - return None - - self.concept = to_match # Memoize - - if to_match not in parser.concepts_grammars: - # Try to match the concept using its name - expr = self.get_parsing_expression_from_name(to_match.name) - node = expr.parse(parser) - else: - node = parser.concepts_grammars[to_match].parse(parser) - + def _parse(self, parser_helper): + node = self.nodes[0].parse(parser_helper) if node is None: return None - - return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node]) + return NonTerminalNode(self, + node.start, + node.end, + parser_helper.parser.tokens[node.start: node.end + 1], + [node]) -class ConceptGroupExpression(ConceptExpression): - def _parse(self, parser): - to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept - if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT): - return None - - self.concept = to_match # Memoize - - if to_match not in parser.concepts_grammars: - concepts_in_group = parser.sheerka.get_set_elements(parser.context, self.concept) - nodes = [ConceptExpression(c, rule_name=c.name) for c in concepts_in_group] - expr = OrderedChoice(nodes) - expr.nodes = nodes - node = expr.parse(parser) - else: - node = parser.concepts_grammars[to_match].parse(parser) - - if node is None: - return None - - return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node]) +# class 
ConceptGroupExpression(ConceptExpression): +# def _parse(self, parser_helper): +# node = self.nodes[0].parse(parser_helper) +# if node is None: +# return None +# return NonTerminalNode(self, +# node.start, +# node.end, +# node.tokens, # node is an OrderedChoice +# [node]) class Sequence(ParsingExpression): @@ -202,13 +170,13 @@ class Sequence(ParsingExpression): Will match sequence of parser expressions in exact order they are defined. """ - def _parse(self, parser): - init_pos = parser.pos - end_pos = parser.pos + def _parse(self, parser_helper): + init_pos = parser_helper.pos + end_pos = parser_helper.pos children = [] for e in self.nodes: - node = e.parse(parser) + node = e.parse(parser_helper) if node is None: return None else: @@ -216,7 +184,7 @@ class Sequence(ParsingExpression): children.append(node) end_pos = node.end - return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children) + return NonTerminalNode(self, init_pos, end_pos, parser_helper.parser.tokens[init_pos: end_pos + 1], children) def __repr__(self): to_str = ", ".join(repr(n) for n in self.elements) @@ -229,15 +197,19 @@ class OrderedChoice(ParsingExpression): It will stop at the first match (so the order of definition is important) """ - def _parse(self, parser): - init_pos = parser.pos + def _parse(self, parser_helper): + init_pos = parser_helper.pos for e in self.nodes: - node = e.parse(parser) + node = e.parse(parser_helper) if node: - return NonTerminalNode(self, init_pos, node.end, parser.tokens[init_pos: node.end + 1], [node]) + return NonTerminalNode(self, + init_pos, + node.end, + parser_helper.parser.tokens[init_pos: node.end + 1], + [node]) - parser.seek(init_pos) # backtrack + parser_helper.seek(init_pos) # backtrack return None @@ -253,26 +225,26 @@ class Optional(ParsingExpression): If you need order, use Optional(OrderedChoice) """ - def _parse(self, parser): - init_pos = parser.pos - selected_node = NonTerminalNode(self, parser.pos, -1, [], []) # 
means that nothing is found + def _parse(self, parser_helper): + init_pos = parser_helper.pos + selected_node = NonTerminalNode(self, parser_helper.pos, -1, [], []) # means that nothing is found for e in self.nodes: - node = e.parse(parser) + node = e.parse(parser_helper) if node: if node.end > selected_node.end: selected_node = NonTerminalNode( self, node.start, node.end, - parser.tokens[node.start: node.end + 1], + parser_helper.parser.tokens[node.start: node.end + 1], [node]) - parser.seek(init_pos) # backtrack + parser_helper.seek(init_pos) # backtrack if selected_node.end != -1: - parser.seek(selected_node.end) - parser.next_token() # eat the tokens found + parser_helper.seek(selected_node.end) + parser_helper.next_token() # eat the tokens found return selected_node @@ -303,25 +275,25 @@ class ZeroOrMore(Repetition): times. It will never fail. """ - def _parse(self, parser): - init_pos = parser.pos + def _parse(self, parser_helper): + init_pos = parser_helper.pos end_pos = -1 children = [] while True: - current_pos = parser.pos + current_pos = parser_helper.pos # maybe eat the separator if needed if self.sep and children: - sep_result = self.sep.parse(parser) + sep_result = self.sep.parse(parser_helper) if sep_result is None: - parser.seek(current_pos) + parser_helper.seek(current_pos) break # eat the ZeroOrMore - node = self.nodes[0].parse(parser) + node = self.nodes[0].parse(parser_helper) if node is None: - parser.seek(current_pos) + parser_helper.seek(current_pos) break else: if node.end != -1: # because returns -1 when no match @@ -331,7 +303,7 @@ class ZeroOrMore(Repetition): if len(children) == 0: return NonTerminalNode(self, init_pos, -1, [], []) - return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children) + return NonTerminalNode(self, init_pos, end_pos, parser_helper.parser.tokens[init_pos: end_pos + 1], children) def __repr__(self): to_str = ", ".join(repr(n) for n in self.elements) @@ -343,25 +315,25 @@ class 
OneOrMore(Repetition): OneOrMore will try to match parser expression specified one or more times. """ - def _parse(self, parser): - init_pos = parser.pos + def _parse(self, parser_helper): + init_pos = parser_helper.pos end_pos = -1 children = [] while True: - current_pos = parser.pos + current_pos = parser_helper.pos # maybe eat the separator if needed if self.sep and children: - sep_result = self.sep.parse(parser) + sep_result = self.sep.parse(parser_helper) if sep_result is None: - parser.seek(current_pos) + parser_helper.seek(current_pos) break # eat the ZeroOrMore - node = self.nodes[0].parse(parser) + node = self.nodes[0].parse(parser_helper) if node is None: - parser.seek(current_pos) + parser_helper.seek(current_pos) break else: if node.end != -1: # because returns -1 when no match @@ -371,7 +343,7 @@ class OneOrMore(Repetition): if len(children) == 0: # if nothing is found, it's an error return None - return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children) + return NonTerminalNode(self, init_pos, end_pos, parser_helper.parser.tokens[init_pos: end_pos + 1], children) def __repr__(self): to_str = ", ".join(repr(n) for n in self.elements) @@ -426,60 +398,106 @@ class StrMatch(Match): return self.to_match == other.to_match and self.ignore_case == other.ignore_case - def _parse(self, parser): - token = parser.get_token() + def _parse(self, parser_helper): + token = parser_helper.get_token() m = str(token.value).lower() == self.to_match.lower() if self.ignore_case \ else token.value == self.to_match if m: - node = TerminalNode(self, parser.pos, parser.pos, token.value) - parser.next_token() + node = TerminalNode(self, parser_helper.pos, parser_helper.pos, token.value) + parser_helper.next_token() return node return None -class BnfNodeParser(BaseParser): - def __init__(self, **kwargs): - super().__init__("BnfNode", 50) - if 'grammars' in kwargs: - self.concepts_grammars = kwargs.get("grammars") - elif 'sheerka' in kwargs: - 
self.concepts_grammars = kwargs.get("sheerka").concepts_grammars - else: - self.concepts_grammars = {} +class ParsingExpressionVisitor: + """ + visit ParsingExpression + """ - self.ignore_case = True + STOP = "##_Stop_##" + + def visit(self, parsing_expression): + name = parsing_expression.__class__.__name__ + + method = 'visit_' + name + visitor = getattr(self, method, self.generic_visit) + return visitor(parsing_expression) + + def generic_visit(self, parsing_expression): + if hasattr(self, "visit_all"): + self.visit_all(parsing_expression) + + for node in parsing_expression.elements: + if isinstance(node, Concept): + res = self.visit(ConceptExpression(node.key or node.name)) + elif isinstance(node, str): + res = self.visit(StrMatch(node)) + else: + res = self.visit(node) + + if res == self.STOP: + return + + +class BnfNodeFirstTokenVisitor(ParsingExpressionVisitor): + def __init__(self, sheerka): + self.sheerka = sheerka + self.first_tokens = None + + def add_first_token(self, first_token): + if not self.first_tokens: + self.first_tokens = [first_token] + else: + self.first_tokens.append(first_token) + + def visit_ConceptExpression(self, pe): + concept = self.sheerka.get_by_key(pe.concept) if isinstance(pe.concept, str) else pe.concept + if self.sheerka.is_known(concept): + self.add_first_token(core.utils.str_concept(concept, skip_key=True)) + return self.STOP + + def visit_StrMatch(self, pe): + self.add_first_token(pe.to_match) + return self.STOP + + def visit_OrderedChoice(self, parsing_expression): + for node in parsing_expression.elements: + self.visit(node) + return self.STOP + + +class BnfConceptParserHelper: + def __init__(self, parser): + self.parser = parser + self.debug = [] + self.errors = [] + self.sequence = [] + + self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, []) + self.has_unrecognized = False + self.bnf_parsed = False + + self.forked = [] self.token = None self.pos = -1 - self.tokens = None - self.context = None - self.text = None - 
self.sheerka = None + def __repr__(self): + return f"BnfConceptParserHelper({self.sequence})" - def add_error(self, error, next_token=True): - self.error_sink.append(error) - if next_token: - self.next_token() - return error + def __eq__(self, other): + if id(self) == id(other): + return True - def reset_parser(self, context, text): - self.context = context - self.sheerka = context.sheerka - self.text = text - - try: - self.tokens = list(self.get_input_as_tokens(text)) - except core.tokenizer.LexerError as e: - self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False) + if not isinstance(other, BnfConceptParserHelper): return False - self.token = None - self.pos = -1 - self.next_token(False) - return True + return self.sequence == other.sequence and self.errors == other.errors + + def __hash__(self): + return len(self.sequence) + len(self.errors) def get_token(self) -> Token: return self.token @@ -489,265 +507,136 @@ class BnfNodeParser(BaseParser): return False self.pos += 1 - self.token = self.tokens[self.pos] + self.token = self.parser.tokens[self.pos] if skip_whitespace: while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE: self.pos += 1 - self.token = self.tokens[self.pos] + self.token = self.parser.tokens[self.pos] return self.token.type != TokenKind.EOF def seek(self, pos): self.pos = pos - self.token = self.tokens[self.pos] - return True + self.token = self.parser.tokens[self.pos] - def rewind(self, offset, skip_whitespace=True): - self.pos += offset - self.token = self.tokens[self.pos] + def has_error(self): + return len(self.errors) > 0 - if skip_whitespace: - while self.pos > 0 and (self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE): - self.pos -= 1 - self.token = self.tokens[self.pos] + def is_locked(self): + return self.parser.pos <= self.pos or self.has_error() - def initialize(self, context, concepts_definitions): - """ - Adds a bunch of concepts, and how they can be 
recognized - :param context: execution context - :param concepts_definitions: dictionary of concept, concept_definition - :return: - """ + def eat_concept(self, concept, token): + if self.is_locked(): + return - self.context = context - self.sheerka = context.sheerka - concepts_to_resolve = set() + self.debug.append(concept) + self.manage_unrecognized() + for forked in self.forked: + # manage the fact that some clone may have been forked + forked.eat_concept(concept, token) - for concept, concept_def in concepts_definitions.items(): - # ## Gets the grammars - context.log(f"Resolving grammar for '{concept}'", context.who) - concept.init_key() # make sure that the key is initialized - grammar = self.get_model(concept_def, concepts_to_resolve) - self.concepts_grammars[concept] = grammar + # init + parsing_expression = self.parser.get_parsing_expression(concept) + if not isinstance(parsing_expression, ParsingExpression): + self.debug.append(concept) + error_msg = f"Failed to parse concept '{concept}'" + if parsing_expression is not None: + error_msg += f". 
Reason: '{parsing_expression}'" + self.errors.append(GrammarErrorNode(error_msg)) + return - if self.has_error: - return self.sheerka.ret(self.name, False, self.error_sink) + self.pos = self.parser.pos + self.token = self.parser.tokens[self.pos] - # ## Removes concepts with infinite recursions - concepts_to_remove = self.detect_infinite_recursion(concepts_to_resolve) - for concept in concepts_to_remove: - concepts_to_resolve.remove(concept) - del self.concepts_grammars[concept] - - if self.has_error: - return self.sheerka.ret(self.name, False, self.error_sink) + # parse + node = parsing_expression.parse(self) + if node is not None and node.end != -1: + self.sequence.append(self.create_concept_node(concept, node)) + self.pos = node.end + self.bnf_parsed = True else: - return self.sheerka.ret(self.name, True, self.concepts_grammars) + self.debug.append(("Rewind", token)) + self.unrecognized_tokens.add_token(token, self.parser.pos) + self.pos = self.parser.pos # reset position - def get_concept(self, concept_name): - if concept_name in self.context.concepts: - return self.context.concepts[concept_name] - return self.sheerka.get(concept_name) + def eat_unrecognized(self, token): + if self.is_locked(): + return - def get_model(self, concept_def, concepts_to_resolve): + self.debug.append(token) + self.unrecognized_tokens.add_token(token, self.parser.pos) - # TODO - # inner_get_model must not modify the initial ParsingExpression - # A copy must be created - def inner_get_model(expression): - if isinstance(expression, Concept): - if self.sheerka.isaset(self.context, expression): - ret = ConceptGroupExpression(expression, rule_name=expression.name) - else: - ret = ConceptExpression(expression, rule_name=expression.name) - concepts_to_resolve.add(expression) - elif isinstance(expression, ConceptExpression): # it includes ConceptGroupExpression - if expression.rule_name is None or expression.rule_name == "": - expression.rule_name = expression.concept.name if 
isinstance(expression.concept, Concept) \ - else expression.concept - if isinstance(expression.concept, str): - concept = self.get_concept(expression.concept) - if self.sheerka.is_known(concept): - expression.concept = concept - concepts_to_resolve.add(expression.concept) - ret = expression - elif isinstance(expression, str): - ret = StrMatch(expression, ignore_case=self.ignore_case) - elif isinstance(expression, StrMatch): - ret = expression - if ret.ignore_case is None: - ret.ignore_case = self.ignore_case - elif isinstance(expression, Sequence) or \ - isinstance(expression, OrderedChoice) or \ - isinstance(expression, ZeroOrMore) or \ - isinstance(expression, OneOrMore) or \ - isinstance(expression, Optional): - ret = expression - ret.nodes = [inner_get_model(e) for e in ret.elements] - else: - ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False) + def manage_unrecognized(self): + if self.unrecognized_tokens.is_empty(): + return - # Translate separator expression. 
- if isinstance(expression, Repetition) and expression.sep: - expression.sep = inner_get_model(expression.sep) + # do not put empty UnrecognizedToken in out + if self.unrecognized_tokens.is_whitespace(): + self.unrecognized_tokens.reset() + return - return ret + self.unrecognized_tokens.fix_source() - model = inner_get_model(concept_def) + # try to recognize concepts + nodes_sequences = builtin_helpers.get_lexer_nodes_from_unrecognized( + self.parser.context, + self.unrecognized_tokens, + PARSERS) - return model + if nodes_sequences: + instances = [self] + for i in range(len(nodes_sequences) - 1): + clone = self.clone() + instances.append(clone) + self.forked.append(clone) - def detect_infinite_recursion(self, concepts_to_resolve): + for instance, node_sequence in zip(instances, nodes_sequences): + for node in node_sequence: + instance.sequence.append(node) + if isinstance(node, UnrecognizedTokensNode) or \ + hasattr(node, "unrecognized_tokens") and node.unrecognized_tokens: + instance.has_unrecognized = True + instance.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, []) - # infinite recursion matcher - def _is_infinite_recursion(ref_concept, node): - if isinstance(node, ConceptExpression): - if node.concept == ref_concept: - return True - - if isinstance(node.concept, str): - to_match = self.get_concept(node.concept) - if self.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT): - return False - else: - to_match = node.concept - - if to_match not in self.concepts_grammars: - return False - - return _is_infinite_recursion(ref_concept, self.concepts_grammars[to_match]) - - if isinstance(node, OrderedChoice): - return _is_infinite_recursion(ref_concept, node.nodes[0]) - - if isinstance(node, Sequence): - for node in node.nodes: - if _is_infinite_recursion(ref_concept, node): - return True - return False - - return False - - removed_concepts = [] - for e in concepts_to_resolve: - if isinstance(e, str): - e = self.get_concept(e) - if 
self.sheerka.isinstance(e, BuiltinConcepts.UNKNOWN_CONCEPT): - continue - - if e not in self.concepts_grammars: - continue - - to_resolve = self.concepts_grammars[e] - if _is_infinite_recursion(e, to_resolve): - removed_concepts.append(e) - return removed_concepts - - def parse(self, context, parser_input): - if parser_input == "": - return context.sheerka.ret( - self.name, - False, - context.sheerka.new(BuiltinConcepts.IS_EMPTY) - ) - - if not self.reset_parser(context, parser_input): - return self.sheerka.ret( - self.name, - False, - context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink)) - - concepts_found = [[]] - unrecognized_tokens = None - has_unrecognized = False - - # actually list of list - # The first dimension is the number of possibilities found - # The second dimension is the number of concepts found, under one possibility - # - # Example 1 - # concept foo : 'one' 'two' - # concept bar : 'one' 'two' - # input 'one two' -> will produce two possibilities (foo and bar). 
- # - # Example 2 - # concept foo : 'one' - # concept bar : 'two' - # input 'one two' -> will produce one possibility which is (foo, bar) (foo then bar) - - while True: - init_pos = self.pos - res = [] - - for concept, grammar in self.concepts_grammars.items(): - self.seek(init_pos) - node = grammar.parse(self) # a node is TerminalNode or NonTerminalNode - if node is not None and node.end != -1: - updated_concept = self.finalize_concept(context.sheerka, concept, node) - concept_node = ConceptNode( - updated_concept, - node.start, - node.end, - self.tokens[node.start: node.end + 1], - None, - node) - res.append(concept_node) - - if len(res) == 0: # not recognized - self.seek(init_pos) - if unrecognized_tokens: - unrecognized_tokens.add_token(self.get_token(), init_pos) - else: - unrecognized_tokens = UnrecognizedTokensNode(init_pos, init_pos, [self.get_token()]) - - if not self.next_token(False): - break - - else: # some concepts are recognized - if unrecognized_tokens and unrecognized_tokens.not_whitespace(): - unrecognized_tokens.fix_source() - concepts_found = core.utils.product(concepts_found, [unrecognized_tokens]) - has_unrecognized = True - unrecognized_tokens = None - - res = self.get_bests(res) # only keep the concepts that eat the more tokens - concepts_found = core.utils.product(concepts_found, res) - - # loop - self.seek(res[0].end) - if not self.next_token(False): - break - - # Fix the source for unrecognized tokens - if unrecognized_tokens and unrecognized_tokens.not_whitespace(): - unrecognized_tokens.fix_source() - concepts_found = core.utils.product(concepts_found, [unrecognized_tokens]) - has_unrecognized = True - - # else - # returns as many ReturnValue than choices found - ret = [] - for choice in concepts_found: - ret.append( - self.sheerka.ret( - self.name, - not has_unrecognized, - self.sheerka.new( - BuiltinConcepts.PARSER_RESULT, - parser=self, - source=parser_input, - body=choice, - try_parsed=choice))) - - if len(ret) == 1: - 
self.log_result(context, parser_input, ret[0]) - return ret[0] else: - self.log_multiple_results(context, parser_input, ret) - return ret + self.sequence.append(self.unrecognized_tokens) + self.has_unrecognized = True - def finalize_concept(self, sheerka, template, underlying, init_empty_body=True): + # create another instance + self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, []) + + def clone(self): + clone = BnfConceptParserHelper(self.parser) + clone.debug = self.debug[:] + self.errors = self.errors[:] + clone.sequence = self.sequence[:] + clone.pos = self.pos + clone.unrecognized_tokens = self.unrecognized_tokens.clone() + return clone + + def finalize(self): + if self.bnf_parsed > 0: + self.manage_unrecognized() + for forked in self.forked: + # manage that some clones may have been forked + forked.finalize() + + def create_concept_node(self, template, underlying): + sheerka = self.parser.context.sheerka + key = (template.key, template.id) if template.id else template.key + concept = sheerka.new(key) + concept = self.finalize_concept(sheerka, concept, underlying) + concept_node = ConceptNode( + concept, + underlying.start, + underlying.end, + self.parser.tokens[underlying.start: underlying.end + 1], + None, + underlying) + return concept_node + + def finalize_concept(self, sheerka, concept, underlying, init_empty_body=True): """ Updates the properties of the concept Goes in recursion if the property is a concept @@ -798,7 +687,8 @@ class BnfNodeParser(BaseParser): result = _underlying_value_cache[id(concept_match_node)] else: ref_tpl = concept_match_node.parsing_expression.concept - result = self.finalize_concept(sheerka, ref_tpl, concept_match_node.children[0], init_empty_body) + new = sheerka.new_from_template(ref_tpl, ref_tpl.key) + result = self.finalize_concept(sheerka, new, concept_match_node.children[0], init_empty_body) _underlying_value_cache[id(concept_match_node)] = result else: # the value is a string @@ -816,8 +706,6 @@ class 
BnfNodeParser(BaseParser): for child in _underlying.children: _process_rule_name(_concept, child) - key = (template.key, template.id) if template.id else template.key - concept = sheerka.new(key) if init_empty_body and concept.metadata.body is None: value = _get_underlying_value(underlying) concept.compiled[ConceptParts.BODY] = value @@ -831,81 +719,271 @@ class BnfNodeParser(BaseParser): return concept - def encode_grammar(self, grammar): - """ - Transform the grammar into something that can easily can be serialized - :param grammar: - :return: - """ - def _encode(expression): - if isinstance(expression, StrMatch): - res = f"'{expression.to_match}'" +class BnfNodeParser(BaseNodeParser): + def __init__(self, **kwargs): + super().__init__("BnfNode", 50, **kwargs) - elif isinstance(expression, ConceptExpression): - res = core.utils.str_concept(expression.concept) + if 'sheerka' in kwargs: + sheerka = kwargs.get("sheerka") + self.concepts_grammars = sheerka.concepts_grammars + else: + self.concepts_grammars = Cache() - elif isinstance(expression, Sequence): - res = "(" + " ".join(_encode(c) for c in expression.nodes) + ")" - - elif isinstance(expression, OrderedChoice): - res = "(" + "|".join(_encode(c) for c in expression.nodes) + ")" - - elif isinstance(expression, Optional): - res = _encode(expression.nodes[0]) + "?" 
- - elif isinstance(expression, ZeroOrMore): - res = _encode(expression.nodes[0]) + "*" - - elif isinstance(expression, OneOrMore): - res = _encode(expression.nodes[0]) + "+" - - if expression.rule_name: - res += "=" + expression.rule_name - - return res - - result = {} - for k, v in grammar.items(): - key = core.utils.str_concept(k) - value = _encode(v) - result[key] = value - return result + self.ignore_case = True @staticmethod - def get_bests(results): + def _is_eligible(concept): """ - Returns the result that is the longest - :param results: + Predicate that select concepts that must handled by AtomNodeParser + :param concept: :return: """ - by_end_pos = defaultdict(list) - for result in results: - by_end_pos[result.end].append(result) + return concept.metadata.definition_type == DEFINITION_TYPE_BNF - return by_end_pos[max(by_end_pos)] + @staticmethod + def get_valid(parsers_helpers): + valid_parser_helpers = [] + for parser_helper in parsers_helpers: + if not parser_helper.bnf_parsed or parser_helper.has_error(): + continue + if parser_helper in valid_parser_helpers: + continue -class ParsingExpressionVisitor: - """ - visit ParsingExpression - """ + valid_parser_helpers.append(parser_helper) - def visit(self, parsing_expression): - name = parsing_expression.__class__.__name__ + return valid_parser_helpers - method = 'visit_' + name - visitor = getattr(self, method, self.generic_visit) - return visitor(parsing_expression) + def get_concepts_sequences(self): + def _add_forked_to_concept_parser_helpers(): + # check that if some new InfixToPostfix are created + for parser in concept_parser_helpers: + if len(parser.forked) > 0: + forked.extend(parser.forked) + parser.forked.clear() + if len(forked) > 0: + concept_parser_helpers.extend(forked) + forked.clear() - def generic_visit(self, parsing_expression): - if hasattr(self, "visit_all"): - self.visit_all(parsing_expression) + def _get_longest(parser_helpers): + # when there is a match with several concepts + # on 
keep the ones that eat the more tokens + by_end_pos = defaultdict(list) + for helper in parser_helpers: + by_end_pos[helper.pos].append(helper) + + return by_end_pos[max(by_end_pos)] + + forked = [] + + concept_parser_helpers = [BnfConceptParserHelper(self)] + + while self.next_token(False): + + token = self.get_token() + + try: + concepts = self.get_concepts(token, self._is_eligible, strip_quotes=False) + + if not concepts: + for concept_parser in concept_parser_helpers: + concept_parser.eat_unrecognized(token) + continue + + if len(concepts) == 1: + for concept_parser in concept_parser_helpers: + concept_parser.eat_concept(concepts[0], token) + continue + + # make the cartesian product + temp_res = [] + for concept_parser in concept_parser_helpers: + if concept_parser.is_locked(): + # It means that it already eat the token + # so simply add it, do not clone + temp_res.append(concept_parser) + continue + + for concept in concepts: + clone = concept_parser.clone() + temp_res.append(clone) + clone.eat_concept(concept, token) + + # only keep the longest + concept_parser_helpers = _get_longest(temp_res) + + finally: + _add_forked_to_concept_parser_helpers() + + # make sure that remaining items in stack are moved to out + for concept_parser in concept_parser_helpers: + concept_parser.finalize() + _add_forked_to_concept_parser_helpers() + + return concept_parser_helpers + + def get_parsing_expression(self, concept, already_seen=None): + if concept.id in self.concepts_grammars: + return self.concepts_grammars.get(concept.id) + + if not concept.bnf: + BaseNodeParser.ensure_bnf(self.context, concept, self.name) + + expression = concept.bnf + desc = f"Resolving parsing expression {expression}" + with self.context.push(self.name, obj=concept, desc=desc) as sub_context: + sub_context.add_inputs(expression=expression) + resolved = self.resolve_parsing_expression(expression, already_seen or set()) + sub_context.add_values(return_values=resolved) + + 
self.concepts_grammars.put(concept.id, resolved) + + if self.has_error: + return None + + return self.concepts_grammars.get(concept.id) + + def resolve_parsing_expression(self, parsing_expression, already_seen): + + def inner_resolve(expression, inner_already_seen): + # if isinstance(expression, Concept): + # if self.sheerka.isaset(self.context, expression): + # ret = ConceptGroupExpression(expression, rule_name=expression.name) + # else: + # ret = ConceptExpression(expression, rule_name=expression.name) + # possible_recursion.add(expression) + if isinstance(expression, str): + ret = StrMatch(expression, ignore_case=self.ignore_case) + + elif not isinstance(expression, ParsingExpression): + return expression # escalate the error + + elif isinstance(expression, ConceptExpression): + concept = self.get_concept(expression.concept) + if concept in inner_already_seen: + return self.sheerka.new(BuiltinConcepts.CHICKEN_AND_EGG, body=concept) + expression.concept = concept + inner_already_seen.add(concept) + + if not self.sheerka.is_known(concept): + unknown_concept = self.sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=concept) + return self.add_error(unknown_concept) + + # bnf concept + elif concept.metadata.definition_type == DEFINITION_TYPE_BNF: + pe = self.get_parsing_expression(concept, inner_already_seen) + + elif self.sheerka.isaset(self.context, concept): + concepts_in_group = self.sheerka.get_set_elements(self.context, concept) + nodes = [ConceptExpression(c, rule_name=c.name) for c in concepts_in_group] + pe = inner_resolve(OrderedChoice(*nodes), inner_already_seen) + + else: + # regular concepts + tokens = Tokenizer(concept.name) + nodes = [StrMatch(core.utils.strip_quotes(token.value)) for token in list(tokens)[:-1]] + pe = inner_resolve(nodes[0] if len(nodes) == 1 else Sequence(nodes), inner_already_seen) + + if not isinstance(pe, ParsingExpression): + return pe + expression.nodes = [pe] + expression.rule_name = expression.rule_name or concept.name + ret = 
expression + + elif isinstance(expression, StrMatch): + ret = expression + if ret.ignore_case is None: + ret.ignore_case = self.ignore_case + + elif isinstance(expression, Sequence) or \ + isinstance(expression, OrderedChoice) or \ + isinstance(expression, ZeroOrMore) or \ + isinstance(expression, OneOrMore) or \ + isinstance(expression, Optional): + ret = expression + ret.nodes = [] + for e in ret.elements: + pe = inner_resolve(e, already_seen.copy()) + if not isinstance(pe, ParsingExpression): + return pe + ret.nodes.append(pe) - for node in parsing_expression.elements: - if isinstance(node, Concept): - self.visit(ConceptExpression(node.key or node.name)) - elif isinstance(node, str): - self.visit(StrMatch(node)) else: - self.visit(node) + ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False) + + # Translate separator expression. + if isinstance(expression, Repetition) and expression.sep: + expression.sep = inner_resolve(expression.sep, already_seen) + + return ret + + parsing_expression = inner_resolve(parsing_expression, already_seen) + return parsing_expression + + def get_concept(self, concept): + if isinstance(concept, Concept): + return concept + + if concept in self.context.concepts: + return self.context.concepts[concept] + return self.sheerka.get_by_key(concept) + + def parse(self, context, parser_input): + """ + parser_input can be string, but text can also be an list of tokens + :param context: + :param parser_input: + :return: + """ + + context.log(f"Parsing '{parser_input}' with BnfNode", self.name) + sheerka = context.sheerka + + if parser_input == "" or isinstance(parser_input, list) and len(parser_input) == 0: + return sheerka.ret(self.name, + False, + sheerka.new(BuiltinConcepts.NOT_FOR_ME, + body=parser_input, + reason=BuiltinConcepts.IS_EMPTY)) + + if not self.reset_parser(context, parser_input): + return self.sheerka.ret( + self.name, + False, + context.sheerka.new(BuiltinConcepts.ERROR, 
body=self.error_sink)) + + sequences = self.get_concepts_sequences() + valid_parser_helpers = self.get_valid(sequences) + if valid_parser_helpers is None: + # token error + return self.sheerka.ret( + self.name, + False, + context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink)) + + if len(valid_parser_helpers) == 0: + return self.sheerka.ret( + self.name, + False, + context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input)) + + ret = [] + for parser_helper in valid_parser_helpers: + ret.append( + self.sheerka.ret( + self.name, + not parser_helper.has_unrecognized, + self.sheerka.new( + BuiltinConcepts.PARSER_RESULT, + parser=self, + source=parser_input, + body=parser_helper.sequence, + try_parsed=parser_helper.sequence))) + + if len(ret) == 1: + self.log_result(context, parser_input, ret[0]) + return ret[0] + else: + self.log_multiple_results(context, parser_input, ret) + return ret diff --git a/src/parsers/BnfParser.py b/src/parsers/BnfParser.py index 1d3dc6d..18e7141 100644 --- a/src/parsers/BnfParser.py +++ b/src/parsers/BnfParser.py @@ -6,7 +6,7 @@ from core.sheerka.Sheerka import ExecutionContext from core.tokenizer import Tokenizer, Token, TokenKind, LexerError from parsers.BaseParser import BaseParser, ErrorNode, UnexpectedTokenErrorNode from parsers.BnfNodeParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptExpression, \ - StrMatch, ConceptGroupExpression + StrMatch @dataclass() @@ -234,8 +234,9 @@ class BnfParser(BaseParser): if token.type == TokenKind.CONCEPT: self.next_token() concept = self.sheerka.new((token.value[0], token.value[1])) - expr = ConceptGroupExpression(concept) if self.sheerka.isaset(self.context, concept) \ - else ConceptExpression(concept) + expr = ConceptExpression(concept) + # expr = ConceptGroupExpression(concept) if self.sheerka.isaset(self.context, concept) \ + # else ConceptExpression(concept) return self.eat_rule_name_if_needed(expr) if token.type == TokenKind.IDENTIFIER: @@ -259,8 
+260,7 @@ class BnfParser(BaseParser): body=("key", concept_name))) return None else: - expr = ConceptGroupExpression(concept) if self.sheerka.isaset(self.context, concept) \ - else ConceptExpression(concept) + expr = ConceptExpression(concept) expr.rule_name = concept.name return self.eat_rule_name_if_needed(expr) diff --git a/src/parsers/ConceptsWithConceptsParser.py b/src/parsers/ConceptsWithConceptsParser.py deleted file mode 100644 index 5ba3543..0000000 --- a/src/parsers/ConceptsWithConceptsParser.py +++ /dev/null @@ -1,109 +0,0 @@ -# try to match something like -# ConceptNode 'plus' ConceptNode -# -# Replaced by SyaNodeParser -from core.builtin_concepts import BuiltinConcepts -from core.tokenizer import TokenKind, Token -from parsers.BaseNodeParser import SourceCodeNode -from parsers.BaseParser import BaseParser -from parsers.BnfNodeParser import ConceptNode, UnrecognizedTokensNode -from parsers.MultipleConceptsParser import MultipleConceptsParser -from core.concept import VARIABLE_PREFIX - -multiple_concepts_parser = MultipleConceptsParser() - - -class ConceptsWithConceptsParser(BaseParser): - def __init__(self, **kwargs): - super().__init__("ConceptsWithConcepts", 25) - self.enabled = False - - @staticmethod - def get_tokens(nodes): - tokens = [] - - for node in nodes: - if isinstance(node, ConceptNode): - index, line, column = node.tokens[0].index, node.tokens[0].line, node.tokens[0].column - tokens.append(Token(TokenKind.CONCEPT, node.concept, index, line, column)) - else: - for token in node.tokens: - if token.type == TokenKind.EOF: - break - elif token.type in (TokenKind.NEWLINE, TokenKind.WHITESPACE): - continue - else: - tokens.append(token) - - return tokens - - @staticmethod - def get_key(nodes): - key = "" - index = 0 - for node in nodes: - if key: - key += " " - - if isinstance(node, UnrecognizedTokensNode): - key += node.source.strip() - else: - key += f"{VARIABLE_PREFIX}{index}" - index += 1 - - return key - - def finalize_concept(self, 
context, concept, nodes): - index = 0 - for node in nodes: - - if isinstance(node, ConceptNode): - prop_name = list(concept.props.keys())[index] - concept.compiled[prop_name] = node.concept - context.log( - f"Setting property '{prop_name}='{node.concept}'.", - self.name) - index += 1 - elif isinstance(node, SourceCodeNode): - prop_name = list(concept.props.keys())[index] - sheerka = context.sheerka - value = sheerka.new(BuiltinConcepts.PARSER_RESULT, parser=self, source=node.source, body=node.node) - concept.compiled[prop_name] = [context.sheerka.ret(self.name, True, value)] - context.log( - f"Setting property '{prop_name}'='Python({node.source})'.", - self.name) - index += 1 - - return concept - - def parse(self, context, parser_input): - sheerka = context.sheerka - nodes = self.get_input_as_lexer_nodes(parser_input, multiple_concepts_parser) - if not nodes: - return None - - concept_key = self.get_key(nodes) - concept = sheerka.new(concept_key) - if sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT): - return sheerka.ret( - self.name, - False, - sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.body)) - - concepts = concept if hasattr(concept, "__iter__") else [concept] - for concept in concepts: - self.finalize_concept(context, concept, nodes) - - res = [] - for concept in concepts: - res.append(sheerka.ret( - self.name, - True, - sheerka.new( - BuiltinConcepts.PARSER_RESULT, - parser=self, - source=parser_input.source, - body=concept, - try_parsed=None))) - - return res[0] if len(res) == 1 else res diff --git a/src/parsers/DefaultParser.py b/src/parsers/DefaultParser.py index dc22b67..e250d1b 100644 --- a/src/parsers/DefaultParser.py +++ b/src/parsers/DefaultParser.py @@ -384,7 +384,8 @@ class DefaultParser(BaseParser): return None, NotInitializedNode() regex_parser = BnfParser() - with self.context.push(self.name, obj=current_concept_def) as sub_context: + desc = f"Resolving BNF {current_concept_def.definition}" + with 
self.context.push(self.name, obj=current_concept_def, desc=desc) as sub_context: parsing_result = regex_parser.parse(sub_context, tokens) sub_context.add_values(return_values=parsing_result) diff --git a/src/parsers/ExactConceptParser.py b/src/parsers/ExactConceptParser.py index 10dc09d..7eff3e3 100644 --- a/src/parsers/ExactConceptParser.py +++ b/src/parsers/ExactConceptParser.py @@ -1,9 +1,9 @@ import logging from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts -from parsers.BaseParser import BaseParser -from core.tokenizer import Tokenizer, Keywords, TokenKind, LexerError from core.concept import VARIABLE_PREFIX +from core.tokenizer import Keywords, TokenKind, LexerError +from parsers.BaseParser import BaseParser class ExactConceptParser(BaseParser): @@ -11,10 +11,11 @@ class ExactConceptParser(BaseParser): Tries to recognize a single concept """ - MAX_WORDS_SIZE = 10 + MAX_WORDS_SIZE = 3 - def __init__(self, **kwargs): + def __init__(self, max_word_size=None, **kwargs): BaseParser.__init__(self, "ExactConcept", 80) + self.max_word_size = max_word_size def parse(self, context, parser_input): """ @@ -33,11 +34,11 @@ class ExactConceptParser(BaseParser): context.log(f"Error found in tokenizer {e}", self.name) return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.ERROR, body=e)) - if len(words) > self.MAX_WORDS_SIZE: + if len(words) > (self.max_word_size or self.MAX_WORDS_SIZE): context.log(f"Max words reached. Stopping.", self.name) return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, body=parser_input)) - recognized = False + recognized = [] # keep track of the concepts founds for combination in self.combinations(words): concept_key = " ".join(combination) @@ -49,16 +50,23 @@ class ExactConceptParser(BaseParser): concepts = result if isinstance(result, list) else [result] for concept in concepts: + if concept.id in recognized: + context.log(f"Recognized concept {concept} again. 
Skipping.", self.name) + # example + # if the input is foo a and a concept is defined as foo a + # The will be two matches. One for 'foo a' and 'foo _var_0' + # but it's the same concept foo a + continue + context.log(f"Recognized concept {concept}.", self.name) # update the properties if needed - need_validation = False for i, token in enumerate(combination): if token.startswith(VARIABLE_PREFIX): index = int(token[len(VARIABLE_PREFIX):]) - concept.def_prop_by_index(index, words[i]) + concept.def_var_by_index(index, words[i]) concept.metadata.need_validation = True if self.verbose_log.isEnabledFor(logging.DEBUG): - prop_name = list(concept.props.keys())[index] + prop_name = concept.metadata.variables[index][0] context.log( f"Added property {index}: {prop_name}='{words[i]}'.", self.name) @@ -69,12 +77,13 @@ class ExactConceptParser(BaseParser): context.sheerka.new( BuiltinConcepts.PARSER_RESULT, parser=self, - source=parser_input if isinstance(parser_input, str) else self.get_text_from_tokens(parser_input), + source=parser_input if isinstance(parser_input, str) else self.get_text_from_tokens( + parser_input), body=concept, try_parsed=concept))) - recognized = True + recognized.append(concept.id) - if recognized: + if len(recognized) > 0: if len(res) == 1: self.log_result(context, parser_input, res[0]) else: diff --git a/src/parsers/ExplainParser.py b/src/parsers/ExplainParser.py index 5493b28..88b1d24 100644 --- a/src/parsers/ExplainParser.py +++ b/src/parsers/ExplainParser.py @@ -318,7 +318,7 @@ class ExplainParser(BaseSplitIterParser): def parse(self, context, parser_input): """ - text can be string, but text can also be an list of tokens + parser_input can be string, but text can also be an list of tokens :param context: :param parser_input: :return: diff --git a/src/parsers/MultipleConceptsParser.py b/src/parsers/MultipleConceptsParser.py deleted file mode 100644 index e02c6c2..0000000 --- a/src/parsers/MultipleConceptsParser.py +++ /dev/null @@ -1,163 +0,0 @@ 
-# to be replaced by SyaNodeParser -import ast - -from core.builtin_concepts import BuiltinConcepts -from core.tokenizer import TokenKind -from parsers.BaseNodeParser import SourceCodeNode -from parsers.BaseParser import BaseParser -from parsers.BnfNodeParser import BnfNodeParser, UnrecognizedTokensNode, ConceptNode -import core.utils -from parsers.PythonParser import PythonParser - -concept_lexer_parser = BnfNodeParser() - - -class MultipleConceptsParser(BaseParser): - """ - Parser that will take the result of BnfNodeParser and - try to resolve the unrecognized tokens token by token - - It is a success when it returns a list ConceptNode exclusively - """ - - def __init__(self, **kwargs): - BaseParser.__init__(self, "MultipleConcepts", 45) - self.enabled = False - - @staticmethod - def finalize(nodes_found, unrecognized_tokens): - if not unrecognized_tokens: - return nodes_found, unrecognized_tokens - - unrecognized_tokens.fix_source() - if unrecognized_tokens.not_whitespace(): - nodes_found = core.utils.product(nodes_found, [unrecognized_tokens]) - - return nodes_found, None - - @staticmethod - def create_or_add(unrecognized_tokens, token, index): - if unrecognized_tokens: - unrecognized_tokens.add_token(token, index) - else: - unrecognized_tokens = UnrecognizedTokensNode(index, index, [token]) - return unrecognized_tokens - - def parse(self, context, parser_input): - sheerka = context.sheerka - nodes = self.get_input_as_lexer_nodes(parser_input, concept_lexer_parser) - if not nodes: - return None - - nodes_found = [[]] - concepts_only = True - - for node in nodes: - if isinstance(node, UnrecognizedTokensNode): - unrecognized_tokens = None - i = 0 - - while i < len(node.tokens): - - token_index = node.start + i - token = node.tokens[i] - - concepts_nodes = self.get_concepts_nodes(context, token_index, token) - if concepts_nodes is not None: - nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens) - nodes_found = 
core.utils.product(nodes_found, concepts_nodes) - i += 1 - continue - - source_code_node = self.get_source_code_node(context, token_index, node.tokens[i:]) - if source_code_node: - nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens) - nodes_found = core.utils.product(nodes_found, [source_code_node]) - i += len(source_code_node.tokens) - continue - - # not a concept nor some source code - unrecognized_tokens = self.create_or_add(unrecognized_tokens, token, token_index) - concepts_only &= token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE) - i += 1 - - # finish processing if needed - nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens) - - else: - nodes_found = core.utils.product(nodes_found, [node]) - - ret = [] - for choice in nodes_found: - ret.append( - sheerka.ret( - self.name, - concepts_only, - sheerka.new( - BuiltinConcepts.PARSER_RESULT, - parser=self, - source=parser_input.source, - body=choice, - try_parsed=None)) - ) - - if len(ret) == 1: - self.log_result(context, parser_input.source, ret[0]) - return ret[0] - else: - self.log_multiple_results(context, parser_input.source, ret) - return ret - - @staticmethod - def get_concepts_nodes(context, index, token): - """ - Tries to recognize a concept - from the univers of all known concepts - """ - - if token.type != TokenKind.IDENTIFIER: - return None - - concept = context.new_concept(token.value) - if hasattr(concept, "__iter__") or context.sheerka.is_known(concept): - concepts = concept if hasattr(concept, "__iter__") else [concept] - concepts_nodes = [ConceptNode(c, index, index, [token], token.value) for c in concepts] - return concepts_nodes - - return None - - @staticmethod - def get_source_code_node(context, index, tokens): - """ - Tries to recognize source code. 
- For the time being, only Python is supported - :param context: - :param tokens: - :param index: - :return: - """ - - if len(tokens) == 0 or (len(tokens) == 1 and tokens[0].type == TokenKind.EOF): - return None - - end_index = len(tokens) - while end_index > 0: - parser = PythonParser() - tokens_to_parse = tokens[:end_index] - res = parser.parse(context, tokens_to_parse) - if res.status: - # only expression are accepted - ast_ = res.value.value.ast_ - if not isinstance(ast_, ast.Expression): - return None - try: - compiled = compile(ast_, "", "eval") - eval(compiled, {}, {}) - except Exception: - return None - - source = BaseParser.get_text_from_tokens(tokens_to_parse) - return SourceCodeNode(res.value.value, index, index + end_index - 1, tokens_to_parse, source) - end_index -= 1 - - return None diff --git a/src/parsers/PythonParser.py b/src/parsers/PythonParser.py index 56fa525..2a9b1f2 100644 --- a/src/parsers/PythonParser.py +++ b/src/parsers/PythonParser.py @@ -1,11 +1,11 @@ -from core.builtin_concepts import BuiltinConcepts, ParserResultConcept -from core.tokenizer import Tokenizer, LexerError, TokenKind -from parsers.BaseParser import BaseParser, Node, ErrorNode -from dataclasses import dataclass import ast import logging -import core.utils +from dataclasses import dataclass +import core.utils +from core.builtin_concepts import BuiltinConcepts +from core.tokenizer import LexerError, TokenKind +from parsers.BaseParser import BaseParser, Node, ErrorNode from parsers.BnfNodeParser import ConceptNode log = logging.getLogger(__name__) diff --git a/src/parsers/PythonWithConceptsParser.py b/src/parsers/PythonWithConceptsParser.py index 131a21a..52b939a 100644 --- a/src/parsers/PythonWithConceptsParser.py +++ b/src/parsers/PythonWithConceptsParser.py @@ -1,7 +1,6 @@ from core.builtin_concepts import BuiltinConcepts from parsers.BaseParser import BaseParser from parsers.BnfNodeParser import ConceptNode -from parsers.MultipleConceptsParser import 
MultipleConceptsParser from parsers.PythonParser import PythonParser from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser diff --git a/src/parsers/SyaNodeParser.py b/src/parsers/SyaNodeParser.py index a6bb839..5820a28 100644 --- a/src/parsers/SyaNodeParser.py +++ b/src/parsers/SyaNodeParser.py @@ -1,4 +1,3 @@ -import copy from collections import namedtuple from dataclasses import dataclass, field from typing import List @@ -7,10 +6,10 @@ from core import builtin_helpers from core.builtin_concepts import BuiltinConcepts from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF from core.sheerka.ExecutionContext import ExecutionContext -from core.tokenizer import LexerError, Token, TokenKind +from core.tokenizer import Token, TokenKind from parsers.BaseNodeParser import UnrecognizedTokensNode, ConceptNode, SourceCodeNode, SyaAssociativity, \ - SourceCodeWithConceptNode -from parsers.BaseParser import BaseParser, ErrorNode + SourceCodeWithConceptNode, BaseNodeParser +from parsers.BaseParser import ErrorNode, UnexpectedTokenErrorNode PARSERS = ["BnfNode", "AtomNode", "Python"] @@ -116,13 +115,13 @@ class SyaConceptParserHelper: return len(self.expected) == 0 def is_atom(self): - return len(self.concept.concept.metadata.props) == 0 and len(self.expected) == 0 + return len(self.concept.concept.metadata.variables) == 0 and len(self.expected) == 0 def is_expected(self, token): if self.is_matched(): return False - token_value = self._get_token_value(token) + token_value = BaseNodeParser.get_token_value(token) for expected in self.expected: if not expected.startswith(VARIABLE_PREFIX) and expected == token_value: @@ -139,7 +138,7 @@ class SyaConceptParserHelper: # return True is a whole sequence of keyword is eaten # example - # Concept("foo a bar baz qux b").def_prop("a").def_prop("b") + # Concept("foo a bar baz qux b").def_var("a").def_var("b") # 'bar' is just eaten. 
We will return False because 'baz' and 'qux' are still waiting if len(self.expected) == 0: return True @@ -169,14 +168,14 @@ class SyaConceptParserHelper: self.concept = self.concept.concept return self - @staticmethod - def _get_token_value(token): - if token.type == TokenKind.STRING: - return token.value[1:-1] - elif token.type == TokenKind.KEYWORD: - return token.value.value - else: - return token.value + # @staticmethod + # def _get_token_value(token): + # if token.type == TokenKind.STRING: + # return token.value[1:-1] + # elif token.type == TokenKind.KEYWORD: + # return token.value.value + # else: + # return token.value def clone(self): clone = SyaConceptParserHelper(self.concept, self.start, self.end) @@ -215,7 +214,10 @@ class InFixToPostFix: if not isinstance(other, InFixToPostFix): return False - return self.out == other.out + return self.out == other.out and self.errors == other.errors + + def __hash__(self): + return len(self.sequence) + len(self.errors) def _add_error(self, error): self.errors.append(error) @@ -396,6 +398,7 @@ class InFixToPostFix: del current_concept.expected[0] def manage_unrecognized(self): + if self.unrecognized_tokens.is_empty(): return @@ -514,10 +517,10 @@ class InFixToPostFix: def handle_expected_token(self, token, pos): """ True if the token is part of the concept being parsed and the last token in a sequence is eaten - Example : Concept("foo a bar b").def_prop("a").def_prop("b") + Example : Concept("foo a bar b").def_var("a").def_var("b") The expected tokens are 'foo' and 'bar' (as a and b are parameters) - Example: Concept("foo a bar baz b").def_prop("a").def_prop("b") + Example: Concept("foo a bar baz b").def_var("a").def_var("b") If the token is 'bar', it will be eaten but handle_expected_token() will return False as we still expect 'baz' :param token: @@ -565,6 +568,18 @@ class InFixToPostFix: return True + # else: + # if token.type != TokenKind.WHITESPACE: + # # hack, because whitespaces are not correctly parsed in 
self.expected + # # KSI 2020/04/25 + # # I no longer understand why we are in a loop (the reverse one) + # # if we are parsing a concept and the expected token does not match + # # The whole class should be in error + # self._add_error(UnexpectedTokenErrorNode( + # f"Failed to parse '{current_concept.concept.concept}'", + # token, current_concept.expected)) + # return False + return False def eat_token(self, token, pos): @@ -581,7 +596,7 @@ class InFixToPostFix: if self.handle_expected_token(token, pos): # a token is found, let's check if it's part of a concepts being parsed - # example Concept(name="foo", definition="foo a bar b").def_prop("a").def_prop("b") + # example Concept(name="foo", definition="foo a bar b").def_var("a").def_var("b") # if the token 'bar' is found, it has to be considered as part of the concept foo self.debug.append(token) return True @@ -780,16 +795,13 @@ class PostFixToItem: has_unrecognized: bool -class SyaNodeParser(BaseParser): +class SyaNodeParser(BaseNodeParser): + def __init__(self, **kwargs): - BaseParser.__init__(self, "SyaNode", 50) + super().__init__("SyaNode", 50, **kwargs) if 'sheerka' in kwargs: sheerka = kwargs.get("sheerka") - self.concepts_by_first_keyword = sheerka.concepts_by_first_keyword - self.sya_definitions = {} - if sheerka.sya_definitions: - for k, v in sheerka.sya_definitions.items(): - self.sya_definitions[k] = (v[0], SyaAssociativity(v[1])) + self.sya_definitions = sheerka.resolved_sya_def else: self.concepts_by_first_keyword = {} @@ -803,104 +815,133 @@ class SyaNodeParser(BaseParser): self.text = None self.sheerka = None - def reset_parser(self, context, text): - self.context = context - self.sheerka = context.sheerka - self.text = text - - try: - self.tokens = list(self.get_input_as_tokens(text)) - except LexerError as e: - self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False) - return False - - self.token = None - self.pos = -1 - return True - - def add_error(self, error, next_token=True): - 
self.error_sink.append(error) - if next_token: - self.next_token() - return error - - def get_token(self) -> Token: - return self.token - - def next_token(self, skip_whitespace=True): - if self.token and self.token.type == TokenKind.EOF: - return False - - self.pos += 1 - self.token = self.tokens[self.pos] - - if skip_whitespace: - while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE: - self.pos += 1 - self.token = self.tokens[self.pos] - - return self.token.type != TokenKind.EOF - - def initialize(self, context, concepts=None, sya_definitions=None): - self.context = context - self.sheerka = context.sheerka + def init_from_concepts(self, context, concepts, **kwargs): + super().init_from_concepts(context, concepts) + sya_definitions = kwargs.get("sya", None) if sya_definitions: self.sya_definitions = sya_definitions - if concepts: - for concept in concepts: - keywords = concept.key.split() - for keyword in keywords: - if keyword.startswith(VARIABLE_PREFIX): - continue - - self.concepts_by_first_keyword.setdefault(keyword, []).append(concept.id) - break - - return self.sheerka.ret(self.name, True, self.concepts_by_first_keyword) - - def get_concepts(self, token): + @staticmethod + def _is_eligible(concept): """ - Tries to find if there are concepts that match the value of the token - :param token: + Predicate that select concepts that must handled by AtomNodeParser + :param concept: :return: """ + # We only concepts that has parameter (refuse atoms) + # Bnf definitions are not supposed to be managed by this parser either + return len(concept.metadata.variables) > 0 and concept.metadata.definition_type != DEFINITION_TYPE_BNF - if token.type == TokenKind.STRING: - name = token.value[1:-1] - elif token.type == TokenKind.KEYWORD: - name = token.value.value - else: - name = token.value + @staticmethod + def _get_sya_concept_def(parser, concept): + sya_concept_def = SyaConceptDef(concept) + if concept.id in parser.sya_definitions: + sya_def 
= parser.sya_definitions.get(concept.id) + if sya_def[0] is not None: + sya_concept_def.precedence = sya_def[0] + if sya_def[1] is not None: + sya_concept_def.associativity = sya_def[1] + return sya_concept_def - result = [] - if name in self.concepts_by_first_keyword: - for concept_id in self.concepts_by_first_keyword[name]: + # def reset_parser(self, context, text): + # self.context = context + # self.sheerka = context.sheerka + # self.text = text + # + # try: + # self.tokens = list(self.get_input_as_tokens(text)) + # except LexerError as e: + # self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False) + # return False + # + # self.token = None + # self.pos = -1 + # return True + # + # def add_error(self, error, next_token=True): + # self.error_sink.append(error) + # if next_token: + # self.next_token() + # return error + # + # def get_token(self) -> Token: + # return self.token + # + # def next_token(self, skip_whitespace=True): + # if self.token and self.token.type == TokenKind.EOF: + # return False + # + # self.pos += 1 + # self.token = self.tokens[self.pos] + # + # if skip_whitespace: + # while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE: + # self.pos += 1 + # self.token = self.tokens[self.pos] + # + # return self.token.type != TokenKind.EOF - concept = self.sheerka.get_by_id(concept_id) - - if len(concept.metadata.props) == 0: - # only concepts that has parameter (refuse atoms) - # Note that this test is needed if the definition of the concept has changed - continue - - if concept.metadata.definition_type == DEFINITION_TYPE_BNF: - # bnf definitions are not supposed to be managed by this parser - continue - - sya_concept_def = SyaConceptDef(concept) - if concept.id in self.sya_definitions: - sya_def = self.sya_definitions[concept.id] - if sya_def[0] is not None: - sya_concept_def.precedence = sya_def[0] - if sya_def[1] is not None: - sya_concept_def.associativity = sya_def[1] - - result.append(sya_concept_def) 
- return result - - return None + # def initialize(self, context, concepts=None, sya_definitions=None): + # self.context = context + # self.sheerka = context.sheerka + # + # if sya_definitions: + # self.sya_definitions = sya_definitions + # + # if concepts: + # for concept in concepts: + # keywords = concept.key.split() + # for keyword in keywords: + # if keyword.startswith(VARIABLE_PREFIX): + # continue + # + # self.concepts_by_first_keyword.setdefault(keyword, []).append(concept.id) + # break + # + # return self.sheerka.ret(self.name, True, self.concepts_by_first_keyword) + # + # def get_concepts(self, token): + # """ + # Tries to find if there are concepts that match the value of the token + # :param token: + # :return: + # """ + # + # if token.type == TokenKind.STRING: + # name = token.value[1:-1] + # elif token.type == TokenKind.KEYWORD: + # name = token.value.value + # else: + # name = token.value + # + # result = [] + # if name in self.concepts_by_first_keyword: + # for concept_id in self.concepts_by_first_keyword[name]: + # + # concept = self.sheerka.get_by_id(concept_id) + # + # if len(concept.metadata.props) == 0: + # # only concepts that has parameter (refuse atoms) + # # Note that this test is needed if the definition of the concept has changed + # continue + # + # if concept.metadata.definition_type == DEFINITION_TYPE_BNF: + # # bnf definitions are not supposed to be managed by this parser + # continue + # + # sya_concept_def = SyaConceptDef(concept) + # if concept.id in self.sya_definitions: + # sya_def = self.sya_definitions[concept.id] + # if sya_def[0] is not None: + # sya_concept_def.precedence = sya_def[0] + # if sya_def[1] is not None: + # sya_concept_def.associativity = sya_def[1] + # + # result.append(sya_concept_def) + # return result + # + # return None def infix_to_postfix(self, context, text): """ @@ -943,7 +984,7 @@ class SyaNodeParser(BaseParser): if infix_to_postfix.eat_token(token, self.pos): infix_to_postfix.lock() - concepts = 
self.get_concepts(token) + concepts = self.get_concepts(token, self._is_eligible, to_map=self._get_sya_concept_def) if not concepts: for infix_to_postfix in res: infix_to_postfix.eat_unrecognized(token, self.pos) @@ -988,7 +1029,7 @@ class SyaNodeParser(BaseParser): else: items.append(res) item.has_unrecognized |= hasattr(res, "has_unrecognized") and res.has_unrecognized or \ - isinstance(res, UnrecognizedTokensNode) + isinstance(res, UnrecognizedTokensNode) item.nodes = items item.fix_all_pos() item.tokens = self.tokens[item.start:item.end + 1] @@ -1000,7 +1041,7 @@ class SyaNodeParser(BaseParser): end = item.end has_unrecognized = False concept = sheerka.new_from_template(item.concept, item.concept.id) - for param_index in reversed(range(len(concept.metadata.props))): + for param_index in reversed(range(len(concept.metadata.variables))): inner_item = self.postfix_to_item(sheerka, postfixed) if inner_item.start < start: start = inner_item.start @@ -1008,7 +1049,7 @@ class SyaNodeParser(BaseParser): end = inner_item.end has_unrecognized |= isinstance(inner_item, UnrecognizedTokensNode) - param_name = concept.metadata.props[param_index][0] + param_name = concept.metadata.variables[param_index][0] param_value = inner_item.concept if hasattr(inner_item, "concept") else \ [inner_item.return_value] if isinstance(inner_item, SourceCodeNode) else \ inner_item @@ -1115,3 +1156,10 @@ class SyaNodeParser(BaseParser): result.append(infix_to_postfix) return result + + # @staticmethod + # def init_sheerka(self, sheerka): + # if hasattr(BaseNodeParser, "init_sheerka"): + # BaseNodeParser.init_sheerka(sheerka) + # + # # init syadefinitins diff --git a/src/parsers/UnrecognizedNodeParser.py b/src/parsers/UnrecognizedNodeParser.py index 7e589ec..cb89979 100644 --- a/src/parsers/UnrecognizedNodeParser.py +++ b/src/parsers/UnrecognizedNodeParser.py @@ -52,11 +52,20 @@ class UnrecognizedNodeParser(BaseParser): res = only_successful(context, res) if res.status: lexer_nodes = 
get_lexer_nodes(res.body.body, node.start, node.tokens) - sequences_found = core.utils.product(sequences_found, lexer_nodes) + if lexer_nodes: + # make lexer_nodes is not empty (for example, some Python result are discarded) + sequences_found = core.utils.product(sequences_found, lexer_nodes) + else: + sequences_found = core.utils.product(sequences_found, [node]) + has_unrecognized = True else: sequences_found = core.utils.product(sequences_found, [node]) has_unrecognized = True + elif isinstance(node, SourceCodeNode): + sequences_found = core.utils.product(sequences_found, [node]) + has_unrecognized = True # never trust source code not. I may be an invalid source code + else: # cannot happen as of today :-) raise NotImplementedError() diff --git a/src/parsers/_BnfNodeParser_Old.py b/src/parsers/_BnfNodeParser_Old.py new file mode 100644 index 0000000..bd8b1e3 --- /dev/null +++ b/src/parsers/_BnfNodeParser_Old.py @@ -0,0 +1,912 @@ +# ##################################################################################################### +# # This implementation of the parser is highly inspired by the arpeggio project (https://github.com/textX/Arpeggio) +# # I don't directly use the project, but it helped me figure out +# # what to do. 
+# # Dejanović I., Milosavljević G., Vaderna R.: +# # Arpeggio: A flexible PEG parser for Python, +# # Knowledge-Based Systems, 2016, 95, 71 - 74, doi:10.1016/j.knosys.2015.12.004 +# ##################################################################################################### +# from collections import namedtuple +# from dataclasses import dataclass +# from collections import defaultdict +# from core.builtin_concepts import BuiltinConcepts, ParserResultConcept +# from core.concept import Concept, ConceptParts, DoNotResolve +# from core.tokenizer import TokenKind, Tokenizer, Token +# from parsers.BaseNodeParser import LexerNode, GrammarErrorNode, ConceptNode, UnrecognizedTokensNode +# from parsers.BaseParser import BaseParser, ErrorNode +# import core.utils +# +# +# class NonTerminalNode(LexerNode): +# """ +# Returned by the BnfNodeParser +# """ +# +# def __init__(self, parsing_expression, start, end, tokens, children=None): +# super().__init__(start, end, tokens) +# self.parsing_expression = parsing_expression +# self.children = children +# +# def __repr__(self): +# name = self.parsing_expression.rule_name or self.parsing_expression.__class__.__name__ +# if len(self.children) > 0: +# sub_names = "(" + ",".join([repr(child) for child in self.children]) + ")" +# else: +# sub_names = "" +# return name + sub_names +# +# def __eq__(self, other): +# if not isinstance(other, NonTerminalNode): +# return False +# +# return self.parsing_expression == other.parsing_expression and \ +# self.start == other.start and \ +# self.end == other.end and \ +# self.children == other.children +# +# def __hash__(self): +# return hash((self.parsing_expression, self.start, self.end, self.children)) +# +# +# class TerminalNode(LexerNode): +# """ +# Returned by the BnfNodeParser +# """ +# +# def __init__(self, parsing_expression, start, end, value): +# super().__init__(start, end, source=value) +# self.parsing_expression = parsing_expression +# self.value = value +# +# def 
__repr__(self): +# name = self.parsing_expression.rule_name or "" +# return name + f"'{self.value}'" +# +# def __eq__(self, other): +# if not isinstance(other, TerminalNode): +# return False +# +# return self.parsing_expression == other.parsing_expression and \ +# self.start == other.start and \ +# self.end == other.end and \ +# self.value == other.value +# +# def __hash__(self): +# return hash((self.parsing_expression, self.start, self.end, self.value)) +# +# +# @dataclass() +# class UnknownConceptNode(ErrorNode): +# concept_key: str +# +# +# @dataclass() +# class TooManyConceptNode(ErrorNode): +# concept_key: str +# +# +# class ParsingExpression: +# def __init__(self, *args, **kwargs): +# self.elements = args +# +# nodes = kwargs.get('nodes', []) +# if not hasattr(nodes, '__iter__'): +# nodes = [nodes] +# self.nodes = nodes +# +# self.rule_name = kwargs.get('rule_name', '') +# +# def __eq__(self, other): +# if not isinstance(other, ParsingExpression): +# return False +# +# return self.rule_name == other.rule_name and self.elements == other.elements +# +# def __hash__(self): +# return hash((self.rule_name, self.elements)) +# +# def parse(self, parser): +# return self._parse(parser) +# +# def add_rule_name_if_needed(self, text): +# return text + "=" + self.rule_name if self.rule_name else text +# +# +# class ConceptExpression(ParsingExpression): +# """ +# Will match a concept +# It used only for rule definition +# +# When the grammar is created, it is replaced by the actual concept +# """ +# +# def __init__(self, concept, rule_name=""): +# super().__init__(rule_name=rule_name) +# self.concept = concept +# +# def __repr__(self): +# return self.add_rule_name_if_needed(f"{self.concept}") +# +# def __eq__(self, other): +# if not super().__eq__(other): +# return False +# +# if not isinstance(other, ConceptExpression): +# return False +# +# if isinstance(self.concept, Concept): +# return self.concept.name == other.concept.name +# +# # when it's only the name of the 
concept +# return self.concept == other.concept +# +# def __hash__(self): +# return hash((self.concept, self.rule_name)) +# +# @staticmethod +# def get_parsing_expression_from_name(name): +# tokens = Tokenizer(name) +# nodes = [StrMatch(core.utils.strip_quotes(token.value)) for token in list(tokens)[:-1]] +# if len(nodes) == 1: +# return nodes[0] +# else: +# sequence = Sequence(nodes) +# sequence.nodes = nodes +# return sequence +# +# def _parse(self, parser): +# to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept +# if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT): +# return None +# +# self.concept = to_match # Memoize +# +# if to_match not in parser.concepts_grammars: +# # Try to match the concept using its name +# expr = self.get_parsing_expression_from_name(to_match.name) +# node = expr.parse(parser) +# else: +# node = parser.concepts_grammars[to_match].parse(parser) +# +# if node is None: +# return None +# +# return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node]) +# +# +# class ConceptGroupExpression(ConceptExpression): +# def _parse(self, parser): +# to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept +# if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT): +# return None +# +# self.concept = to_match # Memoize +# +# if to_match not in parser.concepts_grammars: +# concepts_in_group = parser.sheerka.get_set_elements(parser.context, self.concept) +# nodes = [ConceptExpression(c, rule_name=c.name) for c in concepts_in_group] +# expr = OrderedChoice(nodes) +# expr.nodes = nodes +# node = expr.parse(parser) +# else: +# node = parser.concepts_grammars[to_match].parse(parser) +# +# if node is None: +# return None +# +# return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node]) +# +# +# class Sequence(ParsingExpression): +# """ +# Will match sequence of 
parser expressions in exact order they are defined. +# """ +# +# def _parse(self, parser): +# init_pos = parser.pos +# end_pos = parser.pos +# +# children = [] +# for e in self.nodes: +# node = e.parse(parser) +# if node is None: +# return None +# else: +# if node.end != -1: # because returns -1 when no match +# children.append(node) +# end_pos = node.end +# +# return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children) +# +# def __repr__(self): +# to_str = ", ".join(repr(n) for n in self.elements) +# return self.add_rule_name_if_needed(f"({to_str})") +# +# +# class OrderedChoice(ParsingExpression): +# """ +# Will match one among multiple +# It will stop at the first match (so the order of definition is important) +# """ +# +# def _parse(self, parser): +# init_pos = parser.pos +# +# for e in self.nodes: +# node = e.parse(parser) +# if node: +# return NonTerminalNode(self, init_pos, node.end, parser.tokens[init_pos: node.end + 1], [node]) +# +# parser.seek(init_pos) # backtrack +# +# return None +# +# def __repr__(self): +# to_str = "| ".join(repr(n) for n in self.elements) +# return self.add_rule_name_if_needed(f"({to_str})") +# +# +# class Optional(ParsingExpression): +# """ +# Will match or not the elements +# if many matches, will choose longest one +# If you need order, use Optional(OrderedChoice) +# """ +# +# def _parse(self, parser): +# init_pos = parser.pos +# selected_node = NonTerminalNode(self, parser.pos, -1, [], []) # means that nothing is found +# +# for e in self.nodes: +# node = e.parse(parser) +# if node: +# if node.end > selected_node.end: +# selected_node = NonTerminalNode( +# self, +# node.start, +# node.end, +# parser.tokens[node.start: node.end + 1], +# [node]) +# +# parser.seek(init_pos) # backtrack +# +# if selected_node.end != -1: +# parser.seek(selected_node.end) +# parser.next_token() # eat the tokens found +# +# return selected_node +# +# def __repr__(self): +# if len(self.elements) == 1: +# return 
f"{self.elements[0]}?" +# else: +# to_str = ", ".join(repr(n) for n in self.elements) +# return self.add_rule_name_if_needed(f"({to_str})?") +# +# +# class Repetition(ParsingExpression): +# """ +# Base class for all repetition-like parser expressions (?,*,+) +# Args: +# eolterm(bool): Flag that indicates that end of line should +# terminate repetition match. +# """ +# +# def __init__(self, *elements, **kwargs): +# super(Repetition, self).__init__(*elements, **kwargs) +# self.sep = kwargs.get('sep', None) +# +# +# class ZeroOrMore(Repetition): +# """ +# ZeroOrMore will try to match parser expression specified zero or more +# times. It will never fail. +# """ +# +# def _parse(self, parser): +# init_pos = parser.pos +# end_pos = -1 +# children = [] +# +# while True: +# current_pos = parser.pos +# +# # maybe eat the separator if needed +# if self.sep and children: +# sep_result = self.sep.parse(parser) +# if sep_result is None: +# parser.seek(current_pos) +# break +# +# # eat the ZeroOrMore +# node = self.nodes[0].parse(parser) +# if node is None: +# parser.seek(current_pos) +# break +# else: +# if node.end != -1: # because returns -1 when no match +# children.append(node) +# end_pos = node.end +# +# if len(children) == 0: +# return NonTerminalNode(self, init_pos, -1, [], []) +# +# return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children) +# +# def __repr__(self): +# to_str = ", ".join(repr(n) for n in self.elements) +# return self.add_rule_name_if_needed(f"({to_str})*") +# +# +# class OneOrMore(Repetition): +# """ +# OneOrMore will try to match parser expression specified one or more times. 
+# """ +# +# def _parse(self, parser): +# init_pos = parser.pos +# end_pos = -1 +# children = [] +# +# while True: +# current_pos = parser.pos +# +# # maybe eat the separator if needed +# if self.sep and children: +# sep_result = self.sep.parse(parser) +# if sep_result is None: +# parser.seek(current_pos) +# break +# +# # eat the ZeroOrMore +# node = self.nodes[0].parse(parser) +# if node is None: +# parser.seek(current_pos) +# break +# else: +# if node.end != -1: # because returns -1 when no match +# children.append(node) +# end_pos = node.end +# +# if len(children) == 0: # if nothing is found, it's an error +# return None +# +# return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children) +# +# def __repr__(self): +# to_str = ", ".join(repr(n) for n in self.elements) +# return self.add_rule_name_if_needed(f"({to_str})+") +# +# +# class UnorderedGroup(Repetition): +# """ +# Will try to match all of the parsing expression in any order. +# """ +# +# def _parse(self, parser): +# raise NotImplementedError() +# +# # def __repr__(self): +# # to_str = ", ".join(repr(n) for n in self.elements) +# # return f"({to_str})#" +# +# +# class Match(ParsingExpression): +# """ +# Base class for all classes that will try to match something from the input. 
+# """ +# +# def __init__(self, rule_name, root=False): +# super(Match, self).__init__(rule_name=rule_name, root=root) +# +# def parse(self, parser): +# result = self._parse(parser) +# return result +# +# +# class StrMatch(Match): +# """ +# Matches a literal +# """ +# +# def __init__(self, to_match, rule_name="", ignore_case=True): +# super(Match, self).__init__(rule_name=rule_name) +# self.to_match = to_match +# self.ignore_case = ignore_case +# +# def __repr__(self): +# return self.add_rule_name_if_needed(f"'{self.to_match}'") +# +# def __eq__(self, other): +# if not super().__eq__(other): +# return False +# +# if not isinstance(other, StrMatch): +# return False +# +# return self.to_match == other.to_match and self.ignore_case == other.ignore_case +# +# def _parse(self, parser): +# token = parser.get_token() +# m = str(token.value).lower() == self.to_match.lower() if self.ignore_case \ +# else token.value == self.to_match +# +# if m: +# node = TerminalNode(self, parser.pos, parser.pos, token.value) +# parser.next_token() +# return node +# +# return None +# +# +# class BnfNodeParser(BaseParser): +# def __init__(self, **kwargs): +# super().__init__("BnfNode_old", 50) +# self.enabled = False +# if 'grammars' in kwargs: +# self.concepts_grammars = kwargs.get("grammars") +# elif 'sheerka' in kwargs: +# self.concepts_grammars = kwargs.get("sheerka").concepts_grammars +# else: +# self.concepts_grammars = {} +# +# self.ignore_case = True +# +# self.token = None +# self.pos = -1 +# self.tokens = None +# +# self.context = None +# self.text = None +# self.sheerka = None +# +# def add_error(self, error, next_token=True): +# self.error_sink.append(error) +# if next_token: +# self.next_token() +# return error +# +# def reset_parser(self, context, text): +# self.context = context +# self.sheerka = context.sheerka +# self.text = text +# +# try: +# self.tokens = list(self.get_input_as_tokens(text)) +# except core.tokenizer.LexerError as e: +# 
self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False) +# return False +# +# self.token = None +# self.pos = -1 +# self.next_token(False) +# return True +# +# def get_token(self) -> Token: +# return self.token +# +# def next_token(self, skip_whitespace=True): +# if self.token and self.token.type == TokenKind.EOF: +# return False +# +# self.pos += 1 +# self.token = self.tokens[self.pos] +# +# if skip_whitespace: +# while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE: +# self.pos += 1 +# self.token = self.tokens[self.pos] +# +# return self.token.type != TokenKind.EOF +# +# def seek(self, pos): +# self.pos = pos +# self.token = self.tokens[self.pos] +# return True +# +# def rewind(self, offset, skip_whitespace=True): +# self.pos += offset +# self.token = self.tokens[self.pos] +# +# if skip_whitespace: +# while self.pos > 0 and (self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE): +# self.pos -= 1 +# self.token = self.tokens[self.pos] +# +# def initialize(self, context, concepts_definitions): +# """ +# Adds a bunch of concepts, and how they can be recognized +# :param context: execution context +# :param concepts_definitions: dictionary of concept, concept_definition +# :return: +# """ +# +# self.context = context +# self.sheerka = context.sheerka +# concepts_to_resolve = set() +# +# for concept, concept_def in concepts_definitions.items(): +# # ## Gets the grammars +# context.log(f"Resolving grammar for '{concept}'", context.who) +# concept.init_key() # make sure that the key is initialized +# grammar = self.get_model(concept_def, concepts_to_resolve) +# self.concepts_grammars[concept] = grammar +# +# if self.has_error: +# return self.sheerka.ret(self.name, False, self.error_sink) +# +# # ## Removes concepts with infinite recursions +# concepts_to_remove = self.detect_infinite_recursion(concepts_to_resolve) +# for concept in concepts_to_remove: +# concepts_to_resolve.remove(concept) +# del 
self.concepts_grammars[concept] +# +# if self.has_error: +# return self.sheerka.ret(self.name, False, self.error_sink) +# else: +# return self.sheerka.ret(self.name, True, self.concepts_grammars) +# +# def get_concept(self, concept_name): +# if concept_name in self.context.concepts: +# return self.context.concepts[concept_name] +# return self.sheerka.get_by_key(concept_name) +# +# def get_model(self, concept_def, concepts_to_resolve): +# +# # TODO +# # inner_get_model must not modify the initial ParsingExpression +# # A copy must be created +# def inner_get_model(expression): +# if isinstance(expression, Concept): +# if self.sheerka.isaset(self.context, expression): +# ret = ConceptGroupExpression(expression, rule_name=expression.name) +# else: +# ret = ConceptExpression(expression, rule_name=expression.name) +# concepts_to_resolve.add(expression) +# elif isinstance(expression, ConceptExpression): # it includes ConceptGroupExpression +# if expression.rule_name is None or expression.rule_name == "": +# expression.rule_name = expression.concept.name if isinstance(expression.concept, Concept) \ +# else expression.concept +# if isinstance(expression.concept, str): +# concept = self.get_concept(expression.concept) +# if self.sheerka.is_known(concept): +# expression.concept = concept +# concepts_to_resolve.add(expression.concept) +# ret = expression +# elif isinstance(expression, str): +# ret = StrMatch(expression, ignore_case=self.ignore_case) +# elif isinstance(expression, StrMatch): +# ret = expression +# if ret.ignore_case is None: +# ret.ignore_case = self.ignore_case +# elif isinstance(expression, Sequence) or \ +# isinstance(expression, OrderedChoice) or \ +# isinstance(expression, ZeroOrMore) or \ +# isinstance(expression, OneOrMore) or \ +# isinstance(expression, Optional): +# ret = expression +# ret.nodes = [inner_get_model(e) for e in ret.elements] +# else: +# ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False) +# +# 
# Translate separator expression. +# if isinstance(expression, Repetition) and expression.sep: +# expression.sep = inner_get_model(expression.sep) +# +# return ret +# +# model = inner_get_model(concept_def) +# +# return model +# +# def detect_infinite_recursion(self, concepts_to_resolve): +# +# # infinite recursion matcher +# def _is_infinite_recursion(ref_concept, node): +# if isinstance(node, ConceptExpression): +# if node.concept == ref_concept: +# return True +# +# if isinstance(node.concept, str): +# to_match = self.get_concept(node.concept) +# if self.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT): +# return False +# else: +# to_match = node.concept +# +# if to_match not in self.concepts_grammars: +# return False +# +# return _is_infinite_recursion(ref_concept, self.concepts_grammars[to_match]) +# +# if isinstance(node, OrderedChoice): +# return _is_infinite_recursion(ref_concept, node.nodes[0]) +# +# if isinstance(node, Sequence): +# for node in node.nodes: +# if _is_infinite_recursion(ref_concept, node): +# return True +# return False +# +# return False +# +# removed_concepts = [] +# for e in concepts_to_resolve: +# if isinstance(e, str): +# e = self.get_concept(e) +# if self.sheerka.isinstance(e, BuiltinConcepts.UNKNOWN_CONCEPT): +# continue +# +# if e not in self.concepts_grammars: +# continue +# +# to_resolve = self.concepts_grammars[e] +# if _is_infinite_recursion(e, to_resolve): +# removed_concepts.append(e) +# return removed_concepts +# +# def parse(self, context, parser_input): +# if parser_input == "": +# return context.sheerka.ret( +# self.name, +# False, +# context.sheerka.new(BuiltinConcepts.IS_EMPTY) +# ) +# +# if not self.reset_parser(context, parser_input): +# return self.sheerka.ret( +# self.name, +# False, +# context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink)) +# +# concepts_found = [[]] +# unrecognized_tokens = None +# has_unrecognized = False +# +# # actually list of list +# # The first dimension is the number 
of possibilities found +# # The second dimension is the number of concepts found, under one possibility +# # +# # Example 1 +# # concept foo : 'one' 'two' +# # concept bar : 'one' 'two' +# # input 'one two' -> will produce two possibilities (foo and bar). +# # +# # Example 2 +# # concept foo : 'one' +# # concept bar : 'two' +# # input 'one two' -> will produce one possibility which is (foo, bar) (foo then bar) +# +# while True: +# init_pos = self.pos +# res = [] +# +# for concept, grammar in self.concepts_grammars.items(): +# self.seek(init_pos) +# node = grammar.parse(self) # a node is TerminalNode or NonTerminalNode +# if node is not None and node.end != -1: +# updated_concept = self.finalize_concept(context.sheerka, concept, node) +# concept_node = ConceptNode( +# updated_concept, +# node.start, +# node.end, +# self.tokens[node.start: node.end + 1], +# None, +# node) +# res.append(concept_node) +# +# if len(res) == 0: # not recognized +# self.seek(init_pos) +# if unrecognized_tokens: +# unrecognized_tokens.add_token(self.get_token(), init_pos) +# else: +# unrecognized_tokens = UnrecognizedTokensNode(init_pos, init_pos, [self.get_token()]) +# +# if not self.next_token(False): +# break +# +# else: # some concepts are recognized +# if unrecognized_tokens and unrecognized_tokens.not_whitespace(): +# unrecognized_tokens.fix_source() +# concepts_found = core.utils.product(concepts_found, [unrecognized_tokens]) +# has_unrecognized = True +# unrecognized_tokens = None +# +# res = self.get_bests(res) # only keep the concepts that eat the more tokens +# concepts_found = core.utils.product(concepts_found, res) +# +# # loop +# self.seek(res[0].end) +# if not self.next_token(False): +# break +# +# # Fix the source for unrecognized tokens +# if unrecognized_tokens and unrecognized_tokens.not_whitespace(): +# unrecognized_tokens.fix_source() +# concepts_found = core.utils.product(concepts_found, [unrecognized_tokens]) +# has_unrecognized = True +# +# # else +# # returns as 
many ReturnValue than choices found +# ret = [] +# for choice in concepts_found: +# ret.append( +# self.sheerka.ret( +# self.name, +# not has_unrecognized, +# self.sheerka.new( +# BuiltinConcepts.PARSER_RESULT, +# parser=self, +# source=parser_input, +# body=choice, +# try_parsed=choice))) +# +# if len(ret) == 1: +# self.log_result(context, parser_input, ret[0]) +# return ret[0] +# else: +# self.log_multiple_results(context, parser_input, ret) +# return ret +# +# def finalize_concept(self, sheerka, template, underlying, init_empty_body=True): +# """ +# Updates the properties of the concept +# Goes in recursion if the property is a concept +# """ +# +# # this cache is to make sure that we return the same concept for the same ConceptExpression +# _underlying_value_cache = {} +# +# def _add_prop(_concept, prop_name, value): +# """ +# Adds a new entry, +# makes a list if the property already exists +# """ +# if prop_name not in _concept.compiled or _concept.compiled[prop_name] is None: +# # new entry +# _concept.compiled[prop_name] = value +# else: +# # make a list if there was a value +# previous_value = _concept.compiled[prop_name] +# if isinstance(previous_value, list): +# previous_value.append(value) +# else: +# new_value = [previous_value, value] +# _concept.compiled[prop_name] = new_value +# +# def _look_for_concept_match(_underlying): +# """ +# At some point, there is either an StrMatch or a ConceptMatch, +# that allowed the recognition. 
+# Look for the ConceptMatch, with recursion if needed +# """ +# if isinstance(_underlying.parsing_expression, ConceptExpression): +# return _underlying +# +# if not isinstance(_underlying, NonTerminalNode): +# return None +# +# if len(_underlying.children) != 1: +# return None +# +# return _look_for_concept_match(_underlying.children[0]) +# +# def _get_underlying_value(_underlying): +# concept_match_node = _look_for_concept_match(_underlying) +# if concept_match_node: +# # the value is a concept +# if id(concept_match_node) in _underlying_value_cache: +# result = _underlying_value_cache[id(concept_match_node)] +# else: +# ref_tpl = concept_match_node.parsing_expression.concept +# result = self.finalize_concept(sheerka, ref_tpl, concept_match_node.children[0], init_empty_body) +# _underlying_value_cache[id(concept_match_node)] = result +# else: +# # the value is a string +# result = DoNotResolve(_underlying.source) +# +# return result +# +# def _process_rule_name(_concept, _underlying): +# if _underlying.parsing_expression.rule_name: +# value = _get_underlying_value(_underlying) +# _add_prop(_concept, _underlying.parsing_expression.rule_name, value) +# _concept.metadata.need_validation = True +# +# if isinstance(_underlying, NonTerminalNode): +# for child in _underlying.children: +# _process_rule_name(_concept, child) +# +# key = (template.key, template.id) if template.id else template.key +# concept = sheerka.new(key) +# if init_empty_body and concept.metadata.body is None: +# value = _get_underlying_value(underlying) +# concept.compiled[ConceptParts.BODY] = value +# if underlying.parsing_expression.rule_name: +# _add_prop(concept, underlying.parsing_expression.rule_name, value) +# # KSI : Why don't we set concept.metadata.need_validation to True ? 
+# +# if isinstance(underlying, NonTerminalNode): +# for node in underlying.children: +# _process_rule_name(concept, node) +# +# return concept +# +# def encode_grammar(self, grammar): +# """ +# Transform the grammar into something that can easily can be serialized +# :param grammar: +# :return: +# """ +# +# def _encode(expression): +# if isinstance(expression, StrMatch): +# res = f"'{expression.to_match}'" +# +# elif isinstance(expression, ConceptExpression): +# res = core.utils.str_concept(expression.concept) +# +# elif isinstance(expression, Sequence): +# res = "(" + " ".join(_encode(c) for c in expression.nodes) + ")" +# +# elif isinstance(expression, OrderedChoice): +# res = "(" + "|".join(_encode(c) for c in expression.nodes) + ")" +# +# elif isinstance(expression, Optional): +# res = _encode(expression.nodes[0]) + "?" +# +# elif isinstance(expression, ZeroOrMore): +# res = _encode(expression.nodes[0]) + "*" +# +# elif isinstance(expression, OneOrMore): +# res = _encode(expression.nodes[0]) + "+" +# +# if expression.rule_name: +# res += "=" + expression.rule_name +# +# return res +# +# result = {} +# for k, v in grammar.items(): +# key = core.utils.str_concept(k) +# value = _encode(v) +# result[key] = value +# return result +# +# @staticmethod +# def get_bests(results): +# """ +# Returns the result that is the longest +# :param results: +# :return: +# """ +# by_end_pos = defaultdict(list) +# for result in results: +# by_end_pos[result.end].append(result) +# +# return by_end_pos[max(by_end_pos)] +# +# +# class ParsingExpressionVisitor: +# """ +# visit ParsingExpression +# """ +# +# def visit(self, parsing_expression): +# name = parsing_expression.__class__.__name__ +# +# method = 'visit_' + name +# visitor = getattr(self, method, self.generic_visit) +# return visitor(parsing_expression) +# +# def generic_visit(self, parsing_expression): +# if hasattr(self, "visit_all"): +# self.visit_all(parsing_expression) +# +# for node in parsing_expression.elements: +# if 
isinstance(node, Concept): +# self.visit(ConceptExpression(node.key or node.name)) +# elif isinstance(node, str): +# self.visit(StrMatch(node)) +# else: +# self.visit(node) diff --git a/src/parsers/_ConceptsWithConceptsParser.py b/src/parsers/_ConceptsWithConceptsParser.py new file mode 100644 index 0000000..d0fe43e --- /dev/null +++ b/src/parsers/_ConceptsWithConceptsParser.py @@ -0,0 +1,108 @@ +# # try to match something like +# # ConceptNode 'plus' ConceptNode +# # +# # Replaced by SyaNodeParser +# from core.builtin_concepts import BuiltinConcepts +# from core.tokenizer import TokenKind, Token +# from parsers.BaseNodeParser import SourceCodeNode, ConceptNode, UnrecognizedTokensNode +# from parsers.BaseParser import BaseParser +# from parsers.MultipleConceptsParser import MultipleConceptsParser +# from core.concept import VARIABLE_PREFIX +# +# multiple_concepts_parser = MultipleConceptsParser() +# +# +# class ConceptsWithConceptsParser(BaseParser): +# def __init__(self, **kwargs): +# super().__init__("ConceptsWithConcepts", 25) +# self.enabled = False +# +# @staticmethod +# def get_tokens(nodes): +# tokens = [] +# +# for node in nodes: +# if isinstance(node, ConceptNode): +# index, line, column = node.tokens[0].index, node.tokens[0].line, node.tokens[0].column +# tokens.append(Token(TokenKind.CONCEPT, node.concept, index, line, column)) +# else: +# for token in node.tokens: +# if token.type == TokenKind.EOF: +# break +# elif token.type in (TokenKind.NEWLINE, TokenKind.WHITESPACE): +# continue +# else: +# tokens.append(token) +# +# return tokens +# +# @staticmethod +# def get_key(nodes): +# key = "" +# index = 0 +# for node in nodes: +# if key: +# key += " " +# +# if isinstance(node, UnrecognizedTokensNode): +# key += node.source.strip() +# else: +# key += f"{VARIABLE_PREFIX}{index}" +# index += 1 +# +# return key +# +# def finalize_concept(self, context, concept, nodes): +# index = 0 +# for node in nodes: +# +# if isinstance(node, ConceptNode): +# prop_name = 
list(concept.props.keys())[index] +# concept.compiled[prop_name] = node.concept +# context.log( +# f"Setting property '{prop_name}='{node.concept}'.", +# self.name) +# index += 1 +# elif isinstance(node, SourceCodeNode): +# prop_name = list(concept.props.keys())[index] +# sheerka = context.sheerka +# value = sheerka.new(BuiltinConcepts.PARSER_RESULT, parser=self, source=node.source, body=node.node) +# concept.compiled[prop_name] = [context.sheerka.ret(self.name, True, value)] +# context.log( +# f"Setting property '{prop_name}'='Python({node.source})'.", +# self.name) +# index += 1 +# +# return concept +# +# def parse(self, context, parser_input): +# sheerka = context.sheerka +# nodes = self.get_input_as_lexer_nodes(parser_input, multiple_concepts_parser) +# if not nodes: +# return None +# +# concept_key = self.get_key(nodes) +# concept = sheerka.new(concept_key) +# if sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT): +# return sheerka.ret( +# self.name, +# False, +# sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.body)) +# +# concepts = concept if hasattr(concept, "__iter__") else [concept] +# for concept in concepts: +# self.finalize_concept(context, concept, nodes) +# +# res = [] +# for concept in concepts: +# res.append(sheerka.ret( +# self.name, +# True, +# sheerka.new( +# BuiltinConcepts.PARSER_RESULT, +# parser=self, +# source=parser_input.source, +# body=concept, +# try_parsed=None))) +# +# return res[0] if len(res) == 1 else res diff --git a/src/parsers/_MultipleConceptsParser.py b/src/parsers/_MultipleConceptsParser.py new file mode 100644 index 0000000..7283d41 --- /dev/null +++ b/src/parsers/_MultipleConceptsParser.py @@ -0,0 +1,163 @@ +# # to be replaced by SyaNodeParser +# import ast +# +# from core.builtin_concepts import BuiltinConcepts +# from core.tokenizer import TokenKind +# from parsers.BaseNodeParser import SourceCodeNode +# from parsers.BaseParser import BaseParser +# from parsers.BnfNodeParser import BnfNodeParser, 
UnrecognizedTokensNode, ConceptNode +# import core.utils +# from parsers.PythonParser import PythonParser +# +# concept_lexer_parser = BnfNodeParser() +# +# +# class MultipleConceptsParser(BaseParser): +# """ +# Parser that will take the result of BnfNodeParser and +# try to resolve the unrecognized tokens token by token +# +# It is a success when it returns a list ConceptNode exclusively +# """ +# +# def __init__(self, **kwargs): +# BaseParser.__init__(self, "MultipleConcepts", 45) +# self.enabled = False +# +# @staticmethod +# def finalize(nodes_found, unrecognized_tokens): +# if not unrecognized_tokens: +# return nodes_found, unrecognized_tokens +# +# unrecognized_tokens.fix_source() +# if unrecognized_tokens.not_whitespace(): +# nodes_found = core.utils.product(nodes_found, [unrecognized_tokens]) +# +# return nodes_found, None +# +# @staticmethod +# def create_or_add(unrecognized_tokens, token, index): +# if unrecognized_tokens: +# unrecognized_tokens.add_token(token, index) +# else: +# unrecognized_tokens = UnrecognizedTokensNode(index, index, [token]) +# return unrecognized_tokens +# +# def parse(self, context, parser_input): +# sheerka = context.sheerka +# nodes = self.get_input_as_lexer_nodes(parser_input, concept_lexer_parser) +# if not nodes: +# return None +# +# nodes_found = [[]] +# concepts_only = True +# +# for node in nodes: +# if isinstance(node, UnrecognizedTokensNode): +# unrecognized_tokens = None +# i = 0 +# +# while i < len(node.tokens): +# +# token_index = node.start + i +# token = node.tokens[i] +# +# concepts_nodes = self.get_concepts_nodes(context, token_index, token) +# if concepts_nodes is not None: +# nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens) +# nodes_found = core.utils.product(nodes_found, concepts_nodes) +# i += 1 +# continue +# +# source_code_node = self.get_source_code_node(context, token_index, node.tokens[i:]) +# if source_code_node: +# nodes_found, unrecognized_tokens = 
self.finalize(nodes_found, unrecognized_tokens) +# nodes_found = core.utils.product(nodes_found, [source_code_node]) +# i += len(source_code_node.tokens) +# continue +# +# # not a concept nor some source code +# unrecognized_tokens = self.create_or_add(unrecognized_tokens, token, token_index) +# concepts_only &= token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE) +# i += 1 +# +# # finish processing if needed +# nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens) +# +# else: +# nodes_found = core.utils.product(nodes_found, [node]) +# +# ret = [] +# for choice in nodes_found: +# ret.append( +# sheerka.ret( +# self.name, +# concepts_only, +# sheerka.new( +# BuiltinConcepts.PARSER_RESULT, +# parser=self, +# source=parser_input.source, +# body=choice, +# try_parsed=None)) +# ) +# +# if len(ret) == 1: +# self.log_result(context, parser_input.source, ret[0]) +# return ret[0] +# else: +# self.log_multiple_results(context, parser_input.source, ret) +# return ret +# +# @staticmethod +# def get_concepts_nodes(context, index, token): +# """ +# Tries to recognize a concept +# from the univers of all known concepts +# """ +# +# if token.type != TokenKind.IDENTIFIER: +# return None +# +# concept = context.new_concept(token.value) +# if hasattr(concept, "__iter__") or context.sheerka.is_known(concept): +# concepts = concept if hasattr(concept, "__iter__") else [concept] +# concepts_nodes = [ConceptNode(c, index, index, [token], token.value) for c in concepts] +# return concepts_nodes +# +# return None +# +# @staticmethod +# def get_source_code_node(context, index, tokens): +# """ +# Tries to recognize source code. 
+# For the time being, only Python is supported +# :param context: +# :param tokens: +# :param index: +# :return: +# """ +# +# if len(tokens) == 0 or (len(tokens) == 1 and tokens[0].type == TokenKind.EOF): +# return None +# +# end_index = len(tokens) +# while end_index > 0: +# parser = PythonParser() +# tokens_to_parse = tokens[:end_index] +# res = parser.parse(context, tokens_to_parse) +# if res.status: +# # only expression are accepted +# ast_ = res.value.value.ast_ +# if not isinstance(ast_, ast.Expression): +# return None +# try: +# compiled = compile(ast_, "", "eval") +# eval(compiled, {}, {}) +# except Exception: +# return None +# +# source = BaseParser.get_text_from_tokens(tokens_to_parse) +# return SourceCodeNode(res.value.value, index, index + end_index - 1, tokens_to_parse, source) +# end_index -= 1 +# +# return None diff --git a/src/printer/Formatter.py b/src/printer/Formatter.py index 769de86..25404fb 100644 --- a/src/printer/Formatter.py +++ b/src/printer/Formatter.py @@ -4,6 +4,11 @@ from printer.FormatInstructions import FormatDetailDesc, FormatDetailType, Forma class Formatter: def __init__(self): + self.custom_l_formats = None + self.custom_d_formats = None + self.reset_formats() + + def reset_formats(self): self.custom_l_formats = {} self.custom_d_formats = [] diff --git a/src/printer/SheerkaPrinter.py b/src/printer/SheerkaPrinter.py index 8959033..952f5dc 100644 --- a/src/printer/SheerkaPrinter.py +++ b/src/printer/SheerkaPrinter.py @@ -29,10 +29,15 @@ class SheerkaPrinter: self.sheerka = sheerka self.formatter = Formatter() self.formatter.register_format_l(EXECUTION_CONTEXT_CLASS, "[{id:3}] %tab%{desc} ({status})") + self.custom_concepts_printers = None + self.reset() + + def reset(self): self.custom_concepts_printers = { str(BuiltinConcepts.EXPLANATION): self.print_explanation, str(BuiltinConcepts.RETURN_VALUE): self.print_return_value, } + self.formatter.reset_formats() def register_custom_printer(self, concept, custom_format): key = 
concept.key if isinstance(concept, Concept) else concept diff --git a/src/sdp/sheerkaDataProvider.py b/src/sdp/sheerkaDataProvider.py index 01503f2..9ba36f8 100644 --- a/src/sdp/sheerkaDataProvider.py +++ b/src/sdp/sheerkaDataProvider.py @@ -1,8 +1,9 @@ import hashlib import json -import zlib +import time from dataclasses import dataclass from datetime import datetime, date +from threading import RLock from core.sheerka_logger import get_logger from sdp.sheerkaDataProviderIO import SheerkaDataProviderIO @@ -20,22 +21,19 @@ def json_default_converter(o): if isinstance(o, (date, datetime)): return o.isoformat() - if isinstance(o, SheerkaDataProviderRef): - return f"##XREF##:{o.target}" - class Event(object): """ Class that represents something that modifies the state of the system """ - def __init__(self, message="", user="", date=datetime.now(), parents=None): - self.version = 1 - self.user = user - self.date = date - self.message = message - self.parents = parents - self._digest = None + def __init__(self, message="", user_id="", date=datetime.now(), parents=None): + self.version = 1 # if the class Event ever changes, to keep track of the version + self.user_id = user_id # id of the user that triggers the modification + self.date = date # when + self.message = message # user input or whatever that modifies the system + self.parents = parents # digest(s) of the parent(s) of this event + self._digest = None # digest of the event def __str__(self): return f"{self.date.strftime('%d/%m/%Y %H:%M:%S')} {self.message}" @@ -52,14 +50,14 @@ class Event(object): if self._digest: return self._digest - if self.message == "" and self.user == "": + if self.message == "" and self.user_id == "": self._digest = "xxx" # to speed unit tests return self._digest if not isinstance(self.message, str): raise NotImplementedError - to_hash = f"Event:{self.user}{self.date}{self.message}{self.parents}".encode("utf-8") + to_hash = 
f"Event:{self.user_id}{self.date}{self.message}{self.parents}".encode("utf-8") self._digest = hashlib.sha256(to_hash).hexdigest() return self._digest @@ -67,71 +65,13 @@ class Event(object): return self.__dict__ def from_dict(self, as_dict): - self.user = as_dict["user"] + self.user_id = as_dict["user_id"] self.date = datetime.fromisoformat(as_dict["date"]) self.message = as_dict["message"] self.parents = as_dict["parents"] self._digest = as_dict["_digest"] # freeze the digest -class ObjToUpdate: - """ - Internal key value class; - You give it an obj, and it tries to figure out what is the key of the obj - Note that you can force the key if you want - It was first create to make the difference between an object that has a key and {key, value} - """ - - def __init__(self, obj, key=None, digest=None): - self.obj = obj - self.has_key = None - self.has_digest = None - self._key = None - self._digest = None - if key is not None: - self.set_key(key) - if digest is not None: - self.set_digest(digest) - - def get_key(self): - if self.has_key is None: - key = SheerkaDataProvider.get_obj_key(self.obj) - if key is None: - self.has_key = False - return None - else: - self.has_key = True - self._key = key - return key - elif not self.has_key: - return None - else: - return self._key - - def get_digest(self): - if self.has_digest is None: - digest = SheerkaDataProvider.get_obj_digest(self.obj) - if digest is None: - self.has_digest = False - return None - else: - self.has_digest = True - self._digest = digest - return digest - elif not self.has_digest: - return None - else: - return self._digest - - def set_digest(self, digest): - self.has_digest = True - self._digest = digest - - def set_key(self, key): - self.has_key = True - self._key = key - - class State: """ Class that represents the state of the system (dictionary of all known entries) @@ -144,201 +84,86 @@ class State: self.events = [] self.data = {} - @staticmethod - def check_duplicate(items, obj: ObjToUpdate, key): - 
digest = obj.get_digest() - if digest is None: - return - - if not hasattr(items, "__iter__"): - items = [items] - - for item in items: - item_digest = SheerkaDataProvider.get_obj_digest(item) - if item_digest == digest: - raise SheerkaDataProviderDuplicateKeyError(key, obj.obj) - - def update(self, entry, obj: ObjToUpdate, append=True): - """ - adds obj to entry - :param entry: - :param obj: - :param append: if True, duplicate keys will create lists - :return: - """ - obj_to_use = {obj.get_key(): obj.obj} if obj.has_key else obj.obj - - if entry not in self.data: - self.data[entry] = obj_to_use - - elif not append: - if isinstance(obj_to_use, dict): - self.data[entry].update(obj_to_use) - else: - self.data[entry] = obj_to_use - - elif isinstance(self.data[entry], list): - self.check_duplicate(self.data[entry], obj, entry) - self.data[entry].append(obj.obj) - - elif isinstance(obj_to_use, dict): - for k in obj_to_use: - if k not in self.data[entry]: - self.data[entry][k] = obj_to_use[k] - elif isinstance(self.data[entry][k], list): - self.check_duplicate(self.data[entry][k], obj, entry + "." + k) - self.data[entry][k].append(obj_to_use[k]) - else: - self.check_duplicate(self.data[entry][k], obj, entry + "." 
+ k) - self.data[entry][k] = [self.data[entry][k], obj_to_use[k]] - - elif isinstance(self.data[entry], dict): - raise SheerkaDataProviderError(f"Cannot found key on '{obj.obj}' while all other elements have.", obj.obj) - - else: - self.check_duplicate(self.data[entry], obj, entry) - self.data[entry] = [self.data[entry], obj_to_use] - - def modify(self, entry, key, obj, obj_key): - # if the key changes, make sure to remove the previous entry - append = False - if obj_key != key: - self.remove(entry, lambda k, o: k == key) # modify from on object to another - append = True - - self.update(entry, ObjToUpdate(obj, obj_key), append=append) - - def modify_in_list(self, entry, key, obj, obj_key, obj_origin, load_ref_if_needed, save_ref_if_needed): - found = False - to_remove = None - new_digest = None - - def _get_item_origin(o): - if hasattr(o, Serializer.ORIGIN): - return getattr(o, Serializer.ORIGIN) - - if isinstance(o, dict) and Serializer.ORIGIN in o: - return o[Serializer.ORIGIN] - - if hasattr(o, "get_digest"): - return o.get_digest() - - if isinstance(o, str): - return o - - return None - - for i in range(len(self.data[entry][key])): - item, is_ref = load_ref_if_needed(self.data[entry][key][i]) - item_origin = _get_item_origin(item) - if item_origin is None: - continue - if item_origin == obj_origin: - obj = save_ref_if_needed(is_ref, obj) - if is_ref: - new_digest = obj[len(SheerkaDataProvider.REF_PREFIX):] - if obj_key == key: - self.data[entry][key][i] = obj - else: - to_remove = i - self.update(entry, ObjToUpdate(obj, obj_key), append=True) - found = True - break - - if not found: - raise (SheerkaDataProviderError(f"Cannot modify '{entry}.{key}'. 
Item '{obj_origin}' not found.", obj)) - - if to_remove is not None: - del self.data[entry][key][to_remove] - - return new_digest - - def remove(self, entry, filter): - if filter is None: - del (self.data[entry]) - - elif isinstance(self.data[entry], dict): - keys_to_remove = [] - for key, element in self.data[entry].items(): - if filter(key, element): - keys_to_remove.append(key) - for key in keys_to_remove: - del (self.data[entry][key]) - - elif not isinstance(self.data[entry], list): - if filter(self.data[entry]): - del (self.data[entry]) - - else: - for element in self.data[entry]: - if filter(element): - self.data[entry].remove(element) - def get_digest(self): as_json = json.dumps(self.__dict__, default=json_default_converter) return hashlib.sha256(as_json.encode("utf-8")).hexdigest() - def contains(self, entry, key): - """ - if key is None, returns True if entry exists - if key has a value - returns True if entry is an dict and contains key - :param entry: - :param key: - :return: - """ - if entry not in self.data: - return False - if key is None: - return entry in self.data - if not isinstance(self.data[entry], dict): - return False - return key in self.data[entry] - - -class SheerkaDataProviderError(Exception): - def __init__(self, message, obj): - Exception.__init__(self, message) - self.obj = obj - - -class SheerkaDataProviderDuplicateKeyError(Exception): - def __init__(self, key, obj): - Exception.__init__(self, "Duplicate object.") - self.key = key - self.obj = obj - @dataclass class SheerkaDataProviderResult: """ Object that is returned after adding, setting or modifying an entry """ - obj: object # obj that was given to store/modify entry: str # entry where the object is put key: str # key to use to retrieve the object digest: str # digest used to store the reference - already_exists: bool = False # the same object was already persisted -@dataclass -class SheerkaDataProviderRef: - """ - Object that tells where an object is store (target is the digest 
of the reference) - """ - key: str # key of the object - target: str # digest of the reference - original_target: str = None # when the object is modified, previous digest +class SheerkaDataProviderTransaction: - def get_digest(self): - return self.original_target + def __init__(self, sdp, event): + self.sdp = sdp + self.event = event + self.state = None + self.snapshot = None + self.event_digest = None - def get_key(self): - return self.key + def __enter__(self): + self.sdp.lock.acquire() + + # save the event if needed + self.event_digest = self.sdp.save_event(self.event) if isinstance(self.event, Event) else self.event + + # load state + self.snapshot = self.sdp.get_snapshot(SheerkaDataProvider.HeadFile) + self.state = self.sdp.load_state(self.snapshot) + return self + + def add(self, entry, key, items, use_ref=False): + """Adds items to the state""" + with self.sdp.lock: + if entry not in self.state.data: + self.state.data[entry] = {} + + if use_ref: + if isinstance(items, list): + items = [self.sdp.REF_PREFIX + self.sdp.save_obj(item) for item in items] + elif isinstance(items, set): + items = {self.sdp.REF_PREFIX + self.sdp.save_obj(item) for item in items} + else: + items = self.sdp.REF_PREFIX + self.sdp.save_obj(items) + + if key: + self.state.data[entry][key] = items + else: + self.state.data[entry] = items + + def remove(self, entry, key): + """ + Remove an entry + :param entry: + :param key: + :return: + """ + with self.sdp.lock: + try: + del (self.state.data[entry][key]) + except KeyError: + pass + + def __exit__(self, exc_type, exc_val, exc_tb): + self.state.parents = [] if self.snapshot is None else [self.snapshot] + self.state.events = [self.event_digest] + self.state.date = datetime.now() + + self.snapshot = self.sdp.save_state(self.state) + self.sdp.set_snapshot(SheerkaDataProvider.HeadFile, self.snapshot) + + return False # let's escalate the exceptions class SheerkaDataProvider: - """Manages the state of the system""" + """Manages the persistence 
state of the system""" EventFolder = "events" StateFolder = "state" @@ -359,49 +184,7 @@ class SheerkaDataProvider: self.first_time = self.io.first_time self.serializer = Serializer() - - @staticmethod - def get_obj_key(obj): - """ - Tries to find the key of an object - Look for .key, .get_key() - :param obj: - :return: String version of that is found, None otherwise - """ - return str(obj.get_key()) if hasattr(obj, "get_key") \ - else str(obj.key) if hasattr(obj, "key") \ - else None - - @staticmethod - def get_obj_digest(obj): - """ - Tries to find the key of an object - Look for .digest, .get_digest() - :param obj: - :return: digest, None otherwise - """ - if isinstance(obj, str) and obj.startswith(SheerkaDataProvider.REF_PREFIX): - return obj[len(SheerkaDataProvider.REF_PREFIX):] - - return obj.digest if hasattr(obj, "digest") \ - else obj.get_digest() if hasattr(obj, "get_digest") \ - else None - - @staticmethod - def get_obj_origin(obj): - """ - Get the digest used to save obj if set - """ - if isinstance(obj, dict) and Serializer.ORIGIN in obj: - return obj[Serializer.ORIGIN] - - if hasattr(obj, Serializer.ORIGIN): - return getattr(obj, Serializer.ORIGIN) - - if isinstance(obj, SheerkaDataProviderRef): - return obj.original_target - - return None + self.lock = RLock() @staticmethod def get_stream_digest(stream): @@ -412,377 +195,60 @@ class SheerkaDataProvider: stream.seek(0) return sha256_hash.hexdigest() - @staticmethod - def is_reference(obj): - return isinstance(obj, str) and obj.startswith(SheerkaDataProvider.REF_PREFIX) + def get_transaction(self, event): + return SheerkaDataProviderTransaction(self, event) - def reset(self): - self.first_time = self.io.first_time - if hasattr(self.io, "reset"): - self.io.reset() - - def add(self, event_digest: str, entry, obj, allow_multiple=True, use_ref=False): + def get(self, entry, key=None, default=None, load_origin=True): """ - Adds obj to the entry 'entry' - :param event_digest: digest of the event that 
triggers the modification of the state - :param entry: entry of the state to update - :param obj: obj to insert or add - :param allow_multiple: if set to true, the same key can be added several times. - All entries will be put in a list - :param use_ref: if True the actual object is saved under 'objects' folder, - only a reference is saved in the state - :return: (entry, key) to retrieve the object - """ - - original_obj = obj.copy() if isinstance(obj, dict) else obj - - snapshot = self.get_snapshot(SheerkaDataProvider.HeadFile) - state = self.load_state(snapshot) - - self.log.debug(f"Adding obj '{obj}' in entry '{entry}' (allow_multiple={allow_multiple}, use_ref={use_ref})") - - if not isinstance(obj, ObjToUpdate): - obj = ObjToUpdate(obj) - - # check uniqueness, cannot add the same key twice if allow_multiple == False - key = obj.get_key() - self.log.debug(f"key found : '{key}'") if key else self.log.debug("No key found") - if not allow_multiple: - if isinstance(obj.obj, dict): - for k in obj.obj: - if state.contains(entry, k): - raise IndexError(f"{entry}.{k}") - else: - if state.contains(entry, key): - raise IndexError(f"{entry}.{key}" if key else entry) - - state.parents = [] if snapshot is None else [snapshot] - state.events = [event_digest] - state.date = datetime.now() - - if use_ref: - obj.set_digest(self.save_obj(obj.obj)) - obj.obj = self.REF_PREFIX + obj.get_digest() - - state.update(entry, obj) - - new_snapshot = self.save_state(state) - self.set_snapshot(SheerkaDataProvider.HeadFile, new_snapshot) - return SheerkaDataProviderResult(original_obj, entry, obj.get_key(), obj.get_digest()) - - def add_with_auto_key(self, event_digest: str, entry, obj): - """ - Add obj to entry. 
An autogenerated key created for obj - :param event_digest: - :param entry: - :param obj: - :return: - """ - - original_obj = obj.copy() if isinstance(obj, dict) else obj - - next_key = self.get_next_key(entry) - if hasattr(obj, "set_key"): - obj.set_key(next_key) - res = self.add(event_digest, entry, ObjToUpdate(obj, next_key)) - return SheerkaDataProviderResult(original_obj, res.entry, res.key, res.digest) - - def add_unique(self, event_digest: str, entry, obj): - """Add an entry and make sure it's unique""" - - original_obj = obj.copy() if isinstance(obj, dict) else obj - - snapshot = self.get_snapshot(SheerkaDataProvider.HeadFile) - state = self.load_state(snapshot) - - state.parents = [] if snapshot is None else [snapshot] - state.events = [event_digest] - state.date = datetime.now() - if entry not in state.data: - state.data[entry] = {obj} - already_exist = False - else: - already_exist = obj in state.data[entry] - if not already_exist: - state.data[entry].add(obj) - - new_snapshot = self.save_state(state) - self.set_snapshot(SheerkaDataProvider.HeadFile, new_snapshot) - return SheerkaDataProviderResult( - original_obj, - entry, - None, - None, - already_exist) - - def set(self, event_digest, entry, obj, use_ref=False, is_ref=False): - """ - Add or replace an entry. The entry is reinitialized. 
- If the previous value was dict, all keys are lost - :param event_digest: - :param entry: - :param obj: - :param use_ref: Do not save obj in State (save it under objects), use_ref in State - :param is_ref: obj is supposed to be a reference - :return: - """ - - original_obj = obj.copy() if isinstance(obj, dict) else obj - - if use_ref and is_ref: - raise SheerkaDataProviderError("Cannot use use_ref and is_ref at the same time", None) - - if is_ref and not isinstance(obj, dict): - raise SheerkaDataProviderError("is_ref can only be used with dictionaries", obj) - - snapshot = self.get_snapshot(SheerkaDataProvider.HeadFile) - state = self.load_state(snapshot) - - state.parents = [] if snapshot is None else [snapshot] - state.events = [event_digest] - state.date = datetime.now() - - key = self.get_obj_key(obj) - obj = self.save_ref_if_needed(use_ref, obj) - - if is_ref: - for k, v in obj.items(): - obj[k] = self.REF_PREFIX + v - - state.data[entry] = obj if key is None else {key: obj} - - new_snapshot = self.save_state(state) - self.set_snapshot(SheerkaDataProvider.HeadFile, new_snapshot) - return SheerkaDataProviderResult(original_obj, entry, key, self.get_obj_digest(obj)) - - def modify(self, event_digest, entry, key, obj): - """ - Replace an element - If the key is not provided, has the same effect than set eg, the entry is reset - :param event_digest: - :param entry: - :param key: key of the object to update - :param obj: new data - :return: - """ - - original_obj = obj.copy() if isinstance(obj, dict) else obj - - if key is None: - raise SheerkaDataProviderError("Key is mandatory.", None) - - snapshot = self.get_snapshot(SheerkaDataProvider.HeadFile) - state = self.load_state(snapshot) - - if entry not in state.data: - raise IndexError(entry) - - if key is not None and key not in state.data[entry]: - raise IndexError(f"{entry}.{key}") - - state.parents = [] if snapshot is None else [snapshot] - state.events = [event_digest] - state.date = datetime.now() - - # Gets 
obj original key, it will help to know if the key has changed - obj_key = self.get_obj_key(obj) or key - digest = None - - if isinstance(state.data[entry][key], list): - obj_origin = self.get_obj_origin(obj) - if obj_origin is None: - raise (SheerkaDataProviderError(f"Multiple entries under '{entry}.{key}'", obj)) - - digest = state.modify_in_list( - entry, - key, - obj, - obj_key, - obj_origin, - self.load_ref_if_needed, - self.save_ref_if_needed) - - else: - was_saved_as_reference = self.is_reference(state.data[entry][key]) - if was_saved_as_reference: - obj = self.save_ref_if_needed(True, obj) - digest = self.get_obj_digest(obj) - - state.modify(entry, key, obj, obj_key) - - new_snapshot = self.save_state(state) - self.set_snapshot(SheerkaDataProvider.HeadFile, new_snapshot) - return SheerkaDataProviderResult(original_obj, entry, obj_key, digest) - - def list(self, entry, filter=None): - """ - Lists elements of entry 'entry' - :param entry: name of the entry to list - :param filter: filter to use - :return: list of elements - """ - snapshot = self.get_snapshot(SheerkaDataProvider.HeadFile) - state = self.load_state(snapshot) - if entry not in state.data: - return [] - - elements = state.data[entry] - - if isinstance(elements, dict): - # manage when elements have a key - filter_to_use = (lambda k, o: True) if filter is None else filter - for key, element in elements.items(): - if filter_to_use(key, element): - if isinstance(element, list): - yield [self.load_ref_if_needed(e)[0] for e in element] - else: - yield self.load_ref_if_needed(element)[0] - else: - # manage when no key is defined for the elements - if not isinstance(elements, list) and not isinstance(elements, set): - elements = [elements] - - filter_to_use = (lambda o: True) if filter is None else filter - for element in elements: - if filter_to_use(element): - yield self.load_ref_if_needed(element)[0] - - def remove(self, event_digest, entry, filter=None, silent_remove=True): - """ - Removes elements 
under the entry 'entry' - :param event_digest: event that triggers the deletion - :param entry: - :param filter: filter to use - :param silent_remove: Do not throw exception if entry does not exist - :return: new sha256 of the state - TODO: Remove by key - """ - snapshot = self.get_snapshot(SheerkaDataProvider.HeadFile) - state = self.load_state(snapshot) - - if entry not in state.data: - if silent_remove: - return snapshot - else: - raise IndexError(entry) - - state.parents = [] if snapshot is None else [snapshot] - state.events = [event_digest] - state.date = datetime.now() - state.remove(entry, filter) - - new_snapshot = self.save_state(state) - self.set_snapshot(SheerkaDataProvider.HeadFile, new_snapshot) - return new_snapshot - - def get(self, entry, key=None, load_origin=True): - """ - Retrieve an element by its key + Get an element :param entry: :param key: - :param load_origin: if True, adds the origin (parent digest) to the object + :param default: value to return is not found + :param load_origin: adds the parent object digest to the object :return: """ - snapshot = self.get_snapshot(SheerkaDataProvider.HeadFile) - state = self.load_state(snapshot) + with self.lock: + snapshot = self.get_snapshot(SheerkaDataProvider.HeadFile) + state = self.load_state(snapshot) - if entry not in state.data: - raise IndexError(entry) + if entry not in state.data: + return default - if key is not None and key not in state.data[entry]: - raise IndexError(f"{entry}.{key}") + if key is not None and key not in state.data[entry]: + return default - item = state.data[entry] if key is None else state.data[entry][key] - if isinstance(item, list): - return [self.load_ref_if_needed(i, load_origin)[0] for i in item] + item = state.data[entry] if key is None else state.data[entry][key] + if isinstance(item, list): + return [self.load_ref_if_needed(i, load_origin) for i in item] + elif isinstance(item, set): + return {self.load_ref_if_needed(i, load_origin) for i in item} + return 
self.load_ref_if_needed(item, load_origin) - return self.load_ref_if_needed(item, load_origin)[0] - - def get_safe(self, entry, key=None, load_origin=True): - """ - Retrieve an element by its key. Return None if the element does not exist - :param entry: - :param key: - :param load_origin: if True, adds the origin (parent digest) to the object - :return: - """ - snapshot = self.get_snapshot(SheerkaDataProvider.HeadFile) - state = self.load_state(snapshot) - - if entry not in state.data: - return None - - if key is not None and key not in state.data[entry]: - return None - - item = state.data[entry] if key is None else state.data[entry][key] - if isinstance(item, list): - return [self.load_ref_if_needed(i, load_origin)[0] for i in item] - - return self.load_ref_if_needed(item, load_origin)[0] - - def get_ref(self, entry, key=None): - """ - Returns the reference of an object if the object exists - This function allows to retrieve obj.##origin## without loading the object - :param entry: - :param key: - :return: - """ - - snapshot = self.get_snapshot(SheerkaDataProvider.HeadFile) - state = self.load_state(snapshot) - - if entry not in state.data: - raise IndexError(entry) - - if key is not None and key not in state.data[entry]: - raise IndexError(f"{entry}.{key}") - - item = state.data[entry] if key is None else state.data[entry][key] - if isinstance(item, list): - res = [] - for element in item: - if not self.is_reference(element): - raise SheerkaDataProviderError("Not a reference", f"{entry}.{key}") - res.append(self.get_obj_digest(element)) - return res - - if not self.is_reference(item): - raise SheerkaDataProviderError("Not a reference", f"{entry}.{key}") - - return self.get_obj_digest(item) - - def exists(self, entry, key=None, digest=None): + def exists(self, entry, key=None): """ Returns true if the entry is defined :param key: :param entry: - :param digest: digest of the object, when several entries share the same key :return: """ - snapshot = 
self.get_snapshot(SheerkaDataProvider.HeadFile) - state = self.load_state(snapshot) - exist = entry in state.data + with self.lock: + snapshot = self.get_snapshot(SheerkaDataProvider.HeadFile) + state = self.load_state(snapshot) - if not exist or key is None: - return exist + exist = entry in state.data + if not exist or key is None: + return exist - items = state.data[entry] - exist = key in items - if not exist or digest is None: - return exist + return key in state.data[entry] - items = items[key] - if not isinstance(items, list): - items = [items] - - for item in items: - item_digest = SheerkaDataProvider.get_obj_digest(item) - if item_digest == digest: - return True - - return False + def reset(self): + """ + Test only, delete all entries + :return: + """ + self.first_time = self.io.first_time + if hasattr(self.io, "reset"): + self.io.reset() def save_event(self, event: Event): """ @@ -848,6 +314,36 @@ class SheerkaDataProvider: digest = event.parents[0] count += 1 + def get_snapshot(self, file): + head_file = self.io.path_join(file) + if not self.io.exists(head_file): + return None + return self.io.read_text(head_file) + + def set_snapshot(self, file, digest): + head_file = self.io.path_join(file) + return self.io.write_text(head_file, digest) + + def load_state(self, digest): + if digest is None: + return State() + + target_path = self.io.get_obj_path(SheerkaDataProvider.StateFolder, digest) + with self.io.open(target_path, "rb") as f: + context = SerializerContext(sheerka=self.sheerka) + return self.serializer.deserialize(f, context) + + def save_state(self, state: State): + digest = state.get_digest() + self.log.debug(f"Saving new state. 
digest={digest}") + target_path = self.io.get_obj_path(SheerkaDataProvider.StateFolder, digest) + if self.io.exists(target_path): + return digest + + context = SerializerContext(sheerka=self.sheerka) + self.io.write_binary(target_path, self.serializer.serialize(state, context).read()) + return digest + def get_result_file_path(self, digest, is_admin): ext = "_admin_result" if is_admin else "_result" return self.io.get_obj_path(SheerkaDataProvider.EventFolder, digest) + ext @@ -872,14 +368,18 @@ class SheerkaDataProvider: :param is_admin: True is the result is an internal admin result file :return: """ + start = time.time() + message = execution_context.event.message digest = execution_context.event.get_digest() - self.log.debug(f"Saving execution context. digest={digest}") + self.log.debug(f"Saving execution context. digest={digest}, message={message}") target_path = self.get_result_file_path(digest, is_admin) if self.io.exists(target_path): return digest context = SerializerContext(sheerka=self.sheerka) - self.io.write_binary(target_path, self.serializer.serialize(execution_context, context).read()) + length = self.io.write_binary(target_path, self.serializer.serialize(execution_context, context).read()) + elapsed = time.time() - start + self.log.debug(f"Saved execution context. message={message}, length={length}, elapsed={elapsed}") return digest def load_result(self, digest, is_admin=False): @@ -895,25 +395,18 @@ class SheerkaDataProvider: context = SerializerContext(sheerka=self.sheerka) return self.serializer.deserialize(f, context) - def save_state(self, state: State): - digest = state.get_digest() - self.log.debug(f"Saving new state. 
digest={digest}") - target_path = self.io.get_obj_path(SheerkaDataProvider.StateFolder, digest) - if self.io.exists(target_path): - return digest + def load_ref_if_needed(self, obj, load_origin=True): + """ + Make sure the real obj is returned + :param obj: + :param load_origin: + :return: + """ + if not isinstance(obj, str) or not obj.startswith(SheerkaDataProvider.REF_PREFIX): + return obj - context = SerializerContext(sheerka=self.sheerka) - self.io.write_binary(target_path, self.serializer.serialize(state, context).read()) - return digest - - def load_state(self, digest): - if digest is None: - return State() - - target_path = self.io.get_obj_path(SheerkaDataProvider.StateFolder, digest) - with self.io.open(target_path, "rb") as f: - context = SerializerContext(sheerka=self.sheerka) - return self.serializer.deserialize(f, context) + resolved = self.load_obj(obj[len(SheerkaDataProvider.REF_PREFIX):], load_origin) + return resolved or obj def save_obj(self, obj): self.log.debug(f"Saving '{obj}' as reference...") @@ -950,133 +443,3 @@ class SheerkaDataProvider: elif not isinstance(obj, str): setattr(obj, Serializer.ORIGIN, digest) return obj - - def load_ref_if_needed(self, obj, load_origin=True): - if isinstance(obj, SheerkaDataProviderRef): - resolved = self.load_obj(obj.target, load_origin) - return resolved, False - - if not isinstance(obj, str) or not obj.startswith(SheerkaDataProvider.REF_PREFIX): - return obj, False - - resolved = self.load_obj(obj[len(SheerkaDataProvider.REF_PREFIX):], load_origin) - return (obj, False) if resolved is None else (resolved, True) - - def save_ref_if_needed(self, save_ref, obj): - if not save_ref: - return obj - - digest = self.save_obj(obj) - return self.REF_PREFIX + digest - - def get_cache_params(self, category, key): - digest = hashlib.sha3_256(f"{category}:{key}".encode("utf-8")).hexdigest() - cache_path = self.io.get_obj_path(SheerkaDataProvider.CacheFolder, digest) - return digest, cache_path - - def add_to_cache(self, 
category, key, obj, update=False): - """ - Save obj in the internal cache system - :param category: - :param key: - :param obj: - :param update: - :return: - """ - digest, cache_path = self.get_cache_params(category, key) - - if self.io.exists(cache_path) and not update: - return digest - - self.io.write_binary(cache_path, zlib.compress(obj.encode("utf-8"), 9)) - return digest - - def load_from_cache(self, category, key): - """ - Reload a compress object from the cache - :param category: - :param key: - :return: - """ - digest, cache_path = self.get_cache_params(category, key) - - if not self.io.exists(cache_path): - raise IndexError(f"{category}.{key}") - - with self.io.open(cache_path, "rb") as f: - return zlib.decompress(f.read()).decode("utf-8") - - def remove_from_cache(self, category, key): - """ - - :param category: - :param key: - :return: - """ - digest, cache_path = self.get_cache_params(category, key) - if self.io.exists(cache_path): - self.io.remove(cache_path) - - return digest - - def in_cache(self, category, key): - """ - Returns true if the key is in cache - :param category: - :param key: - :return: - """ - digest, cache_path = self.get_cache_params(category, key) - return self.io.exists(cache_path) - - def get_snapshot(self, file): - head_file = self.io.path_join(file) - if not self.io.exists(head_file): - return None - return self.io.read_text(head_file) - # with open(head_file, "r") as f: - # return f.read() - - def set_snapshot(self, file, digest): - head_file = self.io.path_join(file) - return self.io.write_text(head_file, digest) - # with open(head_file, "w") as f: - # return f.write(digest) - - def load_keys(self): - keys_file = self.io.path_join(SheerkaDataProvider.KeysFile) - if not self.io.exists(keys_file): - keys = {} - else: - with self.io.open(keys_file, "r") as f: - keys = json.load(f) - return keys - - def save_keys(self, keys): - keys_file = self.io.path_join(SheerkaDataProvider.KeysFile) - with self.io.open(keys_file, "w") as f: - 
json.dump(keys, f) - - def get_next_key(self, entry): - keys = self.load_keys() - - next_key = keys.get(entry, 0) + 1 - keys[entry] = next_key - - self.save_keys(keys) - return str(next_key) - - def set_key(self, entry, value): - keys = self.load_keys() - keys[entry] = value - self.save_keys(keys) - return str(value) - - def dump_state(self, digest=None): - digest = digest or self.get_snapshot(SheerkaDataProvider.HeadFile) - state = self.load_state(digest) - print(json.dumps(state.data, sort_keys=True, default=json_default_converter, indent=True)) - - def dump_obj(self, digest): - obj = self.load_obj(digest) - print(json.dumps(obj.__dict__, sort_keys=True, default=json_default_converter, indent=True)) diff --git a/src/sdp/sheerkaDataProviderIO.py b/src/sdp/sheerkaDataProviderIO.py index c7c3df9..1e2cbf6 100644 --- a/src/sdp/sheerkaDataProviderIO.py +++ b/src/sdp/sheerkaDataProviderIO.py @@ -77,9 +77,11 @@ class SheerkaDataProviderFileIO(SheerkaDataProviderIO): def write_text(self, file_path, content): self._write(file_path, content, "w") + return len(content) def write_binary(self, file_path, content): self._write(file_path, content, "wb") + return len(content) def exists(self, file_path): return path.exists(file_path) @@ -120,10 +122,12 @@ class SheerkaDataProviderMemoryIO(SheerkaDataProviderIO): def write_binary(self, file_path, content): self._ensure_parent_folder(file_path) self.mem_fs.writebytes(file_path, content) + return len(content) def write_text(self, file_path, content): self._ensure_parent_folder(file_path) self.mem_fs.writetext(file_path, content) + return len(content) def remove(self, file_path): self.mem_fs.remove(file_path) @@ -155,9 +159,11 @@ class SheerkaDataProviderDictionaryIO(SheerkaDataProviderIO): def write_binary(self, file_path, content): self.cache[file_path] = content + return len(content) def write_text(self, file_path, content): self.cache[file_path] = content + return len(content) def remove(self, file_path): del 
(self.cache[file_path]) diff --git a/src/sdp/sheerkaDataProvider_Old.py b/src/sdp/sheerkaDataProvider_Old.py new file mode 100644 index 0000000..f8dd4e4 --- /dev/null +++ b/src/sdp/sheerkaDataProvider_Old.py @@ -0,0 +1,1087 @@ +import hashlib +import json +import zlib +import time +from dataclasses import dataclass +from datetime import datetime, date + +from core.sheerka_logger import get_logger +from sdp.sheerkaDataProviderIO import SheerkaDataProviderIO +from sdp.sheerkaSerializer import Serializer, SerializerContext + + +def json_default_converter(o): + """ + Default formatter for json + It's used when the json serializer does not know + how to serialise a type + :param o: + :return: + """ + if isinstance(o, (date, datetime)): + return o.isoformat() + + if isinstance(o, SheerkaDataProviderRef): + return f"##XREF##:{o.target}" + + +class Event(object): + """ + Class that represents something that modifies the state of the system + """ + + def __init__(self, message="", user="", date=datetime.now(), parents=None): + self.version = 1 + self.user = user + self.date = date + self.message = message + self.parents = parents + self._digest = None + + def __str__(self): + return f"{self.date.strftime('%d/%m/%Y %H:%M:%S')} {self.message}" + + def __repr__(self): + return f"{self.get_digest()[:12]} {self.message}" + + def get_digest(self): + """ + Returns the digest of the event + :return: hexa form of the sha256 + """ + + if self._digest: + return self._digest + + if self.message == "" and self.user == "": + self._digest = "xxx" # to speed unit tests + return self._digest + + if not isinstance(self.message, str): + raise NotImplementedError + + to_hash = f"Event:{self.user}{self.date}{self.message}{self.parents}".encode("utf-8") + self._digest = hashlib.sha256(to_hash).hexdigest() + return self._digest + + def to_dict(self): + return self.__dict__ + + def from_dict(self, as_dict): + self.user = as_dict["user"] + self.date = datetime.fromisoformat(as_dict["date"]) + 
self.message = as_dict["message"] + self.parents = as_dict["parents"] + self._digest = as_dict["_digest"] # freeze the digest + + +class ObjToUpdate: + """ + Internal key value class; + You give it an obj, and it tries to figure out what is the key of the obj + Note that you can force the key if you want + It was first create to make the difference between an object that has a key and {key, value} + """ + + def __init__(self, obj, key=None, digest=None): + self.obj = obj + self.has_key = None + self.has_digest = None + self._key = None + self._digest = None + if key is not None: + self.set_key(key) + if digest is not None: + self.set_digest(digest) + + def get_key(self): + if self.has_key is None: + key = SheerkaDataProvider.get_obj_key(self.obj) + if key is None: + self.has_key = False + return None + else: + self.has_key = True + self._key = key + return key + elif not self.has_key: + return None + else: + return self._key + + def get_digest(self): + if self.has_digest is None: + digest = SheerkaDataProvider.get_obj_digest(self.obj) + if digest is None: + self.has_digest = False + return None + else: + self.has_digest = True + self._digest = digest + return digest + elif not self.has_digest: + return None + else: + return self._digest + + def set_digest(self, digest): + self.has_digest = True + self._digest = digest + + def set_key(self, key): + self.has_key = True + self._key = key + + +class State: + """ + Class that represents the state of the system (dictionary of all known entries) + """ + + def __init__(self): + self.version = 1 + self.date = None + self.parents = [] + self.events = [] + self.data = {} + + @staticmethod + def check_duplicate(items, obj: ObjToUpdate, key): + digest = obj.get_digest() + if digest is None: + return + + if not hasattr(items, "__iter__"): + items = [items] + + for item in items: + item_digest = SheerkaDataProvider.get_obj_digest(item) + if item_digest == digest: + raise SheerkaDataProviderDuplicateKeyError(key, obj.obj) + + def 
update(self, entry, obj: ObjToUpdate, append=True): + """ + adds obj to entry + :param entry: + :param obj: + :param append: if True, duplicate keys will create lists + :return: + """ + obj_to_use = {obj.get_key(): obj.obj} if obj.has_key else obj.obj + + if entry not in self.data: + self.data[entry] = obj_to_use + + elif not append: + if isinstance(obj_to_use, dict): + self.data[entry].update(obj_to_use) + else: + self.data[entry] = obj_to_use + + elif isinstance(self.data[entry], list): + self.check_duplicate(self.data[entry], obj, entry) + self.data[entry].append(obj.obj) + + elif isinstance(obj_to_use, dict): + for k in obj_to_use: + if k not in self.data[entry]: + self.data[entry][k] = obj_to_use[k] + elif isinstance(self.data[entry][k], list): + self.check_duplicate(self.data[entry][k], obj, entry + "." + k) + self.data[entry][k].append(obj_to_use[k]) + else: + self.check_duplicate(self.data[entry][k], obj, entry + "." + k) + self.data[entry][k] = [self.data[entry][k], obj_to_use[k]] + + elif isinstance(self.data[entry], dict): + raise SheerkaDataProviderError(f"Cannot found key on '{obj.obj}' while all other elements have.", obj.obj) + + else: + self.check_duplicate(self.data[entry], obj, entry) + self.data[entry] = [self.data[entry], obj_to_use] + + def modify(self, entry, key, obj, obj_key): + # if the key changes, make sure to remove the previous entry + append = False + if obj_key != key: + self.remove(entry, lambda k, o: k == key) # modify from on object to another + append = True + + self.update(entry, ObjToUpdate(obj, obj_key), append=append) + + def modify_in_list(self, entry, key, obj, obj_key, obj_origin, load_ref_if_needed, save_ref_if_needed): + found = False + to_remove = None + new_digest = None + + def _get_item_origin(o): + if hasattr(o, Serializer.ORIGIN): + return getattr(o, Serializer.ORIGIN) + + if isinstance(o, dict) and Serializer.ORIGIN in o: + return o[Serializer.ORIGIN] + + if hasattr(o, "get_digest"): + return o.get_digest() + + if 
isinstance(o, str): + return o + + return None + + for i in range(len(self.data[entry][key])): + item, is_ref = load_ref_if_needed(self.data[entry][key][i]) + item_origin = _get_item_origin(item) + if item_origin is None: + continue + if item_origin == obj_origin: + obj = save_ref_if_needed(is_ref, obj) + if is_ref: + new_digest = obj[len(SheerkaDataProvider.REF_PREFIX):] + if obj_key == key: + self.data[entry][key][i] = obj + else: + to_remove = i + self.update(entry, ObjToUpdate(obj, obj_key), append=True) + found = True + break + + if not found: + raise (SheerkaDataProviderError(f"Cannot modify '{entry}.{key}'. Item '{obj_origin}' not found.", obj)) + + if to_remove is not None: + del self.data[entry][key][to_remove] + + return new_digest + + def remove(self, entry, filter): + if filter is None: + del (self.data[entry]) + + elif isinstance(self.data[entry], dict): + keys_to_remove = [] + for key, element in self.data[entry].items(): + if filter(key, element): + keys_to_remove.append(key) + for key in keys_to_remove: + del (self.data[entry][key]) + + elif not isinstance(self.data[entry], list): + if filter(self.data[entry]): + del (self.data[entry]) + + else: + for element in self.data[entry]: + if filter(element): + self.data[entry].remove(element) + + def get_digest(self): + as_json = json.dumps(self.__dict__, default=json_default_converter) + return hashlib.sha256(as_json.encode("utf-8")).hexdigest() + + def contains(self, entry, key): + """ + if key is None, returns True if entry exists + if key has a value + returns True if entry is an dict and contains key + :param entry: + :param key: + :return: + """ + if entry not in self.data: + return False + if key is None: + return entry in self.data + if not isinstance(self.data[entry], dict): + return False + return key in self.data[entry] + + +class SheerkaDataProviderError(Exception): + def __init__(self, message, obj): + Exception.__init__(self, message) + self.obj = obj + + +class 
SheerkaDataProviderDuplicateKeyError(Exception): + def __init__(self, key, obj): + Exception.__init__(self, "Duplicate object.") + self.key = key + self.obj = obj + + +@dataclass +class SheerkaDataProviderResult: + """ + Object that is returned after adding, setting or modifying an entry + """ + obj: object # obj that was given to store/modify + entry: str # entry where the object is put + key: str # key to use to retrieve the object + digest: str # digest used to store the reference + already_exists: bool = False # the same object was already persisted + + +@dataclass +class SheerkaDataProviderRef: + """ + Object that tells where an object is store (target is the digest of the reference) + """ + key: str # key of the object + target: str # digest of the reference + original_target: str = None # when the object is modified, previous digest + + def get_digest(self): + return self.original_target + + def get_key(self): + return self.key + + +class SheerkaDataProvider: + """Manages the state of the system""" + + EventFolder = "events" + StateFolder = "state" + ObjectsFolder = "objects" + CacheFolder = "cache" + HeadFile = "HEAD" + LastEventFile = "LAST_EVENT" + KeysFile = "keys" + REF_PREFIX = "##REF##:" + + def __init__(self, root=None, sheerka=None): + self.log = get_logger(__name__) + self.init_log = get_logger("init." 
+ __name__) + self.init_log.debug("Initializing sdp.") + + self.sheerka = sheerka + self.io = SheerkaDataProviderIO.get(root) + self.first_time = self.io.first_time + + self.serializer = Serializer() + + @staticmethod + def get_obj_key(obj): + """ + Tries to find the key of an object + Look for .key, .get_key() + :param obj: + :return: String version of that is found, None otherwise + """ + return str(obj.get_key()) if hasattr(obj, "get_key") \ + else str(obj.key) if hasattr(obj, "key") \ + else None + + @staticmethod + def get_obj_digest(obj): + """ + Tries to find the key of an object + Look for .digest, .get_digest() + :param obj: + :return: digest, None otherwise + """ + if isinstance(obj, str) and obj.startswith(SheerkaDataProvider.REF_PREFIX): + return obj[len(SheerkaDataProvider.REF_PREFIX):] + + return obj.digest if hasattr(obj, "digest") \ + else obj.get_digest() if hasattr(obj, "get_digest") \ + else None + + @staticmethod + def get_obj_origin(obj): + """ + Get the digest used to save obj if set + """ + if isinstance(obj, dict) and Serializer.ORIGIN in obj: + return obj[Serializer.ORIGIN] + + if hasattr(obj, Serializer.ORIGIN): + return getattr(obj, Serializer.ORIGIN) + + if isinstance(obj, SheerkaDataProviderRef): + return obj.original_target + + return None + + @staticmethod + def get_stream_digest(stream): + sha256_hash = hashlib.sha256() + for byte_block in iter(lambda: stream.read(4096), b""): + sha256_hash.update(byte_block) + + stream.seek(0) + return sha256_hash.hexdigest() + + @staticmethod + def is_reference(obj): + return isinstance(obj, str) and obj.startswith(SheerkaDataProvider.REF_PREFIX) + + def reset(self): + self.first_time = self.io.first_time + if hasattr(self.io, "reset"): + self.io.reset() + + def add(self, event_digest: str, entry, obj, allow_multiple=True, use_ref=False): + """ + Adds obj to the entry 'entry' + :param event_digest: digest of the event that triggers the modification of the state + :param entry: entry of the state to 
update + :param obj: obj to insert or add + :param allow_multiple: if set to true, the same key can be added several times. + All entries will be put in a list + :param use_ref: if True the actual object is saved under 'objects' folder, + only a reference is saved in the state + :return: (entry, key) to retrieve the object + """ + + original_obj = obj.copy() if isinstance(obj, dict) else obj + + snapshot = self.get_snapshot(SheerkaDataProvider.HeadFile) + state = self.load_state(snapshot) + + self.log.debug(f"Adding obj '{obj}' in entry '{entry}' (allow_multiple={allow_multiple}, use_ref={use_ref})") + + if not isinstance(obj, ObjToUpdate): + obj = ObjToUpdate(obj) + + # check uniqueness, cannot add the same key twice if allow_multiple == False + key = obj.get_key() + self.log.debug(f"key found : '{key}'") if key else self.log.debug("No key found") + if not allow_multiple: + if isinstance(obj.obj, dict): + for k in obj.obj: + if state.contains(entry, k): + raise IndexError(f"{entry}.{k}") + else: + if state.contains(entry, key): + raise IndexError(f"{entry}.{key}" if key else entry) + + state.parents = [] if snapshot is None else [snapshot] + state.events = [event_digest] + state.date = datetime.now() + + if use_ref: + obj.set_digest(self.save_obj(obj.obj)) + obj.obj = self.REF_PREFIX + obj.get_digest() + + state.update(entry, obj) + + new_snapshot = self.save_state(state) + self.set_snapshot(SheerkaDataProvider.HeadFile, new_snapshot) + return SheerkaDataProviderResult(original_obj, entry, obj.get_key(), obj.get_digest()) + + def add_with_auto_key(self, event_digest: str, entry, obj): + """ + Add obj to entry. 
An autogenerated key created for obj + :param event_digest: + :param entry: + :param obj: + :return: + """ + + original_obj = obj.copy() if isinstance(obj, dict) else obj + + next_key = self.get_next_key(entry) + if hasattr(obj, "set_key"): + obj.set_key(next_key) + res = self.add(event_digest, entry, ObjToUpdate(obj, next_key)) + return SheerkaDataProviderResult(original_obj, res.entry, res.key, res.digest) + + def add_unique(self, event_digest: str, entry, obj): + """Add an entry and make sure it's unique""" + + original_obj = obj.copy() if isinstance(obj, dict) else obj + + snapshot = self.get_snapshot(SheerkaDataProvider.HeadFile) + state = self.load_state(snapshot) + + state.parents = [] if snapshot is None else [snapshot] + state.events = [event_digest] + state.date = datetime.now() + if entry not in state.data: + state.data[entry] = {obj} + already_exist = False + else: + already_exist = obj in state.data[entry] + if not already_exist: + state.data[entry].add(obj) + + new_snapshot = self.save_state(state) + self.set_snapshot(SheerkaDataProvider.HeadFile, new_snapshot) + return SheerkaDataProviderResult( + original_obj, + entry, + None, + None, + already_exist) + + def set(self, event_digest, entry, obj, use_ref=False, is_ref=False): + """ + Add or replace an entry. The entry is reinitialized. 
+ If the previous value was dict, all keys are lost + :param event_digest: + :param entry: + :param obj: + :param use_ref: Do not save obj in State (save it under objects), use_ref in State + :param is_ref: obj is supposed to be a reference + :return: + """ + + original_obj = obj.copy() if isinstance(obj, dict) else obj + + if use_ref and is_ref: + raise SheerkaDataProviderError("Cannot use use_ref and is_ref at the same time", None) + + if is_ref and not isinstance(obj, dict): + raise SheerkaDataProviderError("is_ref can only be used with dictionaries", obj) + + snapshot = self.get_snapshot(SheerkaDataProvider.HeadFile) + state = self.load_state(snapshot) + + state.parents = [] if snapshot is None else [snapshot] + state.events = [event_digest] + state.date = datetime.now() + + key = self.get_obj_key(obj) + obj = self.save_ref_if_needed(use_ref, obj) + + if is_ref: + for k, v in obj.items(): + obj[k] = self.REF_PREFIX + v + + state.data[entry] = obj if key is None else {key: obj} + + new_snapshot = self.save_state(state) + self.set_snapshot(SheerkaDataProvider.HeadFile, new_snapshot) + return SheerkaDataProviderResult(original_obj, entry, key, self.get_obj_digest(obj)) + + def modify(self, event_digest, entry, key, obj): + """ + Replace an element + If the key is not provided, has the same effect than set eg, the entry is reset + :param event_digest: + :param entry: + :param key: key of the object to update + :param obj: new data + :return: + """ + + original_obj = obj.copy() if isinstance(obj, dict) else obj + + if key is None: + raise SheerkaDataProviderError("Key is mandatory.", None) + + snapshot = self.get_snapshot(SheerkaDataProvider.HeadFile) + state = self.load_state(snapshot) + + if entry not in state.data: + raise IndexError(entry) + + if key is not None and key not in state.data[entry]: + raise IndexError(f"{entry}.{key}") + + state.parents = [] if snapshot is None else [snapshot] + state.events = [event_digest] + state.date = datetime.now() + + # Gets 
obj original key, it will help to know if the key has changed + obj_key = self.get_obj_key(obj) or key + digest = None + + if isinstance(state.data[entry][key], list): + obj_origin = self.get_obj_origin(obj) + if obj_origin is None: + raise (SheerkaDataProviderError(f"Multiple entries under '{entry}.{key}'", obj)) + + digest = state.modify_in_list( + entry, + key, + obj, + obj_key, + obj_origin, + self.load_ref_if_needed, + self.save_ref_if_needed) + + else: + was_saved_as_reference = self.is_reference(state.data[entry][key]) + if was_saved_as_reference: + obj = self.save_ref_if_needed(True, obj) + digest = self.get_obj_digest(obj) + + state.modify(entry, key, obj, obj_key) + + new_snapshot = self.save_state(state) + self.set_snapshot(SheerkaDataProvider.HeadFile, new_snapshot) + return SheerkaDataProviderResult(original_obj, entry, obj_key, digest) + + def list(self, entry, filter=None): + """ + Lists elements of entry 'entry' + :param entry: name of the entry to list + :param filter: filter to use + :return: list of elements + """ + snapshot = self.get_snapshot(SheerkaDataProvider.HeadFile) + state = self.load_state(snapshot) + if entry not in state.data: + return [] + + elements = state.data[entry] + + if isinstance(elements, dict): + # manage when elements have a key + filter_to_use = (lambda k, o: True) if filter is None else filter + for key, element in elements.items(): + if filter_to_use(key, element): + if isinstance(element, list): + yield [self.load_ref_if_needed(e)[0] for e in element] + else: + yield self.load_ref_if_needed(element)[0] + else: + # manage when no key is defined for the elements + if not isinstance(elements, list) and not isinstance(elements, set): + elements = [elements] + + filter_to_use = (lambda o: True) if filter is None else filter + for element in elements: + if filter_to_use(element): + yield self.load_ref_if_needed(element)[0] + + def remove(self, event_digest, entry, filter=None, silent_remove=True): + """ + Removes elements 
under the entry 'entry' + :param event_digest: event that triggers the deletion + :param entry: + :param filter: filter to use + :param silent_remove: Do not throw exception if entry does not exist + :return: new sha256 of the state + TODO: Remove by key + """ + snapshot = self.get_snapshot(SheerkaDataProvider.HeadFile) + state = self.load_state(snapshot) + + if entry not in state.data: + if silent_remove: + return snapshot + else: + raise IndexError(entry) + + state.parents = [] if snapshot is None else [snapshot] + state.events = [event_digest] + state.date = datetime.now() + state.remove(entry, filter) + + new_snapshot = self.save_state(state) + self.set_snapshot(SheerkaDataProvider.HeadFile, new_snapshot) + return new_snapshot + + def get(self, entry, key=None, load_origin=True): + """ + Retrieve an element by its key + :param entry: + :param key: + :param load_origin: if True, adds the origin (parent digest) to the object + :return: + """ + snapshot = self.get_snapshot(SheerkaDataProvider.HeadFile) + state = self.load_state(snapshot) + + if entry not in state.data: + raise IndexError(entry) + + if key is not None and key not in state.data[entry]: + raise IndexError(f"{entry}.{key}") + + item = state.data[entry] if key is None else state.data[entry][key] + if isinstance(item, list): + return [self.load_ref_if_needed(i, load_origin)[0] for i in item] + + return self.load_ref_if_needed(item, load_origin)[0] + + def get_safe(self, entry, key=None, load_origin=True): + """ + Retrieve an element by its key. 
Return None if the element does not exist + :param entry: + :param key: + :param load_origin: if True, adds the origin (parent digest) to the object + :return: + """ + snapshot = self.get_snapshot(SheerkaDataProvider.HeadFile) + state = self.load_state(snapshot) + + if entry not in state.data: + return None + + if key is not None and key not in state.data[entry]: + return None + + item = state.data[entry] if key is None else state.data[entry][key] + if isinstance(item, list): + return [self.load_ref_if_needed(i, load_origin)[0] for i in item] + + return self.load_ref_if_needed(item, load_origin)[0] + + def get_ref(self, entry, key=None): + """ + Returns the reference of an object if the object exists + This function allows to retrieve obj.##origin## without loading the object + :param entry: + :param key: + :return: + """ + + snapshot = self.get_snapshot(SheerkaDataProvider.HeadFile) + state = self.load_state(snapshot) + + if entry not in state.data: + raise IndexError(entry) + + if key is not None and key not in state.data[entry]: + raise IndexError(f"{entry}.{key}") + + item = state.data[entry] if key is None else state.data[entry][key] + if isinstance(item, list): + res = [] + for element in item: + if not self.is_reference(element): + raise SheerkaDataProviderError("Not a reference", f"{entry}.{key}") + res.append(self.get_obj_digest(element)) + return res + + if not self.is_reference(item): + raise SheerkaDataProviderError("Not a reference", f"{entry}.{key}") + + return self.get_obj_digest(item) + + def exists(self, entry, key=None, digest=None): + """ + Returns true if the entry is defined + :param key: + :param entry: + :param digest: digest of the object, when several entries share the same key + :return: + """ + snapshot = self.get_snapshot(SheerkaDataProvider.HeadFile) + state = self.load_state(snapshot) + exist = entry in state.data + + if not exist or key is None: + return exist + + items = state.data[entry] + exist = key in items + if not exist or 
digest is None: + return exist + + items = items[key] + if not isinstance(items, list): + items = [items] + + for item in items: + item_digest = SheerkaDataProvider.get_obj_digest(item) + if item_digest == digest: + return True + + return False + + def save_event(self, event: Event): + """ + return an event, given its digest + :param event: + :return: digest of the event + """ + parent = self.get_snapshot(SheerkaDataProvider.LastEventFile) + event.parents = [parent] if parent else None + digest = event.get_digest() # must be call after setting the parents + + target_path = self.io.get_obj_path(SheerkaDataProvider.EventFolder, digest) + if self.io.exists(target_path): + return digest + + self.io.write_binary(target_path, self.serializer.serialize(event, None).read()) + self.set_snapshot(SheerkaDataProvider.LastEventFile, digest) + + return digest + + def load_event(self, digest=None): + """ + return an event, given its digest + :param digest: + :return: + """ + digest = digest or self.get_snapshot(SheerkaDataProvider.LastEventFile) + if digest is None: + return None + + target_path = self.io.get_obj_path(SheerkaDataProvider.EventFolder, digest) + + with self.io.open(target_path, "rb") as f: + return self.serializer.deserialize(f, None) + + def load_events(self, page_size, start=0): + """ + Load multiple events in the same command + :param start: + :param page_size: + :return: + """ + + digest = None + if start: + for i in range(start): + event = self.load_event(digest) + if event is None or event.parents is None: + return + digest = event.parents[0] + + count = 0 + while count < page_size or page_size <= 0: + event = self.load_event(digest) + if event is None: + return + + yield event + + if event.parents is None: + return + + digest = event.parents[0] + count += 1 + + def get_result_file_path(self, digest, is_admin): + ext = "_admin_result" if is_admin else "_result" + return self.io.get_obj_path(SheerkaDataProvider.EventFolder, digest) + ext + + def 
has_result(self, digest, is_admin=False): + """ + Check is a result file was created for a specific event + :param digest: + :param is_admin: True is the result is an internal admin result file + :return: + """ + target_path = self.get_result_file_path(digest, is_admin) + return self.io.exists(target_path) + + def save_result(self, execution_context, is_admin=False): + """ + Save the execution context associated with an event + To make a long story short, + for every single user input, there is an event (which is the first thing that is created) + and a result (the ExecutionContext created by sheerka.evaluate_user_input() + :param execution_context: + :param is_admin: True is the result is an internal admin result file + :return: + """ + start = time.time() + message = execution_context.event.message + digest = execution_context.event.get_digest() + self.log.debug(f"Saving execution context. digest={digest}, message={message}") + target_path = self.get_result_file_path(digest, is_admin) + if self.io.exists(target_path): + return digest + + context = SerializerContext(sheerka=self.sheerka) + length = self.io.write_binary(target_path, self.serializer.serialize(execution_context, context).read()) + elapsed = time.time() - start + self.log.debug(f"Saved execution context. message={message}, length={length}, elapsed={elapsed}") + return digest + + def load_result(self, digest, is_admin=False): + """ + Load and deserialize a result file + :param digest: + :param is_admin: True is the result is an internal admin result file + :return: + """ + target_path = self.get_result_file_path(digest, is_admin) + + with self.io.open(target_path, "rb") as f: + context = SerializerContext(sheerka=self.sheerka) + return self.serializer.deserialize(f, context) + + def save_state(self, state: State): + digest = state.get_digest() + self.log.debug(f"Saving new state. 
digest={digest}") + target_path = self.io.get_obj_path(SheerkaDataProvider.StateFolder, digest) + if self.io.exists(target_path): + return digest + + context = SerializerContext(sheerka=self.sheerka) + self.io.write_binary(target_path, self.serializer.serialize(state, context).read()) + return digest + + def load_state(self, digest): + if digest is None: + return State() + + target_path = self.io.get_obj_path(SheerkaDataProvider.StateFolder, digest) + with self.io.open(target_path, "rb") as f: + context = SerializerContext(sheerka=self.sheerka) + return self.serializer.deserialize(f, context) + + def save_obj(self, obj): + self.log.debug(f"Saving '{obj}' as reference...") + context = SerializerContext(user_name="kodjo", sheerka=self.sheerka) + stream = self.serializer.serialize(obj, context) + digest = obj.get_digest() if hasattr(obj, "get_digest") else self.get_stream_digest(stream) + + target_path = self.io.get_obj_path(SheerkaDataProvider.ObjectsFolder, digest) + if self.io.exists(target_path): + self.log.debug(f"...already saved. 
digest is {digest}") + return digest + + self.io.write_binary(target_path, stream.read()) + + self.log.debug(f"...digest={digest}.") + return digest + + def load_obj(self, digest, add_origin=True): + if digest is None: + return None + + target_path = self.io.get_obj_path(SheerkaDataProvider.ObjectsFolder, digest) + if not self.io.exists(target_path): + return None + + with self.io.open(target_path, "rb") as f: + context = SerializerContext(origin=digest, sheerka=self.sheerka) + obj = self.serializer.deserialize(f, context) + + # set the origin of the object + if add_origin: + if isinstance(obj, dict): + obj[Serializer.ORIGIN] = digest + elif not isinstance(obj, str): + setattr(obj, Serializer.ORIGIN, digest) + return obj + + def load_ref_if_needed(self, obj, load_origin=True): + if isinstance(obj, SheerkaDataProviderRef): + resolved = self.load_obj(obj.target, load_origin) + return resolved, False + + if not isinstance(obj, str) or not obj.startswith(SheerkaDataProvider.REF_PREFIX): + return obj, False + + resolved = self.load_obj(obj[len(SheerkaDataProvider.REF_PREFIX):], load_origin) + return (obj, False) if resolved is None else (resolved, True) + + def save_ref_if_needed(self, save_ref, obj): + if not save_ref: + return obj + + digest = self.save_obj(obj) + return self.REF_PREFIX + digest + + def get_cache_params(self, category, key): + digest = hashlib.sha3_256(f"{category}:{key}".encode("utf-8")).hexdigest() + cache_path = self.io.get_obj_path(SheerkaDataProvider.CacheFolder, digest) + return digest, cache_path + + def add_to_cache(self, category, key, obj, update=False): + """ + Save obj in the internal cache system + :param category: + :param key: + :param obj: + :param update: + :return: + """ + digest, cache_path = self.get_cache_params(category, key) + + if self.io.exists(cache_path) and not update: + return digest + + self.io.write_binary(cache_path, zlib.compress(obj.encode("utf-8"), 9)) + return digest + + def load_from_cache(self, category, key): + 
""" + Reload a compress object from the cache + :param category: + :param key: + :return: + """ + digest, cache_path = self.get_cache_params(category, key) + + if not self.io.exists(cache_path): + raise IndexError(f"{category}.{key}") + + with self.io.open(cache_path, "rb") as f: + return zlib.decompress(f.read()).decode("utf-8") + + def remove_from_cache(self, category, key): + """ + + :param category: + :param key: + :return: + """ + digest, cache_path = self.get_cache_params(category, key) + if self.io.exists(cache_path): + self.io.remove(cache_path) + + return digest + + def in_cache(self, category, key): + """ + Returns true if the key is in cache + :param category: + :param key: + :return: + """ + digest, cache_path = self.get_cache_params(category, key) + return self.io.exists(cache_path) + + def get_snapshot(self, file): + head_file = self.io.path_join(file) + if not self.io.exists(head_file): + return None + return self.io.read_text(head_file) + # with open(head_file, "r") as f: + # return f.read() + + def set_snapshot(self, file, digest): + head_file = self.io.path_join(file) + return self.io.write_text(head_file, digest) + # with open(head_file, "w") as f: + # return f.write(digest) + + def load_keys(self): + keys_file = self.io.path_join(SheerkaDataProvider.KeysFile) + if not self.io.exists(keys_file): + keys = {} + else: + with self.io.open(keys_file, "r") as f: + keys = json.load(f) + return keys + + def save_keys(self, keys): + keys_file = self.io.path_join(SheerkaDataProvider.KeysFile) + with self.io.open(keys_file, "w") as f: + json.dump(keys, f) + + def get_next_key(self, entry): + keys = self.load_keys() + + next_key = keys.get(entry, 0) + 1 + keys[entry] = next_key + + self.save_keys(keys) + return str(next_key) + + def set_key(self, entry, value): + keys = self.load_keys() + keys[entry] = value + self.save_keys(keys) + return str(value) + + def dump_state(self, digest=None): + digest = digest or self.get_snapshot(SheerkaDataProvider.HeadFile) + 
state = self.load_state(digest) + print(json.dumps(state.data, sort_keys=True, default=json_default_converter, indent=True)) + + def dump_obj(self, digest): + obj = self.load_obj(digest) + print(json.dumps(obj.__dict__, sort_keys=True, default=json_default_converter, indent=True)) diff --git a/src/sheerkapickle/sheerka_handlers.py b/src/sheerkapickle/sheerka_handlers.py index dafc6ef..188c0e2 100644 --- a/src/sheerkapickle/sheerka_handlers.py +++ b/src/sheerkapickle/sheerka_handlers.py @@ -23,25 +23,21 @@ class ConceptHandler(BaseHandler): data[CONCEPT_ID] = (obj.key, obj.id) # transform metadata - for prop in CONCEPT_PROPERTIES_TO_SERIALIZE: - value = getattr(obj.metadata, prop) - ref_value = getattr(ref.metadata, prop) + for name in CONCEPT_PROPERTIES_TO_SERIALIZE: + value = getattr(obj.metadata, name) + ref_value = getattr(ref.metadata, name) if value != ref_value: - data["meta." + prop] = pickler.flatten(value) + value_to_use = [list(t) for t in value] if name == "variables" else value + data["meta." + name] = pickler.flatten(value_to_use) - # transform value - for metadata, value in obj.values.items(): - ref_value = ref.values[metadata] if metadata in ref.values else None - if value != ref_value: - data[metadata.value] = pickler.flatten(value) - - # transform properties - for prop in obj.props: - value = obj.props[prop].value - if prop not in ref.props or value != ref.props[prop].value: - if "props" not in data: - data["props"] = [] - data["props"].append((pickler.flatten(prop), pickler.flatten(value))) + # # transform values + for name in obj.values: + value = obj.get_value(name) + if name not in ref.values or value != ref.get_value(name): + if "values" not in data: + data["values"] = [] + key_to_use = "cParts." 
+ name.value if isinstance(name, ConceptParts) else name + data["values"].append((pickler.flatten(key_to_use), pickler.flatten(value))) return data @@ -61,18 +57,18 @@ class ConceptHandler(BaseHandler): if key.startswith("meta."): # get metadata resolved_prop = key[5:] - if resolved_prop == "props": + if resolved_prop == "variables": for prop_name, prop_value in resolved_value: - instance.def_prop(prop_name, prop_value) + instance.def_var(prop_name, prop_value) else: setattr(instance.metadata, resolved_prop, resolved_value) - elif key == "props": + elif key == "values": # get properties for prop_name, prop_value in resolved_value: - instance.set_prop(prop_name, prop_value) + key_to_use = ConceptParts(prop_name[7:]) if isinstance(prop_name, str) and prop_name.startswith("cParts.") else prop_name + instance.set_value(key_to_use, prop_value) else: - # get value - instance.set_metadata_value(ConceptParts(key), resolved_value) + raise Exception("Sanity check as it's not possible yet") instance.freeze_definition_hash() return instance diff --git a/tests/BaseTest.py b/tests/BaseTest.py index d7306c9..9d0d849 100644 --- a/tests/BaseTest.py +++ b/tests/BaseTest.py @@ -1,8 +1,9 @@ import ast from core.builtin_concepts import ReturnValueConcept, ParserResultConcept, BuiltinConcepts -from core.concept import Concept, DEFINITION_TYPE_BNF +from core.concept import Concept, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF from core.sheerka.ExecutionContext import ExecutionContext +from parsers.BnfNodeParser import StrMatch from parsers.BnfParser import BnfParser from sdp.sheerkaDataProvider import Event @@ -28,8 +29,8 @@ class BaseTest: post="isinstance(res, int)", body="def func(x,y):\n return x+y\nfunc(a,b)", desc="specific description") - concept.def_prop("a", "value1") - concept.def_prop("b", "value2") + concept.def_var("a", "value1") + concept.def_var("b", "value2") return concept @@ -41,15 +42,17 @@ class BaseTest: def init_concepts(self, *concepts, **kwargs): sheerka = 
self.get_sheerka(**kwargs) - context = self.get_context(sheerka) - create_new = kwargs.get("create_new", False) + + context_args = dict([(k, v) for k, v in kwargs.items() if k in ["sheerka", "eval_body", "eval_where"]]) + context = self.get_context(sheerka, **context_args) + create_new = kwargs.get("create_new", None) result = [] for c in concepts: if isinstance(c, str): c = Concept(c) - if c.metadata.definition: + if c.metadata.definition and c.metadata.definition_type != DEFINITION_TYPE_DEF: bnf_parser = BnfParser() res = bnf_parser.parse(context, c.metadata.definition) if res.status: @@ -57,8 +60,8 @@ class BaseTest: c.metadata.definition_type = DEFINITION_TYPE_BNF else: raise Exception(f"Error in bnf definition '{c.metadata.definition}'", sheerka.get_error(res)) - sheerka.create_new_concept(context, c) - elif create_new: + + if create_new: sheerka.create_new_concept(context, c) else: c.init_key() @@ -79,8 +82,8 @@ class BaseTest: """True ret_val + add concept in cache""" if isinstance(obj, Concept): obj.init_key() - if obj.key not in sheerka.cache_by_key: - sheerka.cache_by_key[obj.key] = obj + if sheerka.has_key(obj.key): + sheerka.add_in_cache(obj) return sheerka.ret(who, True, obj) @staticmethod @@ -93,3 +96,29 @@ class BaseTest: source=source or concept.name, value=concept, try_parsed=concept)) + + @staticmethod + def create_concept_lite(sheerka, name, variables=None, bnf=None): + concept = Concept(name) if isinstance(name, str) else name + if variables: + for v in variables: + concept.def_var(v) + if bnf: + concept.bnf = bnf + concept.metadata.definition_type = DEFINITION_TYPE_BNF + concept.init_key() + sheerka.set_id_if_needed(concept, False) + sheerka.add_in_cache(concept) + return concept + + @staticmethod + def bnf_concept(concept, expression=None): + if isinstance(concept, Concept): + name = concept.name + else: + name = concept + concept = Concept(concept) + + concept.bnf = expression or StrMatch(name) + concept.metadata.definition_type = 
DEFINITION_TYPE_BNF + return concept diff --git a/tests/TestUsingFileBasedSheerka.py b/tests/TestUsingFileBasedSheerka.py index 6c23638..94069d1 100644 --- a/tests/TestUsingFileBasedSheerka.py +++ b/tests/TestUsingFileBasedSheerka.py @@ -4,11 +4,11 @@ from os import path import pytest from core.sheerka.Sheerka import Sheerka + from tests.BaseTest import BaseTest class TestUsingFileBasedSheerka(BaseTest): - tests_root = path.abspath("../../build/tests") root_folder = "init_folder" @@ -26,11 +26,15 @@ class TestUsingFileBasedSheerka(BaseTest): os.chdir(current_pwd) - def get_sheerka(self, **kwargs): - use_dict = kwargs.get("use_dict", True) - skip_builtins_in_db = kwargs.get("skip_builtins_in_db", True) + def get_sheerka(self, **kwargs): + + # use dictionary based io instead of file + # If you do so, information between two different instances of sheerka + # won't be shared + use_dict = kwargs.get("use_dict", False) + root = "mem://" if use_dict else self.root_folder - sheerka = Sheerka(skip_builtins_in_db=skip_builtins_in_db) - sheerka.initialize(root) + sheerka = Sheerka() + sheerka.initialize(root, save_execution_context=False) return sheerka diff --git a/tests/TestUsingMemoryBasedSheerka.py b/tests/TestUsingMemoryBasedSheerka.py index 81967f3..eaa0785 100644 --- a/tests/TestUsingMemoryBasedSheerka.py +++ b/tests/TestUsingMemoryBasedSheerka.py @@ -1,20 +1,21 @@ from core.sheerka.Sheerka import Sheerka + from tests.BaseTest import BaseTest class TestUsingMemoryBasedSheerka(BaseTest): singleton_instance = None - builtin_concepts = {} + dump = None @staticmethod - def _inner_get_sheerka(skip_builtins_in_db): - sheerka = Sheerka(skip_builtins_in_db=skip_builtins_in_db) - sheerka.initialize("mem://") + def _inner_get_sheerka(cache_only): + sheerka = Sheerka(cache_only=cache_only) + sheerka.initialize("mem://", save_execution_context=False) return sheerka def get_sheerka(self, **kwargs): - skip_builtins_in_db = kwargs.get("skip_builtins_in_db", True) - use_singleton = 
kwargs.get("singleton", False) + cache_only = kwargs.get("cache_only", True) + use_singleton = kwargs.get("singleton", True) sheerka = kwargs.get("sheerka", None) if sheerka: @@ -23,14 +24,13 @@ class TestUsingMemoryBasedSheerka(BaseTest): if use_singleton: singleton_instance = TestUsingMemoryBasedSheerka.singleton_instance if singleton_instance: - singleton_instance.reset() - singleton_instance.cache_by_key.update(TestUsingMemoryBasedSheerka.builtin_concepts) # quicker ? - # singleton_instance.cache_by_key = TestUsingMemoryBasedSheerka.builtin_concepts + singleton_instance.reset(cache_only) + singleton_instance.cache_manager.init_from(TestUsingMemoryBasedSheerka.dump) return singleton_instance else: - new_instance = self._inner_get_sheerka(skip_builtins_in_db) - TestUsingMemoryBasedSheerka.builtin_concepts.update(new_instance.cache_by_key) + new_instance = self._inner_get_sheerka(cache_only) + TestUsingMemoryBasedSheerka.dump = new_instance.cache_manager.dump() TestUsingMemoryBasedSheerka.singleton_instance = new_instance return TestUsingMemoryBasedSheerka.singleton_instance - return self._inner_get_sheerka(skip_builtins_in_db) + return self._inner_get_sheerka(cache_only) diff --git a/tests/cache/__init__.py b/tests/cache/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/cache/test_cache.py b/tests/cache/test_cache.py new file mode 100644 index 0000000..06b4f33 --- /dev/null +++ b/tests/cache/test_cache.py @@ -0,0 +1,534 @@ +import pytest +from cache.Cache import Cache +from cache.CacheManager import CacheManager +from cache.DictionaryCache import DictionaryCache +from cache.IncCache import IncCache +from cache.ListCache import ListCache +from cache.ListIfNeededCache import ListIfNeededCache +from cache.SetCache import SetCache +from core.concept import Concept + +from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka + + +class TestCache(TestUsingMemoryBasedSheerka): + def 
test_i_can_get_an_retrieve_value_from_cache(self): + cache = Cache() + cache.put("key", "value") + assert cache.get("key") == "value" + assert len(cache) == 1 + + cache.put("key", "another value") # another value in the cache replace the previous one + assert cache.get("key") == "another value" + assert len(cache) == 1 + + cache.put("key2", "value2") # another key + assert cache.get("key2") == "value2" + assert len(cache) == 2 + assert cache.copy() == {"key": "another value", "key2": "value2"} + + def test_i_can_evict(self): + maxsize = 5 + cache = Cache(max_size=5) + + for key in range(maxsize): + cache.put(key, key) + + assert len(cache) == maxsize + assert cache.has(0) + + for key in range(maxsize, maxsize * 2): + cache.put(key, key) + + assert len(cache) == maxsize + assert not cache.has(key - maxsize) + + def test_i_can_get_default_value_from_simple_cache(self): + cache = Cache() + assert cache.get("key") is None + + cache = Cache(default=10) + assert cache.get("key") == 10 + assert "key" not in cache # default value are not put in cache + + cache = Cache(default=lambda key: key + "_not_found") + assert cache.get("key") == "key_not_found" + assert "key" in cache # default callable are put in cache + + def test_i_can_put_and_retrieve_value_from_list_cache(self): + cache = ListCache() + + cache.put("key", "value") + assert cache.get("key") == ["value"] + assert len(cache) == 1 + + cache.put("key", "value2") # we can append to this list + assert cache.get("key") == ["value", "value2"] + assert len(cache) == 2 + + cache.put("key2", "value") + assert cache.get("key2") == ["value"] + assert len(cache) == 3 + + # duplicates are allowed + cache.put("key", "value") + assert cache.get("key") == ["value", "value2", "value"] + assert len(cache) == 4 + + assert cache.copy() == {'key': ['value', 'value2', 'value'], 'key2': ['value']} + + def test_i_can_put_and_retrieve_value_from_list_if_needed_cache(self): + cache = ListIfNeededCache() + + cache.put("key", "value") + 
assert cache.get("key") == "value" + + # second time with the same key creates a list + cache.put("key", "value2") + assert cache.get("key") == ["value", "value2"] + assert len(cache) == 2 + + # third time, we now have a list + cache.put("key", "value3") + assert cache.get("key") == ["value", "value2", "value3"] + assert len(cache) == 3 + + # other keys are not affected + cache.put("key2", "value") + assert cache.get("key2") == "value" + assert len(cache) == 4 + + # duplicates are allowed + cache.put("key", "value") + assert cache.get("key") == ["value", "value2", "value3", "value"] + assert len(cache) == 5 + + def test_i_can_put_and_retrieve_values_from_set_cache(self): + cache = SetCache() + + cache.put("key", "value") + assert cache.get("key") == {"value"} + assert len(cache) == 1 + + # we can add to this set + cache.put("key", "value2") + assert cache.get("key") == {"value", "value2"} + assert len(cache) == 2 + + # other keys are not affected + cache.put("key2", "value") + assert cache.get("key2") == {"value"} + assert len(cache) == 3 + + # duplicates are removed + cache.put("key", "value") + assert cache.get("key") == {"value", "value2"} + assert len(cache) == 3 + + assert cache.copy() == {'key': {'value', 'value2'}, 'key2': {'value'}} + + def test_i_can_put_and_retrieve_value_from_dictionary_cache(self): + cache = DictionaryCache() + + # # key must be None + # with pytest.raises(KeyError): + # cache.put("key", None) + # + # # value must be a dictionary + # with pytest.raises(ValueError): + # cache.put(True, "value") + + entry = {"key": "value", "key2": ["value21", "value22"]} + cache.put(False, entry) + assert len(cache) == 3 + assert id(cache._cache) == id(entry) + assert cache.get("key") == "value" + assert cache.get("key2") == ["value21", "value22"] + + # I can append values + cache.put(True, {"key": "another_value", "key3": "value3"}) + assert len(cache) == 4 + assert cache.get("key") == "another_value" + assert cache.get("key2") == ["value21", "value22"] 
+ assert cache.get("key3") == "value3" + + # I can reset + entry = {"key": "value", "key2": ["value21", "value22"]} + cache.put(False, entry) + assert len(cache) == 3 + assert id(cache._cache) == id(entry) + assert cache.get("key") == "value" + assert cache.get("key2") == ["value21", "value22"] + + assert cache.copy() == {'key': 'value', 'key2': ['value21', 'value22']} + + def test_i_can_put_and_retrieve_values_from_inc_cache(self): + cache = IncCache() + + assert cache.get("key") == 1 + assert cache.get("key") == 2 + assert cache.get("key") == 3 + assert cache.get("key2") == 1 + assert cache.get("key2") == 2 + + cache.put("key", 100) + assert cache.get("key") == 101 + + assert cache.copy() == {'key': 101, 'key2': 2} + + @pytest.mark.parametrize("key", [ + None, + "something" + ]) + def test_keys_have_constraints_when_dictionary_cache(self, key): + cache = DictionaryCache() + with pytest.raises(KeyError): + cache.put(key, None) + + @pytest.mark.parametrize("value", [ + None, + "something" + ]) + def test_values_have_constraints_when_dictionary_cache(self, value): + cache = DictionaryCache() + with pytest.raises(ValueError): + cache.put(True, value) + + def test_i_can_append_to_a_dictionary_cache_even_if_it_s_new(self): + cache = DictionaryCache() + + entry = {"key": "value", "key2": ["value21", "value22"]} + cache.put(True, entry) + assert len(cache) == 3 + assert id(cache._cache) != id(entry) + assert cache.get("key") == "value" + assert cache.get("key2") == ["value21", "value22"] + + def test_i_can_update_from_simple_cache(self): + cache = Cache() + cache.put("key", "value") + cache.update("key", "value", "key", "new_value") + + assert len(cache._cache) == 1 + assert len(cache) == 1 + assert cache.get("key") == "new_value" + + cache.update("key", "new_value", "another_key", "another_value") + assert len(cache._cache) == 1 + assert len(cache) == 1 + assert cache.get("key") is None + assert cache.get("another_key") == "another_value" + + with 
pytest.raises(KeyError): + cache.update("wrong key", "value", "key", "value") + + def test_i_can_update_from_list_cache(self): + cache = ListCache() + + cache.put("key", "value") + cache.put("key", "value2") + cache.put("key", "value") + cache.update("key", "value", "key", "another value") + + assert len(cache._cache) == 1 + assert len(cache) == 3 + assert cache.get("key") == ["another value", "value2", "value"] # only the first one is affected + + cache.update("key", "value2", "key2", "value2") + assert len(cache._cache) == 2 + assert len(cache) == 3 + assert cache.get("key") == ["another value", "value"] + assert cache.get("key2") == ["value2"] + + cache.update("key2", "value2", "key3", "value2") + assert len(cache._cache) == 2 + assert len(cache) == 3 + assert cache.get("key") == ["another value", "value"] + assert cache.get("key3") == ["value2"] + assert cache.get("key2") is None + + with pytest.raises(KeyError): + cache.update("wrong key", "value", "key", "value") + + def test_i_can_update_from_list_if_needed_cache(self): + cache = ListIfNeededCache() + + cache.put("key", "value") + cache.put("key", "value2") + cache.put("key", "value") + cache.update("key", "value", "key", "another value") + + assert len(cache._cache) == 1 + assert len(cache) == 3 + assert cache.get("key") == ["another value", "value2", "value"] # only the first one is affected + + cache.update("key", "value2", "key2", "value2") + assert len(cache._cache) == 2 + assert len(cache) == 3 + assert cache.get("key") == ["another value", "value"] + assert cache.get("key2") == "value2" + + cache.update("key2", "value2", "key3", "value2") + assert len(cache._cache) == 2 + assert len(cache) == 3 + assert cache.get("key") == ["another value", "value"] + assert cache.get("key3") == "value2" + assert cache.get("key2") is None + + with pytest.raises(KeyError): + cache.update("wrong key", "value", "key", "value") + + def test_i_can_update_from_set_cache(self): + cache = SetCache() + + cache.put("key", 
"value") + cache.put("key", "value2") + cache.update("key", "value", "key", "another value") + + assert len(cache._cache) == 1 + assert len(cache) == 2 + assert cache.get("key") == {"another value", "value2"} + + cache.update("key", "value2", "key2", "value2") + assert len(cache._cache) == 2 + assert len(cache) == 2 + assert cache.get("key") == {"another value"} + assert cache.get("key2") == {"value2"} + + cache.update("key", "another value", "key3", "another value") + assert len(cache._cache) == 2 + assert len(cache) == 2 + assert cache.get("key") is None + assert cache.get("key2") == {"value2"} + assert cache.get("key3") == {"another value"} + + with pytest.raises(KeyError): + cache.update("wrong key", "value", "key", "value") + + @pytest.mark.parametrize("cache", [ + Cache(), ListCache(), ListIfNeededCache(), SetCache(), IncCache() + ]) + def test_i_can_manage_cache_events(self, cache): + cache.put("key", "value") + + assert cache.to_add == {"key"} + assert cache.to_remove == set() + + cache.update("key", "value", "key", "another value") + assert cache.to_add == {"key"} + assert cache.to_remove == set() + + cache.update("key", "another value", "key2", "value2") + assert cache.to_add == {"key2"} + assert cache.to_remove == {"key"} + + cache.update("key2", "value2", "key", "value") + assert cache.to_add == {"key"} + assert cache.to_remove == {"key2"} + + @pytest.mark.parametrize("cache", [ + ListCache(), SetCache(), ListIfNeededCache() + ]) + def test_i_can_manage_list_and_set_cache_events(self, cache): + cache.put("key", "value") + cache.put("key", "value2") + + assert cache.to_add == {"key"} + assert cache.to_remove == set() + + cache.update("key", "value", "key", "another value") + assert cache.to_add == {"key"} + assert cache.to_remove == set() + + cache.update("key", "value2", "key2", "value2") + assert cache.to_add == {"key", "key2"} + assert cache.to_remove == set() + + cache.update("key", "another value", "key3", "another value") + assert cache.to_add == 
{"key2", "key3"} + assert cache.to_remove == {"key"} + + @pytest.mark.parametrize("cache", [ + Cache(), ListCache(), SetCache(), ListIfNeededCache(), IncCache() + ]) + def test_exists(self, cache): + assert not cache.exists("key") + cache.put("key", "value") + assert cache.exists("key") + + def test_exists_in_dictionary_cache(self): + cache = DictionaryCache() + assert not cache.exists("key") + + cache.put(True, {"key": "value"}) + assert cache.exists("key") + + def test_exists_extend(self): + cache = Cache(extend_exists=lambda k: True if k == "special_key" else False) + assert not cache.exists("key") + assert cache.exists("special_key") + + def test_add_concept_fills_all_dependent_caches(self): + sheerka, context, one, two, two_2, three = self.init_concepts("one", "two", Concept("two"), "three") + cache_manager = CacheManager(None) + + cache_manager.register_concept_cache("by_id", Cache(), lambda obj: obj.id, True) + cache_manager.register_concept_cache("by_name", ListCache(), lambda obj: obj.name, True) + cache_manager.register_concept_cache("by_name2", ListIfNeededCache(), lambda obj: obj.name, True) + + cache_manager.add_concept(one) + cache_manager.add_concept(two) + cache_manager.add_concept(two_2) + cache_manager.add_concept(three) + + assert len(cache_manager.caches) == 3 + assert cache_manager.caches["by_id"].cache._cache == { + "1001": one, + "1002": two, + "1003": two_2, + "1004": three, + } + assert cache_manager.caches["by_name"].cache._cache == { + "one": [one], + "two": [two, two_2], + "three": [three] + } + assert cache_manager.caches["by_name2"].cache._cache == { + "one": one, + "two": [two, two_2], + "three": three + } + + assert cache_manager.get("by_id", "1002") == two + assert cache_manager.get("by_name", "two") == [two, two_2] + assert cache_manager.get("by_name2", "two") == [two, two_2] + + def test_default_for_dictionary_cache(self): + cache = DictionaryCache(default={"key": "value", "key2": "value2"}) + + assert cache.get("key") == "value" 
+ assert "key2" in cache + assert len(cache) == 2 + + cache.clear() + assert cache.get("key3") is None + assert len(cache) == 2 + assert "key" in cache + assert "key2" in cache + + # default is not modified + cache._cache["key"] = "another value" # operation that is normally not possible + cache.clear() + assert cache.get("key") == "value" + + def test_default_callable_for_dictionary_cache(self): + cache = DictionaryCache(default=lambda k: {"key": "value", "key2": "value2"}) + + assert cache.get("key") == "value" + assert "key2" in cache + assert len(cache) == 2 + + cache.clear() + assert cache.get("key3") is None + assert len(cache) == 2 + assert "key" in cache + assert "key2" in cache + + def test_dictionary_cache_cannot_be_null(self): + cache = DictionaryCache(default=lambda k: None) + assert cache.get("key") is None + assert cache._cache == {} + + cache = DictionaryCache(default=None) + assert cache.get("key") is None + assert cache._cache == {} + + @pytest.mark.parametrize("cache, default, new_value, expected", [ + (ListCache(), lambda k: None, "value", ["value"]), + (ListCache(), lambda k: ["value"], "value", ["value", "value"]), + (ListIfNeededCache(), lambda k: None, "value", "value"), + (ListIfNeededCache(), lambda k: "value", "value1", ["value", "value1"]), + (ListIfNeededCache(), lambda k: ["value1", "value2"], "value1", ["value1", "value2", "value1"]), + (SetCache(), lambda k: None, "value", {"value"}), + (SetCache(), lambda k: {"value"}, "value", {"value"}), + (SetCache(), lambda k: {"value1"}, "value2", {"value1", "value2"}), + ]) + def test_default_is_called_before_put_to_keep_in_sync(self, cache, default, new_value, expected): + cache.configure(default=default) + cache.put("key", new_value) + + assert cache.get("key") == expected + + def test_default_is_called_before_updating_simple_cache(self): + cache = Cache(default=lambda k: None) + with pytest.raises(KeyError): + cache.update("old_key", "old_value", "new_key", "new_value") + + cache = 
Cache(default=lambda k: "old_value") + cache.update("old_key", "old_value", "new_key", "new_value") + assert cache.get("new_key") == "new_value" + + def test_default_is_called_before_updating_list_cache(self): + cache = ListCache(default=lambda k: None) + with pytest.raises(KeyError): + cache.update("old_key", "old_value", "new_key", "new_value") + + cache = ListCache(default=lambda k: ["old_value", "other old value"]) + cache.update("old_key", "old_value", "old_key", "new_value") + assert cache.get("old_key") == ["new_value", "other old value"] + + cache = ListCache(default=lambda k: ["old_value", "other old value"] if k == "old_key" else None) + cache.update("old_key", "old_value", "new_key", "new_value") + assert cache.get("old_key") == ["other old value"] + assert cache.get("new_key") == ["new_value"] + + cache = ListCache(default=lambda k: ["old_value", "other old value"] if k == "old_key" else ["other new"]) + cache.update("old_key", "old_value", "new_key", "new_value") + assert cache.get("old_key") == ["other old value"] + assert cache.get("new_key") == ["other new", "new_value"] + + def test_default_is_called_before_updating_list_if_needed_cache(self): + cache = ListIfNeededCache(default=lambda k: None) + with pytest.raises(KeyError): + cache.update("old_key", "old_value", "new_key", "new_value") + + cache = ListIfNeededCache(default=lambda k: "old_value") + cache.update("old_key", "old_value", "old_key", "new_value") + assert cache.get("old_key") == "new_value" + + cache = ListIfNeededCache(default=lambda k: ["old_value", "other old value"]) + cache.update("old_key", "old_value", "old_key", "new_value") + assert cache.get("old_key") == ["new_value", "other old value"] + + cache = ListIfNeededCache(default=lambda k: ["old_value", "other old value"] if k == "old_key" else None) + cache.update("old_key", "old_value", "new_key", "new_value") + assert cache.get("old_key") == ["other old value"] + assert cache.get("new_key") == "new_value" + + def 
test_default_is_called_before_updating_set_cache(self): + cache = SetCache(default=lambda k: None) + with pytest.raises(KeyError): + cache.update("old_key", "old_value", "new_key", "new_value") + + cache = SetCache(default=lambda k: {"old_value", "other old value"}) + cache.update("old_key", "old_value", "old_key", "new_value") + assert cache.get("old_key") == {"new_value", "other old value"} + + cache = SetCache(default=lambda k: {"old_value", "other old value"} if k == "old_key" else None) + cache.update("old_key", "old_value", "new_key", "new_value") + assert cache.get("old_key") == {"other old value"} + assert cache.get("new_key") == {"new_value"} + + cache = SetCache(default=lambda k: {"old_value", "other old value"} if k == "old_key" else {"other new"}) + cache.update("old_key", "old_value", "new_key", "new_value") + assert cache.get("old_key") == {"other old value"} + assert cache.get("new_key") == {"other new", "new_value"} + + def test_i_can_delete_an_entry_from_cache(self): + cache = Cache() + cache.put("key", "value") + + assert cache.get("key") == "value" + cache.delete("key") + assert cache.get("value") is None + assert cache.to_remove == {"key"} diff --git a/tests/cache/test_cache_manager.py b/tests/cache/test_cache_manager.py new file mode 100644 index 0000000..fc55cc8 --- /dev/null +++ b/tests/cache/test_cache_manager.py @@ -0,0 +1,111 @@ +from cache.Cache import Cache +from cache.CacheManager import CacheManager +from cache.DictionaryCache import DictionaryCache +from cache.ListCache import ListCache + +from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka + + +class TestCacheManager(TestUsingMemoryBasedSheerka): + def test_i_do_not_push_into_sdp_when_cache_only(self): + sheerka = self.get_sheerka() + context = self.get_context(sheerka) + + cache_manager = CacheManager(True) + cache_manager.register_cache("test", Cache(), persist=True) + cache_manager.put("test", "key", "value") + + cache_manager.commit(context) + assert not 
sheerka.sdp.exists("test", "key") + + def test_i_do_not_get_value_from_sdp_when_cache_only_is_true(self): + sheerka = self.get_sheerka() + context = self.get_context(sheerka) + with sheerka.sdp.get_transaction(context.event) as transaction: + transaction.add("test", "key", "value") + + cache = Cache(default=lambda k: sheerka.sdp.get("test", k)) + + cache_manager = CacheManager(True) + cache_manager.register_cache("test", cache, persist=True) + + assert cache_manager.get("test", "key") is None + + def test_i_can_get_value_from_sdp_when_cache_only_is_false(self): + sheerka = self.get_sheerka() + context = self.get_context(sheerka) + with sheerka.sdp.get_transaction(context.event) as transaction: + transaction.add("test", "key", "value") + + cache = Cache(default=lambda k: sheerka.sdp.get("test", k)) + + cache_manager = CacheManager(False) + cache_manager.register_cache("test", cache, persist=True) + + assert cache_manager.get("test", "key") == "value" + + def test_i_can_commit_simple_cache(self): + sheerka = self.get_sheerka() + context = self.get_context(sheerka) + + cache_manager = CacheManager(False) + cache_manager.register_cache("test", Cache(), persist=True) + cache = cache_manager.caches["test"].cache + + cache_manager.put("test", "key", "value") + + cache_manager.commit(context) + assert sheerka.sdp.get("test", "key") == "value" + + cache.update("key", "value", "key", "another_value") + cache_manager.commit(context) + assert sheerka.sdp.get("test", "key") == "another_value" + + cache.update("key", "another_value", "key2", "another_value") + cache_manager.commit(context) + assert sheerka.sdp.get("test", "key") is None + assert sheerka.sdp.get("test", "key2") == "another_value" + + def test_i_can_commit_list_cache(self): + sheerka = self.get_sheerka() + context = self.get_context(sheerka) + + cache_manager = CacheManager(False) + cache_manager.register_cache("test", ListCache(), persist=True) + cache = cache_manager.caches["test"].cache + + cache.put("key", 
"value") + cache_manager.commit(context) + assert sheerka.sdp.get("test", "key") == ["value"] + + cache.put("key", "value2") + cache_manager.commit(context) + assert sheerka.sdp.get("test", "key") == ["value", "value2"] + + cache.update("key", "value2", "key2", "value2") + cache_manager.commit(context) + assert sheerka.sdp.get("test", "key") == ["value"] + assert sheerka.sdp.get("test", "key2") == ["value2"] + + cache.update("key2", "value2", "key3", "value2") + cache_manager.commit(context) + assert sheerka.sdp.get("test", "key") == ["value"] + assert sheerka.sdp.get("test", "key2") is None + assert sheerka.sdp.get("test", "key3") == ["value2"] + + def test_i_can_commit_dictionary_cache(self): + sheerka = self.get_sheerka() + context = self.get_context(sheerka) + + cache_manager = CacheManager(False) + cache_manager.register_cache("test", DictionaryCache(), persist=True) + cache = cache_manager.caches["test"].cache + + cache.put(False, {"key": "value", "key2": "value2"}) + cache_manager.commit(context) + assert sheerka.sdp.get("test") == {"key": "value", "key2": "value2"} + assert sheerka.sdp.get("test", "key") == "value" + + cache.put(False, {"key": "value", "key2": "value2", "key3": "value3"}) + cache_manager.commit(context) + assert sheerka.sdp.get("test") == {"key": "value", "key2": "value2", "key3": "value3"} diff --git a/tests/core/test_SheerkaCreateNewConcept.py b/tests/core/test_SheerkaCreateNewConcept.py index bc37167..76ac9ae 100644 --- a/tests/core/test_SheerkaCreateNewConcept.py +++ b/tests/core/test_SheerkaCreateNewConcept.py @@ -1,18 +1,20 @@ from core.builtin_concepts import BuiltinConcepts from core.concept import PROPERTIES_TO_SERIALIZE, Concept, DEFINITION_TYPE_DEF from core.sheerka.Sheerka import Sheerka -from sdp.sheerkaDataProvider import SheerkaDataProvider from tests.TestUsingFileBasedSheerka import TestUsingFileBasedSheerka from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka class 
TestSheerkaCreateNewConcept(TestUsingMemoryBasedSheerka): - def test_i_can_add_a_concept(self): - sheerka = self.get_sheerka() + + def test_i_can_create_a_concept(self): + sheerka = self.get_sheerka(cache_only=False) + context = self.get_context(sheerka) concept = self.get_default_concept() - res = sheerka.create_new_concept(self.get_context(sheerka), concept) + res = sheerka.create_new_concept(context, concept) + sheerka.cache_manager.commit(context) assert res.status assert sheerka.isinstance(res.value, BuiltinConcepts.NEW_CONCEPT) @@ -24,22 +26,36 @@ class TestSheerkaCreateNewConcept(TestUsingMemoryBasedSheerka): assert concept_found.key == "__var__0 + __var__1" assert concept_found.id == "1001" - assert concept.key in sheerka.cache_by_key - assert concept.id in sheerka.cache_by_id - assert concept.name in sheerka.cache_by_name - assert sheerka.sdp.io.exists( - sheerka.sdp.io.get_obj_path(SheerkaDataProvider.ObjectsFolder, concept_found.get_origin())) - assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_HASH_ENTRY, concept.get_definition_hash()) + # saved in cache + assert sheerka.has_id(concept.id) + assert sheerka.has_key(concept.key) + assert sheerka.has_name(concept.name) + assert sheerka.has_hash(concept.get_definition_hash()) + + # I can get the concept using various index + assert sheerka.get_by_id(concept.id) == concept + assert sheerka.get_by_key(concept.key) == concept + assert sheerka.get_by_name(concept.name) == concept + assert sheerka.get_by_hash(concept.get_definition_hash()) == concept + + # I can get by the first entry + assert sheerka.cache_manager.get(sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "+") == [concept.id] + assert sheerka.cache_manager.get(sheerka.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "+") == [concept.id] + + # saved in sdp assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_ID_ENTRY, concept.id) + assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_KEY_ENTRY, concept.key) assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_NAME_ENTRY, concept.name) - 
assert sheerka.sdp.exists(Sheerka.CONCEPTS_ENTRY, concept.key) + assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_HASH_ENTRY, concept.get_definition_hash()) assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "+") def test_i_can_add_a_concept_when_name_differs_from_the_key(self): - sheerka = self.get_sheerka() - concept = Concept("greetings", definition="hello a", definition_type=DEFINITION_TYPE_DEF).def_prop("a") + sheerka = self.get_sheerka(cache_only=False) + context = self.get_context(sheerka) + concept = Concept("greetings", definition="hello a", definition_type=DEFINITION_TYPE_DEF).def_var("a") res = sheerka.create_new_concept(self.get_context(sheerka), concept) + sheerka.cache_manager.commit(context) assert res.status assert sheerka.isinstance(res.value, BuiltinConcepts.NEW_CONCEPT) @@ -51,15 +67,24 @@ class TestSheerkaCreateNewConcept(TestUsingMemoryBasedSheerka): assert concept_found.key == "hello __var__0" assert concept_found.id == "1001" - assert concept.key in sheerka.cache_by_key - assert concept.id in sheerka.cache_by_id - assert concept.name in sheerka.cache_by_name - assert sheerka.sdp.io.exists( - sheerka.sdp.io.get_obj_path(SheerkaDataProvider.ObjectsFolder, concept_found.get_origin())) - assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_HASH_ENTRY, concept.get_definition_hash()) + # saved in cache + assert sheerka.has_id(concept.id) + assert sheerka.has_key(concept.key) + assert sheerka.has_name(concept.name) + assert sheerka.has_hash(concept.get_definition_hash()) + + # I can get the concept using various index + assert sheerka.get_by_id(concept.id) == concept + assert sheerka.get_by_key(concept.key) == concept + assert sheerka.get_by_name(concept.name) == concept + assert sheerka.get_by_hash(concept.get_definition_hash()) == concept + + # saved in sdp assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_ID_ENTRY, concept.id) + assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_KEY_ENTRY, concept.key) assert 
sheerka.sdp.exists(Sheerka.CONCEPTS_BY_NAME_ENTRY, concept.name) - assert sheerka.sdp.exists(Sheerka.CONCEPTS_ENTRY, concept.key) + assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_HASH_ENTRY, concept.get_definition_hash()) + assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "hello") def test_i_cannot_add_the_same_concept_twice(self): """ @@ -82,7 +107,7 @@ class TestSheerkaCreateNewConcept(TestUsingMemoryBasedSheerka): sheerka.create_new_concept(self.get_context(sheerka), concept) - from_cache = sheerka.get(concept.key) + from_cache = sheerka.get_by_key(concept.key) assert from_cache is not None assert from_cache == concept @@ -90,57 +115,7 @@ class TestSheerkaCreateNewConcept(TestUsingMemoryBasedSheerka): assert from_cache is not None assert from_cache == concept - def test_i_first_look_in_local_cache(self): - sheerka = self.get_sheerka() - concept = self.get_default_concept() - - sheerka.create_new_concept(self.get_context(sheerka), concept) - sheerka.cache_by_key[concept.key].pre = "I have modified the concept in cache" - - from_cache = sheerka.get(concept.key) - assert from_cache is not None - assert from_cache.key == concept.key - assert from_cache.pre == "I have modified the concept in cache" - - def test_i_can_get_a_known_concept_when_not_in_cache(self): - """ - When not in cache, uses sdp - :return: - """ - sheerka = self.get_sheerka() - concept = self.get_default_concept() - sheerka.create_new_concept(self.get_context(sheerka), concept) - - sheerka.reset_cache() - loaded = sheerka.get(concept.key) - - assert loaded == concept - - # I can also get it by its id - loaded = sheerka.sdp.get(sheerka.CONCEPTS_BY_ID_ENTRY, concept.id) - assert loaded == concept - - def test_i_can_instantiate_a_concept_from_sdp(self): - sheerka = self.get_sheerka() - concept = Concept("foo") - sheerka.create_new_concept(self.get_context(sheerka), concept) - - sheerka.reset_cache() - loaded = sheerka.new("foo") - - assert loaded == concept - - def 
test_i_can_get_a_concept_by_its_id(self): - sheerka = self.get_sheerka() - concept = self.get_default_concept() - sheerka.create_new_concept(self.get_context(sheerka), concept) - - sheerka.cache_by_key = {} # reset the cache - loaded = sheerka.get_by_id(concept.id) - - assert loaded == concept - - def test_i_can_get_list_of_concept_when_same_key_when_no_cache(self): + def test_i_can_get_list_of_concept_when_same_key_using_cache(self): sheerka = self.get_sheerka() concept1 = self.get_default_concept() concept2 = self.get_default_concept() @@ -149,79 +124,13 @@ class TestSheerkaCreateNewConcept(TestUsingMemoryBasedSheerka): res1 = sheerka.create_new_concept(self.get_context(sheerka), concept1) res2 = sheerka.create_new_concept(self.get_context(sheerka), concept2) - assert res1.status - assert res2.status assert res1.value.body.key == res2.value.body.key # same key - sheerka.cache_by_key = {} # reset the cache - - result = sheerka.get(concept1.key) + result = sheerka.get_by_key(concept1.key) assert len(result) == 2 assert result[0] == concept1 assert result[1] == concept2 - def test_i_can_get_list_of_concept_when_same_key_when_cache(self): - sheerka = self.get_sheerka() - concept1 = self.get_default_concept() - concept2 = self.get_default_concept() - concept2.metadata.body = "a+b" - - res1 = sheerka.create_new_concept(self.get_context(sheerka), concept1) - res2 = sheerka.create_new_concept(self.get_context(sheerka), concept2) - - assert res1.value.body.key == res2.value.body.key # same key - - # sheerka.cache_by_key = {} # Do not reset the cache - - result = sheerka.get(concept1.key) - assert len(result) == 2 - assert result[0] == concept1 - assert result[1] == concept2 - - def test_i_can_get_the_correct_concept_using_the_id_when_same_key_when_no_cache(self): - sheerka = self.get_sheerka() - concept1 = self.get_default_concept() - concept2 = self.get_default_concept() - concept2.metadata.body = "a+b" - - res1 = sheerka.create_new_concept(self.get_context(sheerka), 
concept1) - res2 = sheerka.create_new_concept(self.get_context(sheerka), concept2) - - assert res1.value.body.key == res2.value.body.key # same key - - result = sheerka.get(concept1.key, res2.body.body.id) - assert result.name == "a + b" - assert result.metadata.body == "a+b" - - def test_i_can_get_the_correct_concept_using_the_id__when_same_key_when_cache(self): - sheerka = self.get_sheerka() - concept1 = self.get_default_concept() - concept2 = self.get_default_concept() - concept2.metadata.body = "a+b" - - res1 = sheerka.create_new_concept(self.get_context(sheerka), concept1) - res2 = sheerka.create_new_concept(self.get_context(sheerka), concept2) - - assert res1.value.body.key == res2.value.body.key # same key - - result = sheerka.get(concept1.key, res2.body.body.id) - assert result.name == "a + b" - assert result.metadata.body == "a+b" - - def test_i_cannot_get_the_correct_concept_id_the_id_is_wrong(self): - sheerka = self.get_sheerka() - concept1 = self.get_default_concept() - concept2 = self.get_default_concept() - concept2.metadata.body = "a+b" - - res1 = sheerka.create_new_concept(self.get_context(sheerka), concept1) - res2 = sheerka.create_new_concept(self.get_context(sheerka), concept2) - - assert res1.value.body.key == res2.value.body.key # same key - - result = sheerka.get(concept1.key, "wrong id") - assert sheerka.isinstance(result, BuiltinConcepts.UNKNOWN_CONCEPT) - def test_concept_that_references_itself_is_correctly_created(self): sheerka = self.get_sheerka() concept = Concept("foo", body="foo") @@ -233,27 +142,63 @@ class TestSheerkaCreateNewConcept(TestUsingMemoryBasedSheerka): class TestSheerkaCreateNewConceptFileBased(TestUsingFileBasedSheerka): def test_i_can_add_several_concepts(self): - sheerka, context, hello, greeting = self.init_concepts( - Concept("Hello world a").def_prop("a"), - Concept("Greeting a").def_prop("a"), - use_dict=False - ) - - res = sheerka.create_new_concept(self.get_context(sheerka), hello) + sheerka = self.get_sheerka() 
+ context = self.get_context(sheerka) + hello = Concept("Hello world a").def_var("a") + res = sheerka.create_new_concept(context, hello) + sheerka.cache_manager.commit(context) assert res.status - sheerka = self.get_sheerka(use_dict=False) - res = sheerka.create_new_concept(self.get_context(sheerka), greeting) + sheerka = self.get_sheerka() # another instance + context = self.get_context(sheerka) + greeting = Concept("Greeting a").def_var("a") + res = sheerka.create_new_concept(context, greeting) + sheerka.cache_manager.commit(context) assert res.status - assert sheerka.sdp.exists(Sheerka.CONCEPTS_ENTRY, hello.key) - assert sheerka.sdp.exists(Sheerka.CONCEPTS_ENTRY, greeting.key) + sheerka = self.get_sheerka() # another instance again + assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_KEY_ENTRY, hello.key) + assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_KEY_ENTRY, greeting.key) assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_ID_ENTRY, hello.id) assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_ID_ENTRY, greeting.id) assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_NAME_ENTRY, "Hello world a") assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_NAME_ENTRY, "Greeting a") + assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_HASH_ENTRY, hello.get_definition_hash()) + assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_HASH_ENTRY, greeting.get_definition_hash()) + assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "Hello") assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "Greeting") + def test_i_cannot_add_the_same_concept_twice_using_sdp(self): + """ + Checks that duplicated concepts are managed by sheerka, not by sheerka.sdp + :return: + """ + sheerka = self.get_sheerka(cache_only=False) + context = self.get_context(sheerka) + concept = self.get_default_concept() + sheerka.create_new_concept(context, concept) + sheerka.cache_manager.commit(context) + sheerka.cache_manager.clear() + res = sheerka.create_new_concept(context, concept) + + assert not res.status + 
assert sheerka.isinstance(res.value, BuiltinConcepts.CONCEPT_ALREADY_DEFINED) + assert res.value.body == concept + + def test_new_entry_does_not_override_the_previous_ones(self): + sheerka = self.get_sheerka() + context = self.get_context(sheerka) + sheerka.create_new_concept(context, Concept("foo", body="1")) + sheerka.create_new_concept(context, Concept("foo", body="2")) + sheerka.cache_manager.commit(context) + + assert len(sheerka.sdp.get(Sheerka.CONCEPTS_BY_KEY_ENTRY, "foo")) == 2 + + sheerka = self.get_sheerka() # new instance + sheerka.create_new_concept(context, Concept("foo", body="3")) + sheerka.cache_manager.commit(context) + + assert len(sheerka.sdp.get(Sheerka.CONCEPTS_BY_KEY_ENTRY, "foo")) == 3 diff --git a/tests/core/test_SheerkaEvaluateConcept.py b/tests/core/test_SheerkaEvaluateConcept.py index 927d88d..c55e417 100644 --- a/tests/core/test_SheerkaEvaluateConcept.py +++ b/tests/core/test_SheerkaEvaluateConcept.py @@ -1,6 +1,6 @@ import pytest from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept, ParserResultConcept -from core.concept import Concept, simplec, DoNotResolve, ConceptParts, Property, InfiniteRecursionResolved +from core.concept import Concept, DoNotResolve, ConceptParts, Property, InfiniteRecursionResolved, CB from parsers.PythonParser import PythonNode from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka @@ -9,8 +9,8 @@ from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka class TestSheerkaEvaluateConcept(TestUsingMemoryBasedSheerka): @pytest.mark.parametrize("body, expected", [ - # (None, None), - # ("", ""), + (None, None), + ("", ""), ("1", 1), ("1+1", 2), ("'one'", "one"), @@ -19,10 +19,9 @@ class TestSheerkaEvaluateConcept(TestUsingMemoryBasedSheerka): ("1 > 2", False), ]) def test_i_can_evaluate_a_concept_with_simple_body(self, body, expected): - sheerka = self.get_sheerka() + sheerka, context, concept = self.init_concepts(Concept("foo", body=body), eval_body=True) - 
concept = Concept("foo", body=body).init_key() - evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), concept) + evaluated = sheerka.evaluate_concept(context, concept) assert evaluated.key == concept.key assert evaluated.body == expected @@ -30,7 +29,7 @@ class TestSheerkaEvaluateConcept(TestUsingMemoryBasedSheerka): assert evaluated.metadata.pre is None assert evaluated.metadata.post is None assert evaluated.metadata.where is None - assert evaluated.props == {} + assert evaluated.variables() == {} assert evaluated.metadata.is_evaluated assert len(evaluated.values) == 0 if body is None else 1 @@ -51,18 +50,17 @@ class TestSheerkaEvaluateConcept(TestUsingMemoryBasedSheerka): :return: """ - sheerka = self.get_sheerka() + sheerka, context, concept = self.init_concepts(Concept("foo", pre=expr)) - concept = Concept("foo", pre=expr).init_key() - evaluated = sheerka.evaluate_concept(self.get_context(sheerka), concept) + evaluated = sheerka.evaluate_concept(context, concept) assert evaluated.key == concept.key assert evaluated.metadata.body is None assert evaluated.metadata.pre == expr assert evaluated.metadata.post is None assert evaluated.metadata.where is None - assert evaluated.get_metadata_value(ConceptParts.PRE) == expected - assert evaluated.props == {} + assert evaluated.get_value(ConceptParts.PRE) == expected + assert evaluated.variables() == {} assert not evaluated.metadata.is_evaluated assert len(evaluated.values) == 0 if expr is None else 1 @@ -77,124 +75,121 @@ class TestSheerkaEvaluateConcept(TestUsingMemoryBasedSheerka): ("1 > 2", False), ]) def test_i_can_evaluate_a_concept_with_prop(self, expr, expected): - sheerka = self.get_sheerka() + sheerka, context, concept = self.init_concepts(Concept("foo").def_var("a", expr), eval_body=True) - concept = Concept("foo").def_prop("a", expr) - evaluated = sheerka.evaluate_concept(self.get_context(sheerka,True), concept) + evaluated = sheerka.evaluate_concept(context, concept) assert evaluated.key == 
concept.key assert evaluated.metadata.pre is None assert evaluated.metadata.pre is None assert evaluated.metadata.post is None assert evaluated.metadata.where is None - assert evaluated.props == {"a": Property("a", expected)} + assert evaluated.variables() == {"a": Property("a", expected)} assert evaluated.metadata.is_evaluated def test_i_can_evaluate_metadata_using_do_not_resolve(self): - sheerka = self.get_sheerka() - concept = Concept("foo") + sheerka, context, concept = self.init_concepts(Concept("foo"), eval_body=True) concept.compiled[ConceptParts.BODY] = DoNotResolve("do not resolve") - evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), concept) + evaluated = sheerka.evaluate_concept(context, concept) assert evaluated.body == "do not resolve" assert evaluated.metadata.is_evaluated def test_i_can_evaluate_property_using_do_not_resolve(self): - sheerka = self.get_sheerka() - concept = Concept("foo").def_prop("a") + sheerka, context, concept = self.init_concepts(Concept("foo").def_var("a"), eval_body=True) concept.compiled["a"] = DoNotResolve("do not resolve") - evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), concept) + evaluated = sheerka.evaluate_concept(context, concept) - assert evaluated.get_prop("a") == "do not resolve" + assert evaluated.get_value("a") == "do not resolve" assert evaluated.metadata.is_evaluated def test_original_value_is_overridden_when_using_do_no_resolve(self): - sheerka = self.get_sheerka() - concept = Concept("foo", body="original value").set_prop("a", "original value") + concept = Concept("foo", body="original value").def_var("a", "original value") + sheerka, context, concept = self.init_concepts(concept, eval_body=True) concept.compiled["a"] = DoNotResolve("do not resolve") concept.compiled[ConceptParts.BODY] = DoNotResolve("do not resolve") - evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), concept) + evaluated = sheerka.evaluate_concept(context, concept) assert 
evaluated.body == "do not resolve" - assert evaluated.get_prop("a") == "do not resolve" + assert evaluated.get_value("a") == "do not resolve" assert evaluated.metadata.is_evaluated - def test_props_are_evaluated_before_body(self): - sheerka = self.get_sheerka() + def test_variables_are_evaluated_before_body(self): + sheerka, context, concept = self.init_concepts(Concept("foo", body="a+1").def_var("a", "10"), eval_body=True) - concept = Concept("foo", body="a+1").def_prop("a", "10").init_key() - evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), concept) + evaluated = sheerka.evaluate_concept(context, concept) - assert evaluated.key == concept.key - assert evaluated.body == 11 + assert evaluated == CB(concept, 11) def test_i_can_evaluate_when_another_concept_is_referenced(self): - sheerka = self.get_sheerka() - concept_a = Concept("a") - sheerka.add_in_cache(concept_a) + sheerka, context, concept_a, concept = self.init_concepts( + Concept("a"), + Concept("foo", body="a"), + eval_body=True) - concept = Concept("foo", body="a").init_key() - evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), concept) + evaluated = sheerka.evaluate_concept(context, concept) - assert evaluated == simplec("foo", simplec("a", None)) - assert id(evaluated.body) != id(concept_a) + assert evaluated == CB("foo", CB("a", None)) assert evaluated.metadata.is_evaluated assert evaluated.body.metadata.is_evaluated def test_i_can_evaluate_when_the_referenced_concept_has_a_body(self): - sheerka = self.get_sheerka() - concept_a = Concept("a", body="1") - sheerka.add_in_cache(concept_a) + sheerka, context, concept_a, concept = self.init_concepts( + Concept("a", body="1"), + Concept("foo", body="a"), + eval_body=True) - concept = Concept("foo", body="a") - evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), concept) + evaluated = sheerka.evaluate_concept(context, concept) assert evaluated.key == concept.key - assert evaluated.body == simplec("a", 1) 
+ assert evaluated.body == CB("a", 1) assert not concept_a.metadata.is_evaluated assert evaluated.metadata.is_evaluated def test_i_can_evaluate_concept_of_concept_when_the_leaf_has_a_body(self): - sheerka = self.get_sheerka() - sheerka.add_in_cache(Concept(name="a", body="'a'")) - sheerka.add_in_cache(Concept(name="b", body="a")) - sheerka.add_in_cache(Concept(name="c", body="b")) - concept_d = sheerka.add_in_cache(Concept(name="d", body="c")) + sheerka, context, concept_a, concept_b, concept_c, concept_d = self.init_concepts( + Concept("a", body="'a'"), + Concept("b", body="a"), + Concept("c", body="b"), + Concept("d", body="c"), + eval_body=True) - evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), concept_d) + evaluated = sheerka.evaluate_concept(context, concept_d) assert evaluated.key == concept_d.key - expected = simplec("c", simplec("b", simplec("a", "a"))) + expected = CB("c", CB("b", CB("a", "a"))) assert evaluated.body == expected - assert sheerka.value(evaluated) == 'a' + assert sheerka.objvalue(evaluated) == 'a' assert evaluated.metadata.is_evaluated def test_i_can_evaluate_concept_of_concept_does_not_have_a_body(self): - sheerka = self.get_sheerka() - sheerka.add_in_cache(Concept(name="a")) - sheerka.add_in_cache(Concept(name="b", body="a")) - sheerka.add_in_cache(Concept(name="c", body="b")) - concept_d = sheerka.add_in_cache(Concept(name="d", body="c")) + sheerka, context, concept_a, concept_b, concept_c, concept_d = self.init_concepts( + Concept("a"), + Concept("b", body="a"), + Concept("c", body="b"), + Concept("d", body="c"), + eval_body=True) - evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), concept_d) + evaluated = sheerka.evaluate_concept(context, concept_d) assert evaluated.key == concept_d.key - expected = simplec("c", simplec("b", simplec("a", None))) + expected = CB("c", CB("b", CB("a", None))) assert evaluated.body == expected - assert sheerka.value(evaluated) == Concept(name="a").init_key() + 
assert sheerka.objvalue(evaluated) == CB("a", None) assert evaluated.metadata.is_evaluated def test_i_can_evaluate_concept_when_properties_reference_others_concepts(self): - sheerka = self.get_sheerka() - concept_a = sheerka.add_in_cache(Concept(name="a").init_key()) + sheerka, context, concept_a, concept = self.init_concepts( + Concept("a"), + Concept("foo", body="a").def_var("a", "a"), + eval_body=True) - concept = Concept("foo", body="a").def_prop("a", "a").init_key() - evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), concept) + evaluated = sheerka.evaluate_concept(context, concept) - # first prop a is evaluated to concept_a + # first, prop a is evaluated to concept_a # then body is evaluated to prop a -> concept_a assert evaluated.key == concept.key assert evaluated.body == concept_a @@ -205,49 +200,49 @@ class TestSheerkaEvaluateConcept(TestUsingMemoryBasedSheerka): but the name of the property and the name of the concept are different :return: """ - sheerka = self.get_sheerka() - concept_a = sheerka.add_in_cache(Concept(name="a")) + sheerka, context, concept_a = self.init_concepts(Concept(name="a"), eval_body=True) - concept = Concept("foo", body="concept_a").def_prop("concept_a", "a") - evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), concept) + concept = Concept("foo", body="concept_a").def_var("concept_a", "a") + evaluated = sheerka.evaluate_concept(context, concept) assert evaluated.key == concept.key assert evaluated.body == concept_a def test_i_can_evaluate_concept_when_properties_reference_others_concepts_with_body(self): - sheerka = self.get_sheerka() - sheerka.add_in_cache(Concept(name="a", body="1")) - sheerka.add_in_cache(Concept(name="b", body="2")) + sheerka, context, *concepts = self.init_concepts( + Concept(name="a", body="1"), + Concept(name="b", body="2"), + eval_body=True + ) - concept = Concept("foo", body="propA + propB").def_prop("propA", "a").def_prop("propB", "b") - evaluated = 
sheerka.evaluate_concept(self.get_context(sheerka, True), concept) + concept = Concept("foo", body="propA + propB").def_var("propA", "a").def_var("propB", "b") + evaluated = sheerka.evaluate_concept(context, concept) assert evaluated.key == concept.key assert evaluated.body == 3 def test_i_can_evaluate_concept_when_properties_is_a_concept(self): - sheerka = self.get_sheerka() - concept_a = sheerka.add_in_cache(Concept(name="a", body="'a'").init_key()) + sheerka, context, concept_a = self.init_concepts(Concept(name="a", body="'a'"), eval_body=True) - concept = Concept("foo").def_prop("a") + concept = Concept("foo").def_var("a") concept.compiled["a"] = concept_a - evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), concept) + evaluated = sheerka.evaluate_concept(context, concept) assert evaluated.key == concept.key - assert evaluated.get_prop("a") == simplec("a", "a") + assert evaluated.get_value("a") == CB("a", "a") def test_i_can_evaluate_when_property_asts_is_a_list(self): sheerka = self.get_sheerka() foo = Concept("foo", body="1") - concept = Concept("to_eval").def_prop("prop") + concept = Concept("to_eval").def_var("prop") concept.compiled["prop"] = [foo, DoNotResolve("1")] evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), concept) - props = evaluated.get_prop("prop") - assert len(props) == 2 - assert props[0] == simplec("foo", 1) - assert props[1] == "1" + variables = evaluated.get_value("prop") + assert len(variables) == 2 + assert variables[0] == CB("foo", 1) + assert variables[1] == "1" def test_i_can_evaluate_when_compiled_is_set_up_with_return_value(self): sheerka = self.get_sheerka() @@ -255,16 +250,16 @@ class TestSheerkaEvaluateConcept(TestUsingMemoryBasedSheerka): python_node = PythonNode("1 +1 ") parser_result = ParserResultConcept(parser="who", value=python_node) - concept = Concept("to_eval").def_prop("prop") + concept = Concept("to_eval").def_var("prop") concept.compiled["prop"] = [ReturnValueConcept("who", 
True, parser_result)] evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), concept) - assert evaluated.get_prop("prop") == 2 + assert evaluated.get_value("prop") == 2 # also works when only one return value - concept = Concept("to_eval").def_prop("prop") + concept = Concept("to_eval").def_var("prop") concept.compiled["prop"] = ReturnValueConcept("who", True, parser_result) evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), concept) - assert evaluated.get_prop("prop") == 2 + assert evaluated.get_value("prop") == 2 def test_i_can_reference_sheerka(self): sheerka = self.get_sheerka() @@ -276,49 +271,55 @@ class TestSheerkaEvaluateConcept(TestUsingMemoryBasedSheerka): assert evaluated.body == sheerka.test() def test_properties_values_takes_precedence_over_the_outside_world(self): - sheerka = self.get_sheerka() - sheerka.add_in_cache(Concept(name="a", body="'concept_a'")) - sheerka.add_in_cache(Concept(name="b", body="'concept_b'")) + sheerka, context, concept_a, concept_b = self.init_concepts( + Concept(name="a", body="'concept_a'"), + Concept(name="b", body="'concept_b'"), + eval_body=True + ) concept = Concept("foo", body="a") - evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), concept) + evaluated = sheerka.evaluate_concept(context, concept) assert evaluated.key == concept.key - assert evaluated.body == simplec("a", "concept_a") # this test was already done + assert evaluated.body == CB("a", "concept_a") # this test was already done # so check this one. - concept = Concept("foo", body="a").def_prop("a", "'property_a'") - evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), concept) + concept = Concept("foo", body="a").def_var("a", "'property_a'") + evaluated = sheerka.evaluate_concept(context, concept) assert evaluated.key == concept.key assert evaluated.body == 'property_a' # or this one. 
- concept = Concept("foo", body="a").def_prop("a", "b") - evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), concept) + concept = Concept("foo", body="a").def_var("a", "b") + evaluated = sheerka.evaluate_concept(context, concept) assert evaluated.key == concept.key - assert evaluated.body == simplec(name="b", body="concept_b") + assert evaluated.body == CB("b", "concept_b") def test_properties_values_takes_precedence(self): - sheerka = self.get_sheerka() - sheerka.add_in_cache(Concept(name="a", body="'concept_a'")) - sheerka.add_in_cache(Concept(name="b", body="'concept_b'")) + sheerka, context, concept_a, concept_b = self.init_concepts( + Concept(name="a", body="'concept_a'"), + Concept(name="b", body="'concept_b'"), + eval_body=True + ) - concept = Concept("foo", body="a + b").def_prop("a", "'prop_a'").init_key() - evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), concept) + concept = Concept("foo", body="a + b").def_var("a", "'prop_a'").init_key() + evaluated = sheerka.evaluate_concept(context, concept) assert evaluated.key == concept.key assert evaluated.body == 'prop_aconcept_b' def test_i_can_reference_sub_property_of_a_property(self): - sheerka = self.get_sheerka() - sheerka.add_in_cache(Concept(name="concept_a").def_prop("subProp", "'sub_a'")) + sheerka, context, concept_a = self.init_concepts( + Concept(name="concept_a").def_var("subProp", "'sub_a'"), + eval_body=True + ) - concept = Concept("foo", body="a.props['subProp'].value").def_prop("a", "concept_a") - evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), concept) - assert evaluated == simplec(concept.key, "sub_a") + concept = Concept("foo", body="a.subProp").def_var("a", "concept_a") + evaluated = sheerka.evaluate_concept(context, concept) + assert evaluated == CB(concept.key, "sub_a") def test_i_cannot_evaluate_concept_if_property_is_in_error(self): sheerka = self.get_sheerka() - concept = Concept(name="concept_a").def_prop("subProp", 
"undef_concept") + concept = Concept(name="concept_a").def_var("subProp", "undef_concept") evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), concept) assert sheerka.isinstance(evaluated, BuiltinConcepts.CONCEPT_EVAL_ERROR) @@ -340,11 +341,10 @@ class TestSheerkaEvaluateConcept(TestUsingMemoryBasedSheerka): ("a + self > 20", True), ]) def test_i_can_evaluate_simple_where(self, where_clause, expected): - sheerka = self.get_sheerka() - - concept = Concept("foo", body="10", where=where_clause).def_prop("a", "20") - sheerka.add_in_cache(concept) - + sheerka, context, concept = self.init_concepts( + Concept("foo", body="10", where=where_clause).def_var("a", "20"), + ) + evaluated = sheerka.evaluate_concept(self.get_context(sheerka, False, True), concept) if expected: @@ -354,12 +354,10 @@ class TestSheerkaEvaluateConcept(TestUsingMemoryBasedSheerka): assert evaluated.body == concept def test_i_can_evaluate_where_when_using_other_concept(self): - sheerka = self.get_sheerka() - - foo_true = Concept("foo_true", body="True").init_key() - foo_false = Concept("foo_false", body="False").init_key() - sheerka.add_in_cache(foo_false) - sheerka.add_in_cache(foo_true) + sheerka, context, foo_true, foo_false = self.init_concepts( + Concept("foo_true", body="True"), + Concept("foo_false", body="False"), + ) concept = Concept("foo", where="foo_true").init_key() evaluated = sheerka.evaluate_concept(self.get_context(sheerka, False, True), concept) @@ -375,12 +373,9 @@ class TestSheerkaEvaluateConcept(TestUsingMemoryBasedSheerka): assert evaluated.key == concept.key def test_i_can_evaluate_disable_where_clause_evaluation(self): - sheerka = self.get_sheerka() - - concept = Concept("foo", body="10", where="a > 10").def_prop("a", None) - sheerka.add_in_cache(concept) - - context = self.get_context(sheerka) + sheerka, context, concept = self.init_concepts( + Concept("foo", body="10", where="a > 10").def_var("a", None), + ) evaluated = sheerka.evaluate_concept(context, 
concept) assert evaluated.key == concept.key @@ -389,82 +384,84 @@ class TestSheerkaEvaluateConcept(TestUsingMemoryBasedSheerka): assert sheerka.isinstance(evaluated, BuiltinConcepts.CONCEPT_EVAL_ERROR) def test_i_can_detect_infinite_recursion_with_numeric_constant(self): - sheerka = self.get_sheerka() - - one_str = Concept("one", body="1") - one_digit = Concept("1", body="one") + sheerka, context, one_str, one_digit = self.init_concepts( + Concept("one", body="1"), + Concept("1", body="one"), + eval_body=True + ) sheerka.add_in_cache(one_str) sheerka.add_in_cache(one_digit) - evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), one_digit) + evaluated = sheerka.evaluate_concept(context, one_digit) assert evaluated.key == one_digit.key assert evaluated.body == InfiniteRecursionResolved(1) - evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), one_str) + evaluated = sheerka.evaluate_concept(context, one_str) assert evaluated.key == one_str.key assert evaluated.body == InfiniteRecursionResolved(1) def test_i_can_detect_infinite_recursion_with_boolean_constant(self): - sheerka = self.get_sheerka() + sheerka, context, true_str, true_bool = self.init_concepts( + Concept("true", body="True"), + Concept("True", body="true"), + eval_body=True + ) - true_str = Concept("true", body="True") - true_bool = Concept("True", body="true") - - sheerka.add_in_cache(true_str) - sheerka.add_in_cache(true_bool) - - evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), true_str) + evaluated = sheerka.evaluate_concept(context, true_str) assert evaluated.key == true_str.key assert evaluated.body == InfiniteRecursionResolved(True) - evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), true_bool) + evaluated = sheerka.evaluate_concept(context, true_bool) assert evaluated.key == true_bool.key assert evaluated.body == InfiniteRecursionResolved(True) def test_i_can_detect_infinite_recursion_with_constant_with_more_concepts(self): 
- sheerka = self.get_sheerka() - - c1 = sheerka.add_in_cache(Concept("one", body="1")) - c2 = sheerka.add_in_cache(Concept("1", body="2")) - c3 = sheerka.add_in_cache(Concept("2", body="3")) - c4 = sheerka.add_in_cache(Concept("3", body="one")) + sheerka, context, c1, c2, c3, c4 = self.init_concepts( + Concept("one", body="1"), + Concept("1", body="2"), + Concept("2", body="3"), + Concept("3", body="one"), + eval_body=True + ) for concept in (c1, c2, c3, c4): - evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), concept) + evaluated = sheerka.evaluate_concept(context, concept) assert evaluated.key == concept.key assert evaluated.body == InfiniteRecursionResolved(3) def test_i_can_detect_infinite_recursion_when_no_constant(self): - sheerka = self.get_sheerka() + sheerka, context, foo, bar, baz, qux = self.init_concepts( + Concept("foo", body="bar"), + Concept("bar", body="baz"), + Concept("baz", body="qux"), + Concept("qux", body="foo"), + eval_body=True + ) - foo = sheerka.add_in_cache(Concept("foo", body="bar")) - bar = sheerka.add_in_cache(Concept("bar", body="baz")) - baz = sheerka.add_in_cache(Concept("baz", body="qux")) - qux = sheerka.add_in_cache(Concept("qux", body="foo")) - - evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), foo) + evaluated = sheerka.evaluate_concept(context, foo) assert sheerka.isinstance(evaluated, BuiltinConcepts.CHICKEN_AND_EGG) assert evaluated.body == {foo, bar, baz, qux} - evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), bar) + evaluated = sheerka.evaluate_concept(context, bar) assert sheerka.isinstance(evaluated, BuiltinConcepts.CHICKEN_AND_EGG) assert evaluated.body == {foo, bar, baz, qux} - evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), baz) + evaluated = sheerka.evaluate_concept(context, baz) assert sheerka.isinstance(evaluated, BuiltinConcepts.CHICKEN_AND_EGG) assert evaluated.body == {foo, bar, baz, qux} - evaluated = 
sheerka.evaluate_concept(self.get_context(sheerka, True), qux) + evaluated = sheerka.evaluate_concept(context, qux) assert sheerka.isinstance(evaluated, BuiltinConcepts.CHICKEN_AND_EGG) assert evaluated.body == {foo, bar, baz, qux} def test_i_can_detect_auto_recursion(self): - sheerka = self.get_sheerka() + sheerka, context, foo = self.init_concepts( + Concept("foo", body="foo"), + eval_body=True + ) - foo = sheerka.add_in_cache(Concept("foo", body="foo")) - - evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), foo) + evaluated = sheerka.evaluate_concept(context, foo) assert sheerka.isinstance(evaluated, BuiltinConcepts.CHICKEN_AND_EGG) assert evaluated.body == {foo} @@ -475,6 +472,3 @@ class TestSheerkaEvaluateConcept(TestUsingMemoryBasedSheerka): evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), one) assert evaluated.key == one.key assert evaluated.body == 1 - - - diff --git a/tests/core/test_SheerkaHistoryManager.py b/tests/core/test_SheerkaHistoryManager.py index 92db4f6..b1dbee6 100644 --- a/tests/core/test_SheerkaHistoryManager.py +++ b/tests/core/test_SheerkaHistoryManager.py @@ -4,7 +4,8 @@ from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka class TestSheerkaHistoryManager(TestUsingMemoryBasedSheerka): def test_i_can_retrieve_history(self): - sheerka = self.get_sheerka(skip_builtins_in_db=False, singleton=False) + sheerka = self.get_sheerka(singleton=False) + sheerka.save_execution_context = True sheerka.evaluate_user_input("def concept one as 1") sheerka.evaluate_user_input("one") diff --git a/tests/core/test_SheerkaModifyConcept.py b/tests/core/test_SheerkaModifyConcept.py index c19af6c..d95a673 100644 --- a/tests/core/test_SheerkaModifyConcept.py +++ b/tests/core/test_SheerkaModifyConcept.py @@ -1,96 +1,79 @@ -import pytest from core.builtin_concepts import BuiltinConcepts from core.concept import Concept, ConceptParts -from core.sheerka.Sheerka import Sheerka +from 
tests.TestUsingFileBasedSheerka import TestUsingFileBasedSheerka from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka class TestSheerkaModifyConcept(TestUsingMemoryBasedSheerka): + def test_i_can_modify_a_concept(self): - sheerka, context, foo, bar = self.init_concepts("foo", "bar", create_new=True) + sheerka, context, foo, bar = self.init_concepts("foo", "bar", create_new=True, cache_only=False) foo_instance = sheerka.new("foo") - foo_instance.metadata.body = "value" # modify metadata - foo_instance.set_prop(BuiltinConcepts.ISA, bar) # modify property - foo_instance.set_metadata_value(ConceptParts.BODY, "body value") # modify value + foo_instance.metadata.body = "metadata value" # modify metadata + foo_instance.def_var("var_name", "default value") # modify definition of variables + foo_instance.add_prop(BuiltinConcepts.ISA, bar) # modify property + foo_instance.set_value(ConceptParts.BODY, "body value") # modify value + foo_instance.set_value("var_name", "var value") # modify value res = sheerka.modify_concept(context, foo_instance) assert res.status assert sheerka.isinstance(res.body, BuiltinConcepts.NEW_CONCEPT) - assert res.body.body.metadata.body == "value" - assert res.body.body.get_prop(BuiltinConcepts.ISA) == bar + assert res.body.body.metadata.body == "metadata value" + assert res.body.body.metadata.variables == [("var_name", "default value")] + assert res.body.body.get_prop(BuiltinConcepts.ISA) == {bar} assert res.body.body.body == "body value" + assert res.body.body.get_value("var_name") == "var value" # test that object - sheerka.reset_cache() - foo_from_sheerka = sheerka.new("foo") - assert foo_from_sheerka.metadata.body == "value" - assert foo_from_sheerka.get_prop(BuiltinConcepts.ISA) == bar + foo_from_sheerka = sheerka.get_by_key("foo") + assert foo_from_sheerka.metadata.body == "metadata value" + assert foo_from_sheerka.metadata.variables == [("var_name", "default value")] + assert foo_from_sheerka.get_prop(BuiltinConcepts.ISA) 
== {bar} assert foo_from_sheerka.body == "body value" + assert foo_from_sheerka.get_value("var_name") == "var value" - # test that ref by id is updated - sheerka.reset_cache() - foo_from_sheerka = sheerka.get_by_id(foo.id) - assert foo_from_sheerka.metadata.body == "value" - assert foo_from_sheerka.get_prop(BuiltinConcepts.ISA) == bar - assert foo_from_sheerka.body == "body value" + # other caches are also updated + assert sheerka.get_by_id(foo.id).metadata.body == "metadata value" + assert sheerka.get_by_name(foo.name).metadata.body == "metadata value" + assert sheerka.get_by_hash(foo_instance.get_definition_hash()).metadata.body == "metadata value" - # test that ref by name is updated - sheerka.reset_cache() - foo_from_sheerka = sheerka.get_by_name(foo.name) - assert foo_from_sheerka.metadata.body == "value" - assert foo_from_sheerka.get_prop(BuiltinConcepts.ISA) == bar - assert foo_from_sheerka.body == "body value" - - # test that ref by hash is updated - foo_from_sdp = sheerka.sdp.get(Sheerka.CONCEPTS_BY_HASH_ENTRY, foo_instance.get_definition_hash()) - assert foo_from_sdp.metadata.body == "value" - assert foo_from_sdp.get_prop(BuiltinConcepts.ISA) == bar - assert foo_from_sdp.body == "body value" - - # previous ref by hash is removed (since that definition hash has changed) - with pytest.raises(IndexError): - sheerka.sdp.get(Sheerka.CONCEPTS_BY_HASH_ENTRY, foo_instance.get_original_definition_hash()) - - def test_i_can_modify_concept_modifying_only_properties_and_body(self): - sheerka, context, foo, bar = self.init_concepts("foo", "bar", create_new=True) - - foo_instance = sheerka.new("foo") - foo_instance.set_prop(BuiltinConcepts.ISA, bar) - foo_instance.set_metadata_value(ConceptParts.BODY, "body value") - res = sheerka.modify_concept(context, foo_instance) - - assert res.status - - foo_from_sheerka = sheerka.new("foo") - assert foo_from_sheerka.get_prop(BuiltinConcepts.ISA) == bar - assert foo_from_sheerka.body == "body value" - - def 
test_cache_is_updated_when_a_concept_is_modified(self): - sheerka, context, foo = self.init_concepts("foo", create_new=True) - - foo_instance = sheerka.new("foo") - foo_instance.metadata.body = "value" - res = sheerka.modify_concept(context, foo_instance) - assert res.status - - foo_from_sheerka = sheerka.get("foo") - assert foo_from_sheerka.metadata.body == "value" - - foo_by_id_from_sheerka = sheerka.get_by_id(foo.id) - assert foo_by_id_from_sheerka.metadata.body == "value" + # sdp can be updated + sheerka.cache_manager.commit(context) + from_sdp = sheerka.sdp.get(sheerka.CONCEPTS_BY_ID_ENTRY, foo.id) + assert from_sdp.metadata.body == "metadata value" + assert from_sdp.metadata.variables == [("var_name", "default value")] + assert from_sdp.get_prop(BuiltinConcepts.ISA) == {bar} + assert from_sdp.body == "body value" + assert from_sdp.get_value("var_name") == "var value" def test_i_cannot_modify_a_concept_that_does_not_exists(self): - sheerka, context, foo = self.init_concepts("foo", create_new=False) + sheerka, context = self.init_concepts() - foo_instance = sheerka.new("foo") - foo_instance.metadata.body = "value" - res = sheerka.modify_concept(context, foo_instance) + foo = Concept("foo").init_key() + sheerka.set_id_if_needed(foo, False) + res = sheerka.modify_concept(context, foo) assert not res.status assert sheerka.isinstance(res.body, BuiltinConcepts.UNKNOWN_CONCEPT) - assert res.body.body.key == foo.key + assert res.body.body == ("id", foo.id) + + def test_i_cannot_modify_a_concept_that_returns_an_error(self): + sheerka, context = self.init_concepts() + + foo = Concept("foo").init_key() + res = sheerka.modify_concept(context, foo) + + assert not res.status + assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR) + + def test_i_cannot_modify_if_the_concept_has_not_changed(self): + sheerka, context, foo = self.init_concepts("foo", create_new=True) + res = sheerka.modify_concept(context, foo) + + assert not res.status + assert 
sheerka.isinstance(res.body, BuiltinConcepts.CONCEPT_ALREADY_DEFINED) def test_i_can_modify_a_concept_that_is_in_a_list(self): sheerka, context, foo1, foo2 = self.init_concepts( @@ -105,7 +88,19 @@ class TestSheerkaModifyConcept(TestUsingMemoryBasedSheerka): assert sheerka.isinstance(res.body, BuiltinConcepts.NEW_CONCEPT) assert res.body.body.metadata.body == "value" - sheerka.reset_cache() foo_from_sheerka = sheerka.new("foo") assert foo_from_sheerka[0].metadata.body == "1" assert foo_from_sheerka[1].metadata.body == "value" + + +class TestSheerkaModifyConceptUsingFile(TestUsingFileBasedSheerka): + + def test_i_can_modify_a_concept_from_a_new_sheerka(self): + sheerka, context, foo = self.init_concepts("foo", create_new=True) + sheerka.cache_manager.commit(context) + + sheerka = self.get_sheerka() + foo.add_prop("a", "b") + res = sheerka.modify_concept(context, foo) + + assert res.status diff --git a/tests/core/test_SheerkaSetsManager.py b/tests/core/test_SheerkaSetsManager.py index a660987..7db7f48 100644 --- a/tests/core/test_SheerkaSetsManager.py +++ b/tests/core/test_SheerkaSetsManager.py @@ -1,79 +1,65 @@ -from core.builtin_concepts import ConceptAlreadyInSet, BuiltinConcepts +from core.builtin_concepts import BuiltinConcepts from core.concept import Concept from tests.TestUsingFileBasedSheerka import TestUsingFileBasedSheerka +from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka -class TestSheerkaSetsManager(TestUsingFileBasedSheerka): +class TestSheerkaSetsManager(TestUsingMemoryBasedSheerka): - # def init(self, use_dict, *concepts): - # sheerka = self.get_sheerka(use_dict, True) - # for c in concepts: - # sheerka.set_id_if_needed(c, False) - # sheerka.add_in_cache(c) - # - # context = self.get_context(sheerka) - # return sheerka, context + def test_i_can_add_a_concept_to_a_set(self): + sheerka, context, foo, group = self.init_concepts( + Concept("foo"), + Concept("group"), + cache_only=False + ) + assert 
sheerka.add_concept_to_set(context, foo, group).status - def test_i_can_add_concept_to_set(self): - sheerka, context, foo, all_foos = self.init_concepts(Concept("foo"), Concept("all_foo"), use_dict=False) + group_elements = sheerka.cache_manager.get(sheerka.CONCEPTS_GROUPS_ENTRY, group.id) + assert group_elements == {foo.id} - res = sheerka.add_concept_to_set(context, foo, all_foos) - - assert res.status - assert sheerka.isinstance(res.body, BuiltinConcepts.SUCCESS) - - all_entries = self.get_sheerka(use_dict=False).sdp.get("All_" + all_foos.id, None, False) - assert len(all_entries) == 1 - assert foo.id in all_entries - - def test_i_can_add_several_concepts_to_set(self): - sheerka, context, foo1, foo2, all_foos = self.init_concepts( - Concept("foo1"), - Concept("foo2"), - Concept("all_foo"), use_dict=False) - - res = sheerka.sets_handler.add_concepts_to_set(context, (foo1, foo2), all_foos) - - assert res.status - assert sheerka.isinstance(res.body, BuiltinConcepts.SUCCESS) - - all_entries = self.get_sheerka(use_dict=False).sdp.get("All_" + all_foos.id, None, False) - assert len(all_entries) == 2 - assert foo1.id in all_entries - assert foo2.id in all_entries - - # I can add another elements - foo3 = Concept("foo3") - foo4 = Concept("foo4") - for c in [foo3, foo4]: - sheerka.set_id_if_needed(c, False) - sheerka.add_in_cache(c) - - res = sheerka.sets_handler.add_concepts_to_set(context, (foo3, foo4), all_foos) - - assert res.status - assert sheerka.isinstance(res.body, BuiltinConcepts.SUCCESS) - - all_entries = self.get_sheerka(use_dict=False).sdp.get("All_" + all_foos.id, None, False) - assert len(all_entries) == 4 - assert foo1.id in all_entries - assert foo2.id in all_entries - assert foo3.id in all_entries - assert foo4.id in all_entries + # it can be persisted + sheerka.cache_manager.commit(context) + assert sheerka.sdp.get(sheerka.CONCEPTS_GROUPS_ENTRY, group.id) == {foo.id} def test_i_cannot_add_the_same_concept_twice_in_a_set(self): - sheerka, context, foo, 
all_foos = self.init_concepts(Concept("foo"), Concept("all_foos")) + sheerka, context, foo, group = self.init_concepts(Concept("foo"), Concept("group")) + sheerka.add_concept_to_set(context, foo, group) - sheerka.add_concept_to_set(context, foo, all_foos) - res = sheerka.add_concept_to_set(context, foo, all_foos) + # add again + res = sheerka.add_concept_to_set(context, foo, group) assert not res.status - assert res.body == ConceptAlreadyInSet(foo, all_foos) + assert sheerka.isinstance(res.body, BuiltinConcepts.CONCEPT_ALREADY_IN_SET) + assert res.body.body == foo + assert res.body.concept_set == group - all_entries = sheerka.sdp.get("All_" + all_foos.id, None, False) - assert len(all_entries) == 1 - assert foo.id in all_entries + all_entries = sheerka.cache_manager.get(sheerka.CONCEPTS_GROUPS_ENTRY, group.id) + assert all_entries == {foo.id} + + def test_i_can_have_multiple_groups(self): + sheerka, context, foo, bar, baz, group1, group2 = self.init_concepts( + Concept("foo"), + Concept("bar"), + Concept("baz"), + Concept("group1"), + Concept("group2"), + cache_only=False + ) + + assert sheerka.add_concept_to_set(context, foo, group1).status + assert sheerka.add_concept_to_set(context, bar, group1).status + assert sheerka.add_concept_to_set(context, bar, group2).status + assert sheerka.add_concept_to_set(context, baz, group2).status + + assert sheerka.cache_manager.get(sheerka.CONCEPTS_GROUPS_ENTRY, group1.id) == {foo.id, bar.id} + assert sheerka.cache_manager.get(sheerka.CONCEPTS_GROUPS_ENTRY, group2.id) == {baz.id, bar.id} + + # I can save in db + sheerka.cache_manager.commit(context) + assert sheerka.sdp.get(sheerka.CONCEPTS_GROUPS_ENTRY) == { + '1004': {'1001', '1002'}, '1005': {'1002', '1003'} + } def test_i_get_elements_from_a_set(self): sheerka, context, one, two, three, number = self.init_concepts( @@ -83,7 +69,6 @@ class TestSheerkaSetsManager(TestUsingFileBasedSheerka): sheerka.add_concept_to_set(context, c, number) elements = 
sheerka.get_set_elements(context, number) - assert set(elements) == {one, two, three} def test_i_cannot_get_elements_if_not_a_set(self): @@ -100,8 +85,9 @@ class TestSheerkaSetsManager(TestUsingFileBasedSheerka): assert not sheerka.isaset(context, group) assert not sheerka.isinset(foo, group) - context = self.get_context(sheerka) + context = self.get_context(sheerka) # another context ? sheerka.add_concept_to_set(context, foo, group) + assert sheerka.isaset(context, group) assert sheerka.isinset(foo, group) @@ -112,7 +98,7 @@ class TestSheerkaSetsManager(TestUsingFileBasedSheerka): sheerka.sets_handler.add_concepts_to_set(context, [foo, bar], group1) assert sheerka.isaset(context, group2) - assert sheerka.get_set_elements(context, group2) == [foo, bar] + assert set(sheerka.get_set_elements(context, group2)) == {foo, bar} def test_i_can_define_subset_of_another_group(self): sheerka, context, one, two, three, four, five, number, sub_number = self.init_concepts( @@ -122,15 +108,30 @@ class TestSheerkaSetsManager(TestUsingFileBasedSheerka): Concept("four", body="4"), Concept("five", body="5"), Concept("number"), - Concept("sub_number", body="number", where="number < 4"), - create_new=True + Concept("sub_number", body="number", where="number < 4") ) sheerka.sets_handler.add_concepts_to_set(context, [one, two, three, four, five], number) assert sheerka.isaset(context, sub_number) - assert sheerka.get_set_elements(context, sub_number) == [one, two, three] + assert set(sheerka.get_set_elements(context, sub_number)) == {one, two, three} - def test_i_can_define_subset_of_another_set_when_some_concept_dont_have_a_defined_body(self): + def test_i_can_define_subset_of_subset(self): + sheerka, context, one, two, three, four, five, number, sub_number, sub_sub_number = self.init_concepts( + Concept("one", body="1"), + Concept("two", body="2"), + Concept("three", body="3"), + Concept("four", body="4"), + Concept("five", body="5"), + Concept("number"), + Concept("sub_number", 
body="number", where="number < 4"), + Concept("sub_sub_number", body="sub_number", where="sub_number > 2") + ) + sheerka.sets_handler.add_concepts_to_set(context, [one, two, three, four, five], number) + + assert sheerka.isaset(context, sub_sub_number) + assert set(sheerka.get_set_elements(context, sub_sub_number)) == {three} + + def test_i_can_define_subset_of_another_set_when_some_concept_do_not_have_a_defined_body(self): """ Example def concept unit from number where number <10 @@ -151,7 +152,7 @@ class TestSheerkaSetsManager(TestUsingFileBasedSheerka): sheerka.sets_handler.add_concepts_to_set(context, [one, two, three, four, five], number) assert sheerka.isaset(context, sub_number) - assert sheerka.get_set_elements(context, sub_number) == [one, three] + assert set(sheerka.get_set_elements(context, sub_number)) == {one, three} def test_i_can_define_subset_of_another_set_when_some_concept_are_bnf(self): """ @@ -190,8 +191,9 @@ class TestSheerkaSetsManager(TestUsingFileBasedSheerka): Concept("one", body="1"), Concept("two", body="2"), Concept("twenty", body="20"), - Concept("twenties", definition="twenty (one|two)=unit", body="twenty + unit").def_prop("unit"), + Concept("twenties", definition="twenty (one|two)=unit", body="twenty + unit").def_var("unit"), Concept("number"), + create_new=True ) sheerka.sets_handler.add_concepts_to_set(context, [one, two, twenty, twenties], number) @@ -200,39 +202,27 @@ class TestSheerkaSetsManager(TestUsingFileBasedSheerka): twenty_one = sheerka.evaluate_user_input("twenty one", "")[0].body assert sheerka.isinset(twenty_one, number) - def test_i_can_set_isa(self): - sheerka, context, foo, all_foos = self.init_concepts( - "foo", "all_foo", - create_new=True, - use_dict=False) - - assert BuiltinConcepts.ISA not in foo.props - - sheerka.set_isa(context, foo, all_foos) - - assert foo.get_prop(BuiltinConcepts.ISA) == [all_foos] - assert sheerka.isa(foo, all_foos) - assert sheerka.isinset(foo, all_foos) - assert sheerka.isaset(context, 
all_foos) - def test_a_concept_can_be_in_multiple_sets(self): - sheerka, context, foo, all_foos, all_bars = self.init_concepts( + sheerka, context, foo, all_foo, all_bar = self.init_concepts( Concept("foo"), Concept("all_foo"), Concept("all_bar"), - use_dict=False) - sheerka.create_new_concept(context, foo) + create_new=True + ) - sheerka.set_isa(context, foo, all_foos) - sheerka.set_isa(context, foo, all_bars) + foo = sheerka.new(foo.key) # new instance + sheerka.set_isa(context, foo, all_foo) - assert foo.get_prop(BuiltinConcepts.ISA) == [all_foos, all_bars] - assert sheerka.isa(foo, all_foos) - assert sheerka.isa(foo, all_bars) - assert sheerka.isinset(foo, all_foos) - assert sheerka.isinset(foo, all_bars) - assert sheerka.isaset(context, all_foos) - assert sheerka.isaset(context, all_bars) + foo = sheerka.new(foo.key) # new instance + sheerka.set_isa(context, foo, all_bar) + + assert foo.get_prop(BuiltinConcepts.ISA) == {all_foo, all_bar} + assert sheerka.isa(foo, all_foo) + assert sheerka.isa(foo, all_bar) + assert sheerka.isinset(foo, all_foo) + assert sheerka.isinset(foo, all_bar) + assert sheerka.isaset(context, all_foo) + assert sheerka.isaset(context, all_bar) def test_i_can_manage_isa_transitivity(self): """ @@ -244,10 +234,9 @@ class TestSheerkaSetsManager(TestUsingFileBasedSheerka): Concept("foo"), Concept("bar"), Concept("baz"), + create_new=True + ) - sheerka.create_new_concept(context, foo) - sheerka.create_new_concept(context, bar) - sheerka.create_new_concept(context, baz) sheerka.set_isa(context, foo, bar) sheerka.set_isa(context, bar, baz) @@ -255,3 +244,88 @@ class TestSheerkaSetsManager(TestUsingFileBasedSheerka): assert sheerka.isa(foo, bar) assert sheerka.isa(bar, baz) assert sheerka.isa(foo, baz) + + +class TestSheerkaSetsManagerUsingFileBasedSheerka(TestUsingFileBasedSheerka): + def test_i_can_add_concept_to_set_and_retrieve_it_in_another_session(self): + sheerka, context, foo, bar, group = self.init_concepts( + Concept("foo"), + 
Concept("bar"), + Concept("group"), + create_new=True) + + assert sheerka.add_concept_to_set(context, foo, group).status + sheerka.cache_manager.commit(context) + + sheerka = self.get_sheerka() # another session + assert sheerka.add_concept_to_set(context, bar, group).status + + # I can get the elements + assert set(sheerka.get_set_elements(context, group)) == {foo, bar} + + sheerka.cache_manager.commit(context) # save in db + all_entries = sheerka.sdp.get(sheerka.CONCEPTS_GROUPS_ENTRY) # check the db + assert all_entries == { + group.id: {foo.id, bar.id} + } + + # I can also add a group another elements + sheerka = self.get_sheerka() + foo3 = Concept("foo3") + foo4 = Concept("foo4") + for c in [foo3, foo4]: + sheerka.create_new_concept(context, c) + + res = sheerka.sets_handler.add_concepts_to_set(context, (foo3, foo4), group) + assert res.status + + # I can get the elements + assert set(sheerka.get_set_elements(context, group)) == {foo, bar, foo3, foo4} + + sheerka.cache_manager.commit(context) # save in db + all_entries = sheerka.sdp.get(sheerka.CONCEPTS_GROUPS_ENTRY) # check the db + assert all_entries == { + group.id: {foo.id, bar.id, foo3.id, foo4.id} + } + + def test_i_can_set_isa(self): + sheerka, context, foo, bar, group = self.init_concepts( + "foo", + "bar", + "group", + create_new=True, # needed by modify + ) + + # nothing was previously in ISA + foo = sheerka.new(foo.key) + assert BuiltinConcepts.ISA not in foo.metadata.props + res = sheerka.set_isa(context, foo, group) + assert res.status + sheerka.cache_manager.commit(context) + + sheerka = self.get_sheerka() + assert foo.get_prop(BuiltinConcepts.ISA) == {group} + assert sheerka.isa(foo, group) + assert sheerka.isinset(foo, group) + assert sheerka.isaset(context, group) + + # I can do the same for bar + sheerka = self.get_sheerka() + res = sheerka.set_isa(context, bar, group) + assert res.status + assert bar.get_prop(BuiltinConcepts.ISA) == {group} + assert sheerka.isa(bar, group) + assert 
sheerka.isinset(bar, group) + assert sheerka.isaset(context, group) + + sheerka.cache_manager.commit(context) + + # they are both in the same group + sheerka = self.get_sheerka() + all_entries = sheerka.sdp.get(sheerka.CONCEPTS_GROUPS_ENTRY) + assert all_entries == { + group.id: {foo.id, bar.id} + } + + elements = sheerka.get_set_elements(context, group) + assert set(elements) == {foo, bar} diff --git a/tests/core/test_SheerkaVariableManager.py b/tests/core/test_SheerkaVariableManager.py index e6349d4..bba3cb7 100644 --- a/tests/core/test_SheerkaVariableManager.py +++ b/tests/core/test_SheerkaVariableManager.py @@ -1,20 +1,23 @@ from core.concept import Concept, ConceptParts -from core.sheerka.Services.SheerkaVariableManager import SheerkaVariableManager +from core.sheerka.Sheerka import Sheerka from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka class TestSheerkaVariable(TestUsingMemoryBasedSheerka): def test_i_can_record_and_load_a_constant(self): - sheerka = self.get_sheerka() + sheerka = self.get_sheerka(cache_only=False) context = self.get_context(sheerka) sheerka.record(context, "TestSheerkaVariable", "my_variable", 1) res = sheerka.load("TestSheerkaVariable", "my_variable") assert res == 1 - assert sheerka.sdp.exists(SheerkaVariableManager.VARIABLES_ENTRY, "TestSheerkaVariable.my_variable") - loaded = sheerka.sdp.get(SheerkaVariableManager.VARIABLES_ENTRY, "TestSheerkaVariable.my_variable") + # I can persist in db + sheerka.cache_manager.commit(context) + + assert sheerka.sdp.exists(Sheerka.VARIABLES_ENTRY, "TestSheerkaVariable.my_variable") + loaded = sheerka.sdp.get(Sheerka.VARIABLES_ENTRY, "TestSheerkaVariable.my_variable") assert loaded.event_id == context.event.get_digest() assert loaded.key == "my_variable" assert loaded.value == 1 @@ -25,47 +28,60 @@ class TestSheerkaVariable(TestUsingMemoryBasedSheerka): sheerka = self.get_sheerka() context = self.get_context(sheerka) - concept = Concept("foo").set_prop("a", 
"alpha").set_metadata_value(ConceptParts.BODY, 3.14) + concept = Concept("foo").set_value("a", "alpha").set_value(ConceptParts.BODY, 3.14) sheerka.record(context, "TestSheerkaVariable", "my_variable", concept) res = sheerka.load("TestSheerkaVariable", "my_variable") assert res == concept assert res.body == concept.body + assert res.a == concept.a - def test_i_can_get_the_parent_when_modified(self): + def test_i_can_delete_an_entry(self): sheerka = self.get_sheerka() context = self.get_context(sheerka) - sheerka.record(context, "TestSheerkaVariable", "my_variable", 1) - sheerka.record(context, "TestSheerkaVariable", "my_variable", 2) - res = sheerka.load("TestSheerkaVariable", "my_variable") - assert res == 2 + concept = Concept("foo") - loaded = sheerka.sdp.get(SheerkaVariableManager.VARIABLES_ENTRY, "TestSheerkaVariable.my_variable") - assert loaded.event_id == context.event.get_digest() - assert loaded.key == "my_variable" - assert loaded.value == 2 - assert loaded.who == "TestSheerkaVariable" - assert loaded.parents == ['8c9ada7bf488d84229f6539f76042431638f16d600fe3b7ec7e7161043a40d59'] + sheerka.record(context, "TestSheerkaVariable", "my_variable", concept) + assert sheerka.load("TestSheerkaVariable", "my_variable") is not None - parent = sheerka.sdp.load_obj(loaded.parents[0]) - assert parent.event_id == context.event.get_digest() - assert parent.key == "my_variable" - assert parent.value == 1 - assert parent.who == "TestSheerkaVariable" - assert parent.parents is None + sheerka.delete(context, "TestSheerkaVariable", "my_variable") + assert sheerka.load("TestSheerkaVariable", "my_variable") is None - def test_variable_is_not_persisted_if_the_value_is_the_same(self): - sheerka = self.get_sheerka() - context = self.get_context(sheerka) + # def test_i_can_get_the_parent_when_modified(self): + # sheerka = self.get_sheerka() + # context = self.get_context(sheerka) + # + # sheerka.record(context, "TestSheerkaVariable", "my_variable", 1) + # sheerka.record(context, 
"TestSheerkaVariable", "my_variable", 2) + # res = sheerka.load("TestSheerkaVariable", "my_variable") + # assert res == 2 + # + # loaded = sheerka.sdp.get(Sheerka.VARIABLES_ENTRY, "TestSheerkaVariable.my_variable") + # assert loaded.event_id == context.event.get_digest() + # assert loaded.key == "my_variable" + # assert loaded.value == 2 + # assert loaded.who == "TestSheerkaVariable" + # assert loaded.parents == ['8c9ada7bf488d84229f6539f76042431638f16d600fe3b7ec7e7161043a40d59'] + # + # parent = sheerka.sdp.load_obj(loaded.parents[0]) + # assert parent.event_id == context.event.get_digest() + # assert parent.key == "my_variable" + # assert parent.value == 1 + # assert parent.who == "TestSheerkaVariable" + # assert parent.parents is None - sheerka.record(context, "TestSheerkaVariable", "my_variable", 1) - sheerka.record(context, "TestSheerkaVariable", "my_variable", 1) - - loaded = sheerka.sdp.get(SheerkaVariableManager.VARIABLES_ENTRY, "TestSheerkaVariable.my_variable") - assert loaded.event_id == context.event.get_digest() - assert loaded.key == "my_variable" - assert loaded.value == 1 - assert loaded.who == "TestSheerkaVariable" - assert loaded.parents is None + # def test_variable_is_not_persisted_if_the_value_is_the_same(self): + # sheerka = self.get_sheerka(singleton=True) + # context = self.get_context(sheerka) + # + # sheerka.record(context, "TestSheerkaVariable", "my_variable", 1) + # sheerka.record(context, "TestSheerkaVariable", "my_variable", 1) + # + # loaded = sheerka.sdp.get(Sheerka.VARIABLES_ENTRY, "TestSheerkaVariable.my_variable") + # assert loaded.event_id == context.event.get_digest() + # assert loaded.key == "my_variable" + # assert loaded.value == 1 + # assert loaded.who == "TestSheerkaVariable" + # assert loaded.parents is None diff --git a/tests/core/test_ast.py b/tests/core/test_ast.py index e44cb38..c3f29ac 100644 --- a/tests/core/test_ast.py +++ b/tests/core/test_ast.py @@ -1,16 +1,15 @@ import ast -import pytest - -from core.ast.nodes 
import NodeParent, GenericNodeConcept import core.ast.nodes +import pytest +from core.ast.nodes import NodeParent, GenericNodeConcept from core.ast.visitors import ConceptNodeVisitor, UnreferencedNamesVisitor from core.builtin_concepts import BuiltinConcepts from core.sheerka.Sheerka import Sheerka def get_sheerka(): - sheerka = Sheerka(skip_builtins_in_db=True) + sheerka = Sheerka(cache_only=True) sheerka.initialize("mem://") return sheerka @@ -40,32 +39,32 @@ def my_function(a,b): sheerka = get_sheerka() assert tree_as_concept.node_type == "Module" - assert sheerka.isinstance(tree_as_concept.get_prop("body"), BuiltinConcepts.LIST) + assert sheerka.isinstance(tree_as_concept.get_value("body"), BuiltinConcepts.LIST) - def_func = tree_as_concept.get_prop("body").body[0] + def_func = tree_as_concept.get_value("body").body[0] assert sheerka.isinstance(def_func, BuiltinConcepts.GENERIC_NODE) assert def_func.node_type == "FunctionDef" assert def_func.parent == NodeParent(tree_as_concept, "body") - assert def_func.get_prop("name") == "my_function" + assert def_func.get_value("name") == "my_function" - def_func_args = def_func.get_prop("args") + def_func_args = def_func.get_value("args") assert sheerka.isinstance(def_func_args, BuiltinConcepts.GENERIC_NODE) assert def_func_args.node_type == "arguments" - def_func_args_real_args = def_func_args.get_prop("args") + def_func_args_real_args = def_func_args.get_value("args") assert sheerka.isinstance(def_func_args_real_args, BuiltinConcepts.LIST) assert len(def_func_args_real_args.body) == 2 assert sheerka.isinstance(def_func_args_real_args.body[0], BuiltinConcepts.GENERIC_NODE) assert def_func_args_real_args.body[0].node_type == "arg" assert def_func_args_real_args.body[0].parent == NodeParent(def_func_args, "args") - assert def_func_args_real_args.body[0].get_prop("arg") == "a" + assert def_func_args_real_args.body[0].get_value("arg") == "a" assert sheerka.isinstance(def_func_args_real_args.body[1], 
BuiltinConcepts.GENERIC_NODE) assert def_func_args_real_args.body[1].node_type == "arg" assert def_func_args_real_args.body[1].parent == NodeParent(def_func_args, "args") - assert def_func_args_real_args.body[1].get_prop("arg") == "b" + assert def_func_args_real_args.body[1].get_value("arg") == "b" - def_fun_body = def_func.get_prop("body") + def_fun_body = def_func.get_value("body") assert sheerka.isinstance(def_fun_body, BuiltinConcepts.LIST) assert len(def_fun_body.body) == 2 @@ -95,13 +94,13 @@ def my_function(a,b): visitor.visit(concept_node) sheerka = get_sheerka() - assert sheerka.value(visitor.names[0]) == "i" - assert sheerka.value(visitor.names[1]) == "range" - assert sheerka.value(visitor.names[2]) == "b" - assert sheerka.value(visitor.names[3]) == "a" - assert sheerka.value(visitor.names[4]) == "a" - assert sheerka.value(visitor.names[5]) == "b" - assert sheerka.value(visitor.names[6]) == "a" + assert sheerka.objvalue(visitor.names[0]) == "i" + assert sheerka.objvalue(visitor.names[1]) == "range" + assert sheerka.objvalue(visitor.names[2]) == "b" + assert sheerka.objvalue(visitor.names[3]) == "a" + assert sheerka.objvalue(visitor.names[4]) == "a" + assert sheerka.objvalue(visitor.names[5]) == "b" + assert sheerka.objvalue(visitor.names[6]) == "a" def test_i_can_get_unreferenced_variables(): diff --git a/tests/core/test_builtin_helpers.py b/tests/core/test_builtin_helpers.py index 92f93f8..7d5c6bf 100644 --- a/tests/core/test_builtin_helpers.py +++ b/tests/core/test_builtin_helpers.py @@ -1,7 +1,7 @@ import ast -import pytest -import core.builtin_helpers +import core.builtin_helpers +import pytest from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka diff --git a/tests/core/test_concept.py b/tests/core/test_concept.py index 9fed84b..1723d81 100644 --- a/tests/core/test_concept.py +++ b/tests/core/test_concept.py @@ -3,7 +3,7 @@ import pytest from core.concept import 
Concept, ConceptParts, DEFINITION_TYPE_DEF -@pytest.mark.parametrize("name, properties, expected", [ +@pytest.mark.parametrize("name, variables, expected", [ ("foo", [], "foo"), ("foo(bar)", [], "foo ( bar )"), ("foo a", ["a"], "foo __var__0"), @@ -20,21 +20,20 @@ from core.concept import Concept, ConceptParts, DEFINITION_TYPE_DEF ("a b a c", ["a", "b"], "__var__0 __var__1 __var__0 c"), ("a b a c", ["b", "a"], "__var__1 __var__0 __var__1 c"), ]) -def test_i_can_compute_the_key(name, properties, expected): +def test_i_can_compute_the_key(name, variables, expected): concept = Concept(name) - for prop in properties: - concept.metadata.props.append((prop, None)) + for var_name in variables: + concept.metadata.variables.append((var_name, None)) concept.init_key() assert concept.key == expected def test_i_can_compute_the_key_when_from_definition(): - # if definition is not defined, use the name concept = Concept() concept.metadata.name = "hello a" - concept.metadata.props = [("a", None)] + concept.metadata.variables = [("a", None)] concept.init_key() assert concept.key == "hello __var__0" @@ -43,13 +42,13 @@ def test_i_can_compute_the_key_when_from_definition(): concept.metadata.name = "greetings" concept.metadata.definition = "hello a" concept.metadata.definition_type = DEFINITION_TYPE_DEF - concept.metadata.props = [("a", None)] + concept.metadata.variables = [("a", None)] concept.init_key() assert concept.key == "hello __var__0" def test_key_does_not_use_variable_when_definition_is_set(): - concept = Concept("plus").def_prop('plus') + concept = Concept("plus").def_var('plus') concept.init_key() assert concept.metadata.key == "plus" @@ -73,7 +72,7 @@ def test_i_can_serialize(): definition_type="def type", desc="this this the desc", id="123456" - ).def_prop("a", "10").def_prop("b", None) + ).def_var("a", "10").def_var("b", None) to_dict = concept.to_dict() assert to_dict == { @@ -88,7 +87,8 @@ def test_i_can_serialize(): 'name': 'concept_name', 'post': 'definition of 
the post', 'pre': 'definition of the pre', - 'props': [('a', "10"), ('b', None)], + 'props': {}, + 'variables': [('a', "10"), ('b', None)], 'where': 'definition of the where' } @@ -111,7 +111,8 @@ def test_i_can_deserialize(): 'name': 'concept_name', 'post': 'definition of the post', 'pre': 'definition of the pre', - 'props': [('a', "10"), ('b', None)], + 'props': {}, + 'variables': [('a', "10"), ('b', None)], 'where': 'definition of the where' } @@ -130,16 +131,16 @@ def test_i_can_deserialize(): definition_type="def type", desc="this this the desc", id="123456" - ).def_prop("a", "10").def_prop("b", None) + ).def_var("a", "10").def_var("b", None) -def test_i_can_deserialize_props_coming_from_sdp(): +def test_i_can_deserialize_variables_coming_from_sdp(): from_dict = { - 'props': [['a', "10"], ['b', None]], # JSON transform set into list + 'variables': [['a', "10"], ['b', None]], # JSON transform set into list } concept = Concept().from_dict(from_dict) - assert concept == Concept().def_prop("a", "10").def_prop("b", None) + assert concept == Concept().def_var("a", "10").def_var("b", None) def test_i_can_compare_concepts(): @@ -156,7 +157,7 @@ def test_i_can_compare_concepts(): definition_type="def type", desc="this this the desc", id="123456" - ).def_prop("a", "10").def_prop("b", None) + ).def_var("a", "10").def_var("b", None).add_prop("prop", "prop_val") concept_b = Concept( name="concept_name", @@ -171,11 +172,33 @@ def test_i_can_compare_concepts(): definition_type="def type", desc="this this the desc", id="123456" - ).def_prop("a", "10").def_prop("b", None) + ).def_var("a", "10").def_var("b", None).add_prop("prop", "prop_val") assert concept_a == concept_b +def test_i_can_detect_concept_differences(): + assert Concept(name="concept_name") != Concept() + assert Concept(is_builtin=True) != Concept() + assert Concept(is_unique=True) != Concept() + assert Concept(key="concept_key") != Concept() + assert Concept(body="concept_body") != Concept() + assert 
Concept(where="concept_where") != Concept() + assert Concept(pre="concept_pre") != Concept() + assert Concept(post="concept_post") != Concept() + assert Concept(definition="def") != Concept() + assert Concept(definition_type="def type") != Concept() + assert Concept(desc="desc") != Concept() + assert Concept(id="concept_id") != Concept() + assert Concept().def_var("a") != Concept() + assert Concept().add_prop("a", "b") != Concept() + assert Concept().set_value("a", "b") != Concept() + + concept = Concept() + concept.compiled["foo"] = "value" + assert concept == Concept() # compiled is not used in the comparison + + def test_i_can_compare_concept_with_circular_reference(): foo = Concept("foo") foo.metadata.body = foo @@ -215,17 +238,17 @@ def test_i_can_update_from(): definition_type="def type", desc="this this the desc", id="123456" - ).def_prop("a", "10").def_prop("b", None) + ).def_var("a", "10").def_var("b", None) # make sure origin is preserved setattr(template, "##origin##", "digest") - template.values[ConceptParts.BODY] = "value in body" - template.values[ConceptParts.WHERE] = "value in where" - template.values[ConceptParts.PRE] = "value in pre" - template.values[ConceptParts.POST] = "value in post" - template.set_prop("a", 10) - template.set_prop("b", 20) + template.set_value(ConceptParts.BODY, "value in body") + template.set_value(ConceptParts.WHERE, "value in where") + template.set_value(ConceptParts.PRE, "value in pre") + template.set_value(ConceptParts.POST, "value in post") + template.set_value("a", 10) + template.set_value("b", 20) concept = Concept().update_from(template) diff --git a/tests/core/test_sheerka.py b/tests/core/test_sheerka.py index b11b5cb..36ed7a0 100644 --- a/tests/core/test_sheerka.py +++ b/tests/core/test_sheerka.py @@ -1,24 +1,33 @@ -import pytest import os +import pytest from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept, UserInputConcept -from core.concept import Concept, PROPERTIES_TO_SERIALIZE -from 
core.sheerka.Sheerka import Sheerka +from core.concept import Concept, PROPERTIES_TO_SERIALIZE, ConceptParts +from core.sheerka.Sheerka import Sheerka, BASE_NODE_PARSER_CLASS from tests.TestUsingFileBasedSheerka import TestUsingFileBasedSheerka +from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka -class ConceptWithGetValue(Concept): - def get_value(self): - return self.get_prop("my_prop") +class ConceptWithGetObjValue(Concept): + def get_obj_value(self): + return self.get_value("my_prop") -class TestSheerka(TestUsingFileBasedSheerka): +class TestSheerkaUsingMemoryBasedSheerka(TestUsingMemoryBasedSheerka): - def test_root_folder_is_created_after_initialization(self): - return_value = Sheerka().initialize(self.root_folder) - assert return_value.status, "initialisation should be successful" - assert os.path.exists(self.root_folder), "init folder should be created" + def test_i_can_initialize_builtin_parsers(self): + sheerka = self.get_sheerka() + + # test existence of some parser (not all) + assert "parsers.DefaultParser.DefaultParser" in sheerka.parsers + assert "parsers.BnfNodeParser.BnfNodeParser" in sheerka.parsers + assert "parsers.SyaNodeParser.SyaNodeParser" in sheerka.parsers + assert "parsers.AtomNodeParser.AtomNodeParser" in sheerka.parsers + + # make sure BaseNodeParser is properly initialized + assert BASE_NODE_PARSER_CLASS not in sheerka.parsers + assert sheerka.bnp is not None def test_i_can_list_builtin_concepts(self): sheerka = self.get_sheerka() @@ -27,27 +36,6 @@ class TestSheerka(TestUsingFileBasedSheerka): assert str(BuiltinConcepts.ERROR) in builtins assert str(BuiltinConcepts.RETURN_VALUE) in builtins - def test_builtin_concepts_are_initialized(self): - sheerka = self.get_sheerka(skip_builtins_in_db=False) - assert len(sheerka.cache_by_key) == len(BuiltinConcepts) - for concept_name in BuiltinConcepts: - assert str(concept_name) in sheerka.cache_by_key - assert sheerka.sdp.get_safe(sheerka.CONCEPTS_ENTRY, str(concept_name)) 
is not None - - for key, concept_class in sheerka.get_builtins_classes_as_dict().items(): - assert isinstance(sheerka.cache_by_key[key], concept_class) - - def test_builtin_concepts_can_be_updated(self): - sheerka = self.get_sheerka(use_dict=False, skip_builtins_in_db=False) - loaded_sheerka = sheerka.get(BuiltinConcepts.SHEERKA) - loaded_sheerka.metadata.desc = "I have a description" - sheerka.sdp.modify("Test", sheerka.CONCEPTS_ENTRY, loaded_sheerka.key, loaded_sheerka) - - sheerka = self.get_sheerka(use_dict=False, skip_builtins_in_db=False) - loaded_sheerka = sheerka.get(BuiltinConcepts.SHEERKA) - - assert loaded_sheerka.metadata.desc == "I have a description" - def test_i_can_get_a_builtin_concept_by_their_enum_or_the_string(self): """ Checks that a concept can be found its name @@ -56,27 +44,27 @@ class TestSheerka(TestUsingFileBasedSheerka): """ sheerka = self.get_sheerka() for key in sheerka.get_builtins_classes_as_dict(): - assert sheerka.get(key) is not None - assert sheerka.get(str(key)) is not None + assert sheerka.get_by_key(key) is not None + assert sheerka.get_by_key(str(key)) is not None def test_i_cannot_get_when_key_is_none(self): sheerka = self.get_sheerka() - res = sheerka.get(None) + res = sheerka.get_by_key(None) assert sheerka.isinstance(res, BuiltinConcepts.ERROR) - assert res.body == "Concept key is undefined." + assert res.body == "Concept 'None' is undefined." 
- def test_unknown_concept_is_return_when_the_concept_key_is_not_found(self): + def test_i_cannot_get_when_key_is_not_found(self): sheerka = self.get_sheerka() - loaded = sheerka.get("key_that_does_not_exist") + loaded = sheerka.get_by_key("key_that_does_not_exist") assert loaded is not None assert sheerka.isinstance(loaded, BuiltinConcepts.UNKNOWN_CONCEPT) assert loaded.body == ("key", "key_that_does_not_exist") assert loaded.metadata.is_evaluated - def test_unknown_concept_is_return_when_the_concept_id_is_not_found(self): + def test_i_cannot_get_when_id_is_not_found(self): sheerka = self.get_sheerka() loaded = sheerka.get_by_id("id_that_does_not_exist") @@ -97,6 +85,10 @@ class TestSheerka(TestUsingFileBasedSheerka): assert ret.value == "value" assert ret.message == "message" + # check the others + for key, concept_class in sheerka.get_builtins_classes_as_dict().items(): + assert isinstance(sheerka.get_by_key(key), concept_class) + def test_i_can_instantiate_a_builtin_concept_when_no_specific_class(self): sheerka = self.get_sheerka() ret = sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body="fake_concept") @@ -106,9 +98,7 @@ class TestSheerka(TestUsingFileBasedSheerka): assert ret.body == "fake_concept" def test_i_can_instantiate_a_concept(self): - sheerka = self.get_sheerka() - concept = self.get_default_concept() - sheerka.create_new_concept(self.get_context(sheerka), concept) + sheerka, context, concept = self.init_concepts(self.get_default_concept(), create_new=True) new = sheerka.new(concept.key, a=10, b="value") @@ -116,27 +106,33 @@ class TestSheerka(TestUsingFileBasedSheerka): for prop in PROPERTIES_TO_SERIALIZE: assert getattr(new.metadata, prop) == getattr(concept.metadata, prop) - assert new.props["a"].value == 10 - assert new.props["b"].value == "value" + assert new.get_value("a") == 10 + assert new.get_value("b") == "value" - def test_i_can_instantiate_with_the_name_and_the_id(self): - sheerka = self.get_sheerka() - 
sheerka.create_new_concept(self.get_context(sheerka), Concept("foo", body="foo1")) - sheerka.create_new_concept(self.get_context(sheerka), Concept("foo", body="foo2")) + def test_i_can_instantiate_multiple_when_same_key(self): + sheerka, context, *concepts = self.init_concepts( + Concept("foo", body="foo1"), + Concept("foo", body="foo2"), + create_new=True) + # when no id, i get two instances concepts = sheerka.new("foo") assert len(concepts) == 2 + assert concepts[0].id == "1001" + assert concepts[0].metadata.body == "foo1" + assert concepts[1].id == "1002" + assert concepts[1].metadata.body == "foo2" + # only one instance if the id is given foo1 = sheerka.new(("foo", "1001")) assert foo1.metadata.body == "foo1" + # only one instance if the id is given foo2 = sheerka.new(("foo", "1002")) assert foo2.metadata.body == "foo2" def test_instances_are_different_when_asking_for_new(self): - sheerka = self.get_sheerka() - concept = self.get_default_concept() - sheerka.create_new_concept(self.get_context(sheerka), concept) + sheerka, context, concept = self.init_concepts(self.get_default_concept(), create_new=True) new1 = sheerka.new(concept.key, a=10, b="value") new2 = sheerka.new(concept.key, a=10, b="value") @@ -144,17 +140,50 @@ class TestSheerka(TestUsingFileBasedSheerka): assert new1 == new2 assert id(new1) != id(new2) - def test_i_get_the_same_instance_when_is_unique_is_true(self): - sheerka = self.get_sheerka() - concept = Concept(name="unique", is_unique=True) - sheerka.create_new_concept(self.get_context(sheerka), concept) + def test_new_instance_does_not_impact_each_others(self): + sheerka, context, foo, bar = self.init_concepts("foo", "bar", create_new=True) - new1 = sheerka.new(concept.key, a=10, b="value") - new2 = sheerka.new(concept.key, a=10, b="value") + new_foo = sheerka.new("foo") + new_foo.metadata.body = "metadata value" # modify metadata + new_foo.def_var("var_name", "default value") # modify definition of variables + 
new_foo.add_prop(BuiltinConcepts.ISA, bar) # modify property + new_foo.compiled["var_name"] = "'var value'" + new_foo.set_value(ConceptParts.BODY, "body value") # modify value + new_foo.set_value("var_name", "var value") # modify value + + assert new_foo.metadata.body != foo.metadata.body + assert new_foo.metadata.variables != foo.metadata.variables + assert new_foo.metadata.props != foo.metadata.props + assert new_foo.values != foo.values + assert new_foo.compiled != foo.compiled + + def test_i_get_the_same_instance_when_is_unique_is_true(self): + sheerka, context, concept = self.init_concepts(Concept(name="unique", is_unique=True), create_new=True) + + new1 = sheerka.new(concept.key) + new2 = sheerka.new(concept.key, a=10, b="value") # not that variables are simply discareded assert new1 == new2 assert id(new1) == id(new2) + def test_values_are_reset_when_asking_for_a_new_instance(self): + sheerka, context, template = self.init_concepts( + Concept("foo", body="'foo body'"), + create_new=True, + eval_body=True) + + sheerka.evaluate_concept(context, sheerka.get_by_id(template.id)) + assert template.metadata.is_evaluated + assert template.body == "foo body" + + new = sheerka.new(template.key) + assert not new.metadata.is_evaluated + assert not new.body + + new = sheerka.new((None, template.id)) + assert not new.metadata.is_evaluated + assert not new.body + def test_i_cannot_instantiate_an_unknown_concept(self): sheerka = self.get_sheerka() @@ -164,9 +193,10 @@ class TestSheerka(TestUsingFileBasedSheerka): assert new.body == ('key', 'fake_concept') def test_i_cannot_instantiate_with_invalid_id(self): - sheerka = self.get_sheerka() - sheerka.create_new_concept(self.get_context(sheerka), Concept("foo", body="foo1")) - sheerka.create_new_concept(self.get_context(sheerka), Concept("foo", body="foo2")) + sheerka, context, *concepts = self.init_concepts( + Concept("foo", body="foo1"), + Concept("foo", body="foo2"), + create_new=True) new = sheerka.new(("foo", 
"invalid_id")) @@ -174,9 +204,10 @@ class TestSheerka(TestUsingFileBasedSheerka): assert new.body == [('key', 'foo'), ('id', 'invalid_id')] def test_i_cannot_instantiate_with_invalid_key(self): - sheerka = self.get_sheerka() - sheerka.create_new_concept(self.get_context(sheerka), Concept("foo", body="foo1")) - sheerka.create_new_concept(self.get_context(sheerka), Concept("foo", body="foo2")) + sheerka, context, *concepts = self.init_concepts( + Concept("foo", body="foo1"), + Concept("foo", body="foo2"), + create_new=True) new = sheerka.new(("invalid_key", "1001")) @@ -184,8 +215,9 @@ class TestSheerka(TestUsingFileBasedSheerka): assert new.body == [('key', 'invalid_key'), ('id', '1001')] def test_concept_id_is_irrelevant_when_only_one_concept(self): - sheerka = self.get_sheerka() - sheerka.create_new_concept(self.get_context(sheerka), Concept("foo", body="foo1")) + sheerka, context, *concepts = self.init_concepts( + Concept("foo", body="foo1"), + create_new=True) new = sheerka.new(("foo", "invalid_id")) @@ -193,9 +225,7 @@ class TestSheerka(TestUsingFileBasedSheerka): assert new.metadata.body == "foo1" def test_i_cannot_instantiate_when_properties_are_not_recognized(self): - sheerka = self.get_sheerka() - concept = self.get_default_concept() - sheerka.create_new_concept(self.get_context(sheerka), concept) + sheerka, context, concept = self.init_concepts(self.get_default_concept(), create_new=True) new = sheerka.new(concept.key, a=10, c="value") @@ -210,7 +240,7 @@ class TestSheerka(TestUsingFileBasedSheerka): (True, False, True), (Concept("name", body="foo"), False, "foo"), (Concept("name"), False, Concept("name")), - (ConceptWithGetValue("name").set_prop("my_prop", "my_value"), False, "my_value"), + (ConceptWithGetObjValue("name").set_value("my_prop", "my_value"), False, "my_value"), (ReturnValueConcept(value="return_value"), False, "return_value"), (ReturnValueConcept(value=Concept(key=BuiltinConcepts.USER_INPUT, body="text"), status=True), False, "text"), 
(ReturnValueConcept(value=UserInputConcept("text"), status=True), False, "text"), @@ -230,53 +260,138 @@ class TestSheerka(TestUsingFileBasedSheerka): c.auto_init() c = c.body - assert sheerka.value(concept, reduce_simple_list) == expected - - def test_list_of_concept_is_sorted_by_id(self): - sheerka = self.get_sheerka(use_dict=False, skip_builtins_in_db=False) - concepts = sheerka.concepts() - - assert concepts[0].id < concepts[-1].id + assert sheerka.objvalue(concept, reduce_simple_list) == expected def test_builtin_error_concept_are_errors(self): # only test a random one, it will be the same for the others sheerka = self.get_sheerka() assert not sheerka.is_success(sheerka.new(BuiltinConcepts.TOO_MANY_SUCCESS)) - def test_cache_is_updated_after_get(self): - sheerka = self.get_sheerka(skip_builtins_in_db=False) - # updated when by_key returns one element - sheerka.create_new_concept(self.get_context(sheerka), Concept("foo", body="1")) - sheerka.reset_cache() - sheerka.get("foo") - assert "foo" in sheerka.cache_by_key - assert "1001" in sheerka.cache_by_id +class TestSheerkaUsingFileBasedSheerka(TestUsingFileBasedSheerka): - # updated when by_key returns two elements - sheerka.create_new_concept(self.get_context(sheerka), Concept("foo", body="2")) - sheerka.reset_cache() - sheerka.get("foo") - assert "foo" in sheerka.cache_by_key - assert "1001" in sheerka.cache_by_id - assert "1002" in sheerka.cache_by_id + def test_root_folder_is_created_after_initialization(self): + return_value = Sheerka().initialize(self.root_folder) + assert return_value.status, "initialisation should be successful" + assert os.path.exists(self.root_folder), "init folder should be created" + + def test_builtin_concepts_are_initialized(self): + sheerka = self.get_sheerka() + for concept_name in BuiltinConcepts: + assert sheerka.has_key(str(concept_name)) + assert sheerka.sdp.get(sheerka.CONCEPTS_BY_KEY_ENTRY, str(concept_name)) is not None + + # I can get back data from the sdp when the cache 
is empty + sheerka.cache_manager.clear() + + # caches are empty + assert not sheerka.has_id("1") + assert not sheerka.has_key(str(BuiltinConcepts.SHEERKA)) + + assert sheerka.get_by_id("1") == sheerka # use sdp + + # assert sheerka.has_key(str(BuiltinConcepts.SHEERKA)) # auto update the other caches + + def test_builtin_concepts_can_be_updated(self): + sheerka = self.get_sheerka() + before_parsing = sheerka.get_by_key(BuiltinConcepts.BEFORE_PARSING) + before_parsing.metadata.desc = "I have a description" + before_parsing.metadata.full_serialization = True + with sheerka.sdp.get_transaction("Test") as transac: + transac.add(sheerka.CONCEPTS_BY_KEY_ENTRY, before_parsing.key, before_parsing, use_ref=True) + + sheerka = self.get_sheerka() # another fresh new instance + before_parsing = sheerka.get_by_key(BuiltinConcepts.BEFORE_PARSING) + + assert before_parsing.metadata.desc == "I have a description" + + def test_i_first_look_in_local_cache(self): + sheerka, context, concept = self.init_concepts("foo", create_new=True) + sheerka.cache_manager.commit(context) + + sheerka.get_by_key(concept.key).new_property = "I have modified the concept in cache" + + from_cache = sheerka.get_by_key(concept.key) + assert from_cache is not None + assert from_cache.key == concept.key + assert from_cache.new_property == "I have modified the concept in cache" + + # sdp instance is not modified + sheerka.cache_manager.clear() + from_sdp = sheerka.get_by_key(concept.key) + assert from_sdp is not None + assert from_sdp.key == concept.key + assert not hasattr(from_sdp, "new_property") + + def test_i_can_retrieve_from_sdp_when_cache_is_reset(self): + sheerka, context, concept = self.init_concepts(Concept("foo", body="1")) + sheerka.cache_manager.commit(context) + + sheerka.cache_manager.clear() + sheerka.get_by_key("foo") + assert sheerka.has_key("foo") + + # It's also updated when sdp returns more than one element + concept2 = Concept("foo", body="2") + sheerka.create_new_concept(context, 
concept2) + sheerka.cache_manager.commit(context) + + sheerka.cache_manager.clear() + assert len(sheerka.get_by_key("foo")) == 2 + assert sheerka.has_key("foo") # updated when by_id - sheerka.reset_cache() - sheerka.get_by_id("1001") - assert "1001" in sheerka.cache_by_id - assert "foo" not in sheerka.cache_by_key # cache_by_key not updated as "1001" is not the only one + sheerka.cache_manager.clear() + assert sheerka.get_by_id("1001") == concept + assert sheerka.has_id("1001") - def test_i_can_get_by_key_several_times(self): - sheerka = self.get_sheerka() - sheerka.create_new_concept(self.get_context(sheerka), Concept("foo", body="1")) - sheerka.create_new_concept(self.get_context(sheerka), Concept("foo", body="2")) + sheerka.cache_manager.clear() + assert sheerka.get_by_name("foo") == [concept, concept2] + assert sheerka.has_name("foo") - sheerka.reset_cache() - sheerka.get("foo", "1001") # only one element requested. But the cache must be updated with two elements + sheerka.cache_manager.clear() + assert sheerka.get_by_hash(concept.get_definition_hash()) == concept + assert sheerka.has_hash(concept.get_definition_hash()) - # let's check it - concepts = sheerka.get("foo") + def test_get_by_key_retrieve_all_elements(self): + sheerka, context, *concepts = self.init_concepts( + Concept("foo", body="1"), + Concept("foo", body="2"), + create_new=True) + sheerka.cache_manager.commit(context) + + sheerka.cache_manager.clear() + sheerka.get_by_key("foo", "1001") # I ask only for the one with id = "1001" + + # but the two keys are returned + concepts = sheerka.get_by_key("foo") assert len(concepts) == 2 assert concepts[0].id == "1001" assert concepts[1].id == "1002" + + def test_concept_node_parsing_is_initialized_at_startup(self): + sheerka, context, foo, bar, baz = self.init_concepts( + "foo", + "bar", + Concept("baz", definition="foo"), + create_new=True) + sheerka.cache_manager.commit(context) + + assert 
sheerka.cache_manager.copy(sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == { + 'bar': ['1002'], + 'c:|1001:': ['1003'], + 'foo': ['1001']} + assert sheerka.cache_manager.copy(sheerka.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == { + 'bar': ['1002'], + 'foo': ['1001', '1003'] + } + + sheerka = self.get_sheerka() # another instance + assert sheerka.cache_manager.copy(sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == { + 'bar': ['1002'], + 'c:|1001:': ['1003'], + 'foo': ['1001']} + assert sheerka.cache_manager.copy(sheerka.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == { + 'bar': ['1002'], + 'foo': ['1001', '1003'] + } diff --git a/tests/core/test_sheerka_call_evaluators.py b/tests/core/test_sheerka_call_evaluators.py index 5aa7369..242fc1a 100644 --- a/tests/core/test_sheerka_call_evaluators.py +++ b/tests/core/test_sheerka_call_evaluators.py @@ -189,6 +189,12 @@ class EvaluatorAllSuppressFooEntry(EvaluatorAllWithPriority): class TestSheerkaExecuteEvaluators(TestUsingMemoryBasedSheerka): + @classmethod + def teardown_class(cls): + # At the end of the tests, sheerka singleton instance will be corrupted + # Ask for a new one + TestUsingMemoryBasedSheerka.singleton_instance = None + def test_that_return_values_is_unchanged_when_no_evaluator(self): sheerka = self.get_sheerka() sheerka.evaluators = [] diff --git a/tests/core/test_sheerka_call_parsers.py b/tests/core/test_sheerka_call_parsers.py index a4cbe2b..71ef47d 100644 --- a/tests/core/test_sheerka_call_parsers.py +++ b/tests/core/test_sheerka_call_parsers.py @@ -135,6 +135,11 @@ class ListOfNoneParser(BaseTestParser): class TestSheerkaExecuteParsers(TestUsingMemoryBasedSheerka): + @classmethod + def teardown_class(cls): + # At the end of the tests, sheerka singleton instance will be corrupted + # Ask for a new one + TestUsingMemoryBasedSheerka.singleton_instance = None def test_disabled_parsers_are_not_executed(self): sheerka = self.get_sheerka() diff --git a/tests/core/test_sheerka_printer.py 
b/tests/core/test_sheerka_printer.py index 7b41d44..5411e87 100644 --- a/tests/core/test_sheerka_printer.py +++ b/tests/core/test_sheerka_printer.py @@ -22,6 +22,7 @@ class ObjLongProp: class TestSheerkaPrinter(TestUsingMemoryBasedSheerka): + @pytest.mark.parametrize("text, expected", [ ("Hello world!", "Hello world!\n"), ("%black%%red%%green%%yellow%%reset%", "\x1b[30m\x1b[31m\x1b[32m\x1b[33m\x1b[0m\n"), @@ -45,7 +46,7 @@ class TestSheerkaPrinter(TestUsingMemoryBasedSheerka): def test_i_can_print_concept(self, capsys): sheerka = self.get_sheerka() - foo = Concept("foo a b").def_prop("a").def_prop("b") + foo = Concept("foo a b").def_var("a").def_var("b") sheerka.print(foo) captured = capsys.readouterr() @@ -53,11 +54,11 @@ class TestSheerkaPrinter(TestUsingMemoryBasedSheerka): def test_i_can_use_custom_format(self, capsys): sheerka = self.get_sheerka() - foo = Concept("foo a b").def_prop("a").def_prop("b").init_key() + foo = Concept("foo a b").def_var("a").def_var("b").init_key() sheerka.printer_handler.register_custom_printer( foo, lambda printer, instr, item: printer.fp(instr, f"foo a={item.a}, b={item.b}")) - foo.set_prop("a", "value a").set_prop("b", "value b") + foo.set_value("a", "value a").set_value("b", "value b") sheerka.print(foo) captured = capsys.readouterr() @@ -66,8 +67,8 @@ class TestSheerkaPrinter(TestUsingMemoryBasedSheerka): def test_i_can_print_and_recurse(self, capsys): sheerka = self.get_sheerka() level3 = Concept("level3") - level2 = Concept("level2").set_metadata_value(ConceptParts.BODY, level3) - level1 = Concept("level1").set_metadata_value(ConceptParts.BODY, level2) + level2 = Concept("level2").set_value(ConceptParts.BODY, level3) + level1 = Concept("level1").set_value(ConceptParts.BODY, level2) sheerka.print(level1) captured = capsys.readouterr() @@ -90,9 +91,9 @@ class TestSheerkaPrinter(TestUsingMemoryBasedSheerka): level31 = Concept("level31") level32 = Concept("level32") level33 = Concept("level33") - level21 = 
Concept("level21").set_metadata_value(ConceptParts.BODY, [level31, level32]) - level22 = Concept("level22").set_metadata_value(ConceptParts.BODY, [level33]) - level1 = Concept("level1").set_metadata_value(ConceptParts.BODY, [level21, level22]) + level21 = Concept("level21").set_value(ConceptParts.BODY, [level31, level32]) + level22 = Concept("level22").set_value(ConceptParts.BODY, [level33]) + level1 = Concept("level1").set_value(ConceptParts.BODY, [level21, level22]) sheerka.print(level1) captured = capsys.readouterr() @@ -111,9 +112,9 @@ class TestSheerkaPrinter(TestUsingMemoryBasedSheerka): level31 = Concept("level31") level32 = Concept("level32") level33 = Concept("level33") - level21 = Concept("level21").set_metadata_value(ConceptParts.BODY, [level31, level32]) - level22 = Concept("level22").set_metadata_value(ConceptParts.BODY, [level33]) - level1 = Concept("level1").set_metadata_value(ConceptParts.BODY, [level21, level22]) + level21 = Concept("level21").set_value(ConceptParts.BODY, [level31, level32]) + level22 = Concept("level22").set_value(ConceptParts.BODY, [level33]) + level1 = Concept("level1").set_value(ConceptParts.BODY, [level21, level22]) instructions = FormatInstructions(no_color=True) explanation = sheerka.new( @@ -150,9 +151,9 @@ class TestSheerkaPrinter(TestUsingMemoryBasedSheerka): def test_i_can_format_concept(self, capsys): sheerka = self.get_sheerka() - foo = Concept("foo a b").def_prop("a").def_prop("b").init_key() - foo.set_prop("a", "value a").set_prop("b", "value b") - foo.set_metadata_value(ConceptParts.BODY, "body") + foo = Concept("foo a b").def_var("a").def_var("b").init_key() + foo.set_value("a", "value a").set_value("b", "value b") + foo.set_value(ConceptParts.BODY, "body") sheerka.set_id_if_needed(foo, False) sheerka.printer_handler.register_format_l(foo, "{id}-{name}-{key}-{body}-{a}-{b}") diff --git a/tests/core/test_utils.py b/tests/core/test_utils.py index c556146..7dd2896 100644 --- a/tests/core/test_utils.py +++ 
b/tests/core/test_utils.py @@ -69,13 +69,13 @@ def test_i_can_get_sub_classes(): default_parser = core.utils.get_class("parsers.DefaultParser.DefaultParser") exact_concept_parser = core.utils.get_class("parsers.ExactConceptParser.ExactConceptParser") python_parser = core.utils.get_class("parsers.PythonParser.PythonParser") - concept_lexer_parser = core.utils.get_class("parsers.BnfNodeParser.BnfNodeParser") + bnf_node_parser = core.utils.get_class("parsers.BnfNodeParser.BnfNodeParser") assert base_parser not in sub_classes assert default_parser in sub_classes assert exact_concept_parser in sub_classes assert python_parser in sub_classes - assert concept_lexer_parser in sub_classes + assert bnf_node_parser in sub_classes @pytest.mark.parametrize("a,b, expected", [ @@ -170,12 +170,14 @@ def test_i_can_str_concept(): assert core.utils.str_concept((None, "id")) == "c:|id:" assert core.utils.str_concept(("key", None)) == "c:key:" assert core.utils.str_concept((None, None)) == "" + assert core.utils.str_concept(("key", "id"), skip_key=True) == "c:|id:" concept = Concept("foo").init_key() assert core.utils.str_concept(concept) == "c:foo:" concept.metadata.id = "1001" assert core.utils.str_concept(concept) == "c:foo|1001:" + assert core.utils.str_concept(concept, skip_key=True) == "c:|1001:" @pytest.mark.parametrize("text, expected", [ diff --git a/tests/evaluators/test_AddConceptEvaluator.py b/tests/evaluators/test_AddConceptEvaluator.py index a74d7f3..8bff3ed 100644 --- a/tests/evaluators/test_AddConceptEvaluator.py +++ b/tests/evaluators/test_AddConceptEvaluator.py @@ -142,12 +142,12 @@ class TestAddConceptEvaluator(TestUsingMemoryBasedSheerka): body="print('hello' + a)") # sanity. 
Make sure that the concept does not already exist - from_db = context.sheerka.get("hello " + VARIABLE_PREFIX + "0") + from_db = context.sheerka.get_by_key("hello " + VARIABLE_PREFIX + "0") assert context.sheerka.isinstance(from_db, BuiltinConcepts.UNKNOWN_CONCEPT) AddConceptEvaluator().eval(context, def_concept_return_value) context.sheerka.concepts_cache = {} # reset cache - from_db = context.sheerka.get("hello " + VARIABLE_PREFIX + "0") + from_db = context.sheerka.get_by_key("hello " + VARIABLE_PREFIX + "0") assert from_db.metadata.key == f"hello {VARIABLE_PREFIX}0" assert from_db.metadata.name == "hello a" @@ -157,38 +157,38 @@ class TestAddConceptEvaluator(TestUsingMemoryBasedSheerka): assert from_db.metadata.body == "print('hello' + a)" assert from_db.metadata.definition == "hello a" assert from_db.metadata.definition_type == "bnf" - assert len(from_db.metadata.props) == 1 - assert from_db.metadata.props[0] == ("a", None) - assert "a" in from_db.props + assert len(from_db.metadata.variables) == 1 + assert from_db.metadata.variables[0] == ("a", None) + assert "a" in from_db.values assert from_db.compiled == {} # ast is not saved in db - def test_i_can_get_props_from_python_node_when_long_name(self): + def test_i_can_get_variables_from_python_node_when_long_name(self): ret_val = self.get_concept_part("isinstance(a, str)") context = self.get_context() - assert AddConceptEvaluator.get_props(context.sheerka, ret_val, ["a", "b"]) == ["a"] + assert AddConceptEvaluator.get_variables(context.sheerka, ret_val, ["a", "b"]) == ["a"] - def test_i_cannot_get_props_from_python_node_when_name_has_only_one_token(self): + def test_i_cannot_get_variables_from_python_node_when_name_has_only_one_token(self): ret_val = self.get_concept_part("isinstance(a, str)") context = self.get_context() - assert AddConceptEvaluator.get_props(context.sheerka, ret_val, ["a"]) == [] + assert AddConceptEvaluator.get_variables(context.sheerka, ret_val, ["a"]) == [] - def 
test_i_can_get_props_from_another_concept(self): - concept = Concept("hello").def_prop("a").def_prop("b") + def test_i_can_get_variables_from_another_concept(self): + concept = Concept("hello").def_var("a").def_var("b") ret_val = ReturnValueConcept(who="some_parser", status=True, value=ParserResultConcept(value=concept)) - assert AddConceptEvaluator.get_props(self.get_sheerka(), ret_val, []) == ["a", "b"] + assert AddConceptEvaluator.get_variables(self.get_sheerka(), ret_val, []) == ["a", "b"] - def test_i_can_get_props_from_definition(self): + def test_i_can_get_variables_from_definition(self): parsing_expression = Sequence(ConceptExpression('mult'), ZeroOrMore(Sequence(StrMatch("+"), ConceptExpression("add")))) ret_val = self.get_return_value("mult (('+'|'-') add)?", parsing_expression) - assert AddConceptEvaluator.get_props(self.get_sheerka(), ret_val, []) == ["add", "mult"] + assert AddConceptEvaluator.get_variables(self.get_sheerka(), ret_val, []) == ["add", "mult"] def test_concept_that_references_itself_is_correctly_created(self): context = self.get_context() @@ -200,5 +200,5 @@ class TestAddConceptEvaluator(TestUsingMemoryBasedSheerka): new_concept = ret_val.body.body assert new_concept.name == 'foo' assert new_concept.metadata.body == 'foo' - assert new_concept.props == {} - assert new_concept.metadata.props == [] + assert new_concept.values == {} + assert new_concept.metadata.variables == [] diff --git a/tests/evaluators/test_AddConceptInSetEvaluator.py b/tests/evaluators/test_AddConceptInSetEvaluator.py index 56cb2d2..e61e0c2 100644 --- a/tests/evaluators/test_AddConceptInSetEvaluator.py +++ b/tests/evaluators/test_AddConceptInSetEvaluator.py @@ -1,14 +1,14 @@ import pytest - from core.builtin_concepts import ReturnValueConcept, ParserResultConcept, BuiltinConcepts from core.concept import Concept from core.tokenizer import Tokenizer from evaluators.AddConceptInSetEvaluator import AddConceptInSetEvaluator from parsers.DefaultParser import IsaConceptNode, 
NameNode + from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka -def get_ret_val(concept_name, concept_set_name): +def get_isa_ret_val(concept_name, concept_set_name): n1 = NameNode(list(Tokenizer(concept_name))) n2 = NameNode(list(Tokenizer(concept_set_name))) @@ -30,7 +30,7 @@ class TestAddConceptInSetEvaluator(TestUsingMemoryBasedSheerka): def test_i_cannot_add_if_the_concept_does_not_exists(self): context = self.get_context() - ret_val = get_ret_val("foo", "bar") + ret_val = get_isa_ret_val("foo", "bar") res = AddConceptInSetEvaluator().eval(context, ret_val) assert not res.status @@ -43,7 +43,7 @@ class TestAddConceptInSetEvaluator(TestUsingMemoryBasedSheerka): context.sheerka.set_id_if_needed(foo, False) context.sheerka.add_in_cache(foo) - ret_val = get_ret_val("foo", "bar") + ret_val = get_isa_ret_val("foo", "bar") res = AddConceptInSetEvaluator().eval(context, ret_val) assert not res.status @@ -52,10 +52,10 @@ class TestAddConceptInSetEvaluator(TestUsingMemoryBasedSheerka): def test_i_can_add_concept_to_a_set_of_concept(self): sheerka, context, foo, bar = self.init_concepts("foo", "bar", create_new=True) - ret_val = get_ret_val("foo", "bar") + ret_val = get_isa_ret_val("foo", "bar") res = AddConceptInSetEvaluator().eval(context, ret_val) - foo = sheerka.new("foo") # reload it + foo = sheerka.new("foo") # get a new instance assert res.status assert context.sheerka.isinstance(res.value, BuiltinConcepts.SUCCESS) @@ -63,8 +63,7 @@ class TestAddConceptInSetEvaluator(TestUsingMemoryBasedSheerka): assert context.sheerka.isinset(foo, bar) assert context.sheerka.isa(foo, bar) - foo_from_sheerka = context.sheerka.get("foo") - assert foo_from_sheerka.get_prop(BuiltinConcepts.ISA) == [bar] + assert foo.get_prop(BuiltinConcepts.ISA) == {bar} def test_i_can_add_bnf_concept_to_a_set_of_concept(self): """ @@ -76,11 +75,11 @@ class TestAddConceptInSetEvaluator(TestUsingMemoryBasedSheerka): sheerka, context, one, two, foo, bar = self.init_concepts( 
"one", "two", - Concept("foo", definition="(one|two)=a 'plus' (one|two)=b", body="a + b").def_prop("a").def_prop("b"), + Concept("foo", definition="(one|two)=a 'plus' (one|two)=b", body="a + b").def_var("a").def_var("b"), "bar", create_new=True) - ret_val = get_ret_val("foo", "bar") + ret_val = get_isa_ret_val("foo", "bar") res = AddConceptInSetEvaluator().eval(context, ret_val) foo = sheerka.new("foo") # reload it @@ -90,8 +89,8 @@ class TestAddConceptInSetEvaluator(TestUsingMemoryBasedSheerka): assert context.sheerka.isinset(foo, bar) assert context.sheerka.isa(foo, bar) - foo_from_sheerka = context.sheerka.get("foo") - assert foo_from_sheerka.get_prop(BuiltinConcepts.ISA) == [bar] + foo_from_sheerka = context.sheerka.get_by_key("foo") + assert foo_from_sheerka.get_prop(BuiltinConcepts.ISA) == {bar} def test_i_can_add_concept_with_a_body_to_a_set_of_concept(self): context = self.get_context() @@ -101,23 +100,18 @@ class TestAddConceptInSetEvaluator(TestUsingMemoryBasedSheerka): bar = Concept("bar") context.sheerka.create_new_concept(context, bar) - ret_val = get_ret_val("foo", "bar") + ret_val = get_isa_ret_val("foo", "bar") res = AddConceptInSetEvaluator().eval(context, ret_val) assert res.status assert context.sheerka.isinstance(res.value, BuiltinConcepts.SUCCESS) def test_i_cannot_add_the_same_concept_twice(self): - context = self.get_context() - foo = Concept("foo") - context.sheerka.create_new_concept(context, foo) + sheerka, context, foo, bar = self.init_concepts("foo", "bar", create_new=True) - bar = Concept("bar") - context.sheerka.create_new_concept(context, bar) - - ret_val = get_ret_val("foo", "bar") + ret_val = get_isa_ret_val("foo", "bar") AddConceptInSetEvaluator().eval(context, ret_val) - res = AddConceptInSetEvaluator().eval(context, ret_val) + res = AddConceptInSetEvaluator().eval(context, ret_val) # again assert not res.status assert context.sheerka.isinstance(res.value, BuiltinConcepts.CONCEPT_ALREADY_IN_SET) diff --git 
a/tests/evaluators/test_ConceptEvaluator.py b/tests/evaluators/test_ConceptEvaluator.py index 68a1688..767b58c 100644 --- a/tests/evaluators/test_ConceptEvaluator.py +++ b/tests/evaluators/test_ConceptEvaluator.py @@ -24,7 +24,7 @@ class TestAddConceptEvaluator(TestUsingMemoryBasedSheerka): concept = Concept(name="foo", where="True", pre="2", - post="3").def_prop("a", "4").def_prop("b", "5") + post="3").def_var("a", "4").def_var("b", "5") evaluator = ConceptEvaluator() item = self.pretval(concept) @@ -33,11 +33,11 @@ class TestAddConceptEvaluator(TestUsingMemoryBasedSheerka): assert result.who == evaluator.name assert result.status assert result.value.name == "foo" - assert result.value.get_metadata_value(ConceptParts.WHERE) == True - assert result.value.get_metadata_value(ConceptParts.PRE) == 2 - assert result.value.get_metadata_value(ConceptParts.POST) == 3 - assert result.value.get_prop("a") == 4 - assert result.value.get_prop("b") == 5 + assert result.value.get_value(ConceptParts.WHERE) == True + assert result.value.get_value(ConceptParts.PRE) == 2 + assert result.value.get_value(ConceptParts.POST) == 3 + assert result.value.get_value("a") == 4 + assert result.value.get_value("b") == 5 assert result.value.key == "foo" assert result.parents == [item] @@ -48,7 +48,7 @@ class TestAddConceptEvaluator(TestUsingMemoryBasedSheerka): body="'I have a value'", where="True", pre="2", - post="3").set_prop("a", "4").set_prop("b", "5") + post="3").set_value("a", "4").set_value("b", "5") evaluator = ConceptEvaluator(return_body=True) item = self.pretval(concept) @@ -65,7 +65,7 @@ class TestAddConceptEvaluator(TestUsingMemoryBasedSheerka): body="'I have a value'", where="True", pre="2", - post="3").set_prop("a", "4").set_prop("b", "5") + post="3").set_value("a", "4").set_value("b", "5") evaluator = ConceptEvaluator(return_body=False) # which is the default behaviour item = self.pretval(concept) @@ -77,11 +77,11 @@ class TestAddConceptEvaluator(TestUsingMemoryBasedSheerka): 
assert result.parents == [item] def test_i_can_eval_if_with_the_same_name_is_defined_in_the_context(self): - # If we evaluate Concept("foo", body="a").set_prop("a", "'property_a'") + # If we evaluate Concept("foo", body="a").set_value("a", "'property_a'") # ConceptEvaluator will be called to resolve 'a' while we know that 'a' refers to the string 'property_a' context = self.get_context() - context.obj = Concept("other").set_prop("foo", "'some_other_value'") + context.obj = Concept("other").set_value("foo", "'some_other_value'") concept = Concept(name="foo") item = self.pretval(concept) @@ -95,8 +95,8 @@ class TestAddConceptEvaluator(TestUsingMemoryBasedSheerka): context.local_hints.add(BuiltinConcepts.EVAL_BODY_REQUESTED) context.sheerka.add_in_cache(Concept(name="one").init_key()) concept_plus = context.sheerka.add_in_cache(Concept(name="a plus b") - .def_prop("a", "one") - .def_prop("b", "two").init_key()) + .def_var("a", "one") + .def_var("b", "two").init_key()) evaluator = ConceptEvaluator() item = self.pretval(concept_plus) diff --git a/tests/evaluators/test_EvalEvaluator.py b/tests/evaluators/test_EvalEvaluator.py index e586fb1..9189beb 100644 --- a/tests/evaluators/test_EvalEvaluator.py +++ b/tests/evaluators/test_EvalEvaluator.py @@ -75,4 +75,4 @@ class TestEvalEvaluator(TestUsingMemoryBasedSheerka): assert len(evaluated) == 1 assert evaluated[0].status - assert evaluated[0].body == [foo, bar, baz] + assert set(evaluated[0].body) == {foo, bar, baz} diff --git a/tests/evaluators/test_LexerNodeEvaluator.py b/tests/evaluators/test_LexerNodeEvaluator.py index 8306364..3e4ed7a 100644 --- a/tests/evaluators/test_LexerNodeEvaluator.py +++ b/tests/evaluators/test_LexerNodeEvaluator.py @@ -1,20 +1,21 @@ import ast -import pytest +import pytest from core.builtin_concepts import ReturnValueConcept, ParserResultConcept, BuiltinConcepts from core.concept import Concept, ConceptParts, DoNotResolve from evaluators.LexerNodeEvaluator import LexerNodeEvaluator from 
parsers.BaseNodeParser import SourceCodeNode -from parsers.BnfNodeParser import ConceptNode, BnfNodeParser, StrMatch, UnrecognizedTokensNode +from parsers.BnfNodeParser import ConceptNode, BnfNodeParser, UnrecognizedTokensNode from parsers.PythonParser import PythonNode + from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka class TestLexerNodeEvaluator(TestUsingMemoryBasedSheerka): - def from_parsing(self, context, grammar, expression): + def init_from_concepts(self, context, concepts, expression): parser = BnfNodeParser() - parser.initialize(context, grammar) + parser.init_from_concepts(context, concepts) ret_val = parser.parse(context, expression) assert ret_val.status @@ -31,18 +32,6 @@ class TestLexerNodeEvaluator(TestUsingMemoryBasedSheerka): return ReturnValueConcept("somme_name", True, ParserResultConcept(value=nodes)) - def init(self, concept, grammar, text): - context = self.get_context() - if isinstance(concept, list): - for c in concept: - context.sheerka.add_in_cache(c) - else: - context.sheerka.add_in_cache(concept) - ret_val = self.from_parsing(context, grammar, text) - node = ret_val.value.value[0] - - return context, node - @pytest.mark.parametrize("ret_val, expected", [ (ReturnValueConcept("some_name", True, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])), True), (ReturnValueConcept("some_name", True, ParserResultConcept(value=ConceptNode(Concept(), 0, 0))), True), @@ -60,15 +49,13 @@ class TestLexerNodeEvaluator(TestUsingMemoryBasedSheerka): (ReturnValueConcept("some_name", True, ConceptNode(Concept(), 0, 0)), False), ]) def test_i_can_match(self, ret_val, expected): - context = self.get_context() + sheerka = self.get_sheerka(singleton=True) + context = self.get_context(sheerka) assert LexerNodeEvaluator().matches(context, ret_val) == expected def test_concept_is_returned_when_only_one_in_the_list(self): - foo = Concept("foo") - context = self.get_context() - context.sheerka.add_in_cache(foo) - - ret_val = 
self.from_parsing(context, {foo: StrMatch("foo")}, "foo") + sheerka, context, foo = self.init_concepts(self.bnf_concept("foo"), singleton=True) + ret_val = self.init_from_concepts(context, [foo], "foo") evaluator = LexerNodeEvaluator() result = evaluator.eval(context, ret_val) @@ -80,7 +67,7 @@ class TestLexerNodeEvaluator(TestUsingMemoryBasedSheerka): assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) assert wrapper.parser == evaluator assert wrapper.source == "foo" - assert return_value == Concept("foo").init_key() + assert return_value == foo assert return_value.compiled[ConceptParts.BODY] == DoNotResolve("foo") assert result.parents == [ret_val] diff --git a/tests/evaluators/test_PythonEvaluator.py b/tests/evaluators/test_PythonEvaluator.py index 0bec46b..60f4924 100644 --- a/tests/evaluators/test_PythonEvaluator.py +++ b/tests/evaluators/test_PythonEvaluator.py @@ -40,7 +40,7 @@ class TestPythonEvaluator(TestUsingMemoryBasedSheerka): @pytest.mark.parametrize("concept", [ Concept("foo"), Concept("foo", body="2"), - Concept("foo").set_prop("prop", "'a'"), + Concept("foo").def_var("prop", "'a'"), Concept("foo", body="bar") ]) def test_i_cannot_eval_simple_concept(self, concept): diff --git a/tests/non_reg/test_sheerka_non_reg.py b/tests/non_reg/test_sheerka_non_reg.py index 59f8f17..85b1e45 100644 --- a/tests/non_reg/test_sheerka_non_reg.py +++ b/tests/non_reg/test_sheerka_non_reg.py @@ -1,15 +1,15 @@ import pytest from core.builtin_concepts import BuiltinConcepts -from core.concept import Concept, PROPERTIES_TO_SERIALIZE, Property, simplec +from core.concept import Concept, PROPERTIES_TO_SERIALIZE, simplec from evaluators.MutipleSameSuccessEvaluator import MultipleSameSuccessEvaluator from parsers.BaseNodeParser import SyaAssociativity from parsers.BnfNodeParser import Sequence, StrMatch, OrderedChoice, Optional, ConceptExpression -from sdp.sheerkaDataProvider import SheerkaDataProvider from tests.TestUsingFileBasedSheerka import 
TestUsingFileBasedSheerka +from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka -class TestSheerkaNonReg(TestUsingFileBasedSheerka): +class TestSheerkaNonRegMemory(TestUsingMemoryBasedSheerka): def init_scenario(self, init_expressions): sheerka = self.get_sheerka() @@ -105,10 +105,10 @@ as: expected = self.get_default_concept() expected.metadata.id = "1001" expected.metadata.desc = None - expected.metadata.props = [("a", None), ("b", None)] + expected.metadata.variables = [("a", None), ("b", None)] expected.init_key() - sheerka = self.get_sheerka() + sheerka = self.get_sheerka(cache_only=False) res = sheerka.evaluate_user_input(text) assert len(res) == 1 @@ -120,21 +120,15 @@ as: for prop in PROPERTIES_TO_SERIALIZE: assert getattr(concept_saved.metadata, prop) == getattr(expected.metadata, prop) - assert concept_saved.key in sheerka.cache_by_key - assert concept_saved.id in sheerka.cache_by_id - assert sheerka.sdp.io.exists( - sheerka.sdp.io.get_obj_path(SheerkaDataProvider.ObjectsFolder, concept_saved.get_origin())) + # cache is up to date + assert sheerka.has_key(concept_saved.key) + assert sheerka.has_id(concept_saved.id) + assert sheerka.has_name(concept_saved.name) + assert sheerka.has_hash(concept_saved.get_definition_hash()) + assert sheerka.cache_manager.copy(sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {'+': ['1001']} - def test_i_can_def_several_concepts(self): - sheerka = self.get_sheerka(use_dict=False) - sheerka.evaluate_user_input("def concept foo") - - sheerka = self.get_sheerka(use_dict=False) - res = sheerka.evaluate_user_input("def concept bar") - - assert len(res) == 1 - assert res[0].status - assert res[0].body.body.id == "1002" + # sdp is up to date + assert sheerka.sdp.exists(sheerka.CONCEPTS_BY_KEY_ENTRY, expected.key) def test_i_can_evaluate_def_concept_part_when_one_part_is_a_ref_of_another_concept(self): """ @@ -145,11 +139,11 @@ as: sheerka = self.get_sheerka() # concept 'a plus b' is known - concept_a_plus_b = 
Concept(name="a plus b").def_prop("a").def_prop("b") + concept_a_plus_b = Concept(name="a plus b").def_var("a").def_var("b").init_key() sheerka.add_in_cache(concept_a_plus_b) res = sheerka.evaluate_user_input("def concept a xx b as a plus b") - expected = Concept(name="a xx b", body="a plus b").def_prop("a").def_prop("b").init_key() + expected = Concept(name="a xx b", body="a plus b").def_var("a").def_var("b").init_key() expected.metadata.id = "1001" assert len(res) == 1 @@ -161,10 +155,7 @@ as: for prop in PROPERTIES_TO_SERIALIZE: assert getattr(concept_saved.metadata, prop) == getattr(expected.metadata, prop) - assert concept_saved.key in sheerka.cache_by_key - assert concept_saved.id in sheerka.cache_by_id - assert sheerka.sdp.io.exists( - sheerka.sdp.io.get_obj_path(SheerkaDataProvider.ObjectsFolder, concept_saved.get_origin())) + assert sheerka.has_key(concept_saved.key) def test_i_cannot_evaluate_the_same_def_concept_twice(self): text = """ @@ -202,7 +193,7 @@ as: def test_i_can_recognize_concept_with_variable(self): sheerka = self.get_sheerka() - concept_hello = Concept(name="hello a").def_prop("a") + concept_hello = Concept(name="hello a").def_var("a") concept_foo = Concept(name="foo") sheerka.add_in_cache(concept_hello) sheerka.add_in_cache(concept_foo) @@ -212,15 +203,15 @@ as: assert len(res) == 1 assert res[0].status assert sheerka.isinstance(return_value, concept_hello) - assert return_value.metadata.props[0] == ('a', "foo") + assert return_value.metadata.variables[0] == ('a', "foo") # sanity check evaluated = sheerka.evaluate_concept(self.get_context(eval_body=True), return_value) - assert evaluated.props["a"].value == concept_foo + assert evaluated.get_value("a") == concept_foo def test_i_can_recognize_concept_with_variable_and_python_as_body(self): sheerka = self.get_sheerka() - hello_a = sheerka.add_in_cache(Concept(name="hello a", body="'hello ' + a").def_prop("a")) + hello_a = sheerka.add_in_cache(Concept(name="hello a", body="'hello ' + 
a").def_var("a")) sheerka.add_in_cache(Concept(name="foo", body="'foo'")) res = sheerka.evaluate_user_input("hello foo") @@ -232,15 +223,14 @@ as: evaluated = sheerka.evaluate_concept(self.get_context(eval_body=True), res[0].value) assert evaluated.body == "hello foo" assert evaluated.metadata.is_evaluated - assert evaluated.props["a"].value == simplec("foo", "foo") - assert evaluated.props["a"].value.metadata.is_evaluated + assert evaluated.get_value("a") == simplec("foo", "foo") + assert evaluated.get_value("a").metadata.is_evaluated def test_i_can_recognize_duplicate_concepts_with_same_value(self): sheerka = self.get_sheerka() - - sheerka.add_in_cache(Concept(name="hello a", body="'hello ' + a").def_prop("a")) - sheerka.add_in_cache(Concept(name="hello foo", body="'hello foo'")) - sheerka.add_in_cache(Concept(name="foo", body="'foo'")) + self.create_concept_lite(sheerka, Concept(name="hello a", body="'hello ' + a"), variables=["a"]) + self.create_concept_lite(sheerka, Concept(name="hello foo", body="'hello foo'")) + self.create_concept_lite(sheerka, Concept(name="foo", body="'foo'")) res = sheerka.evaluate_user_input("hello foo") assert len(res) == 1 @@ -250,10 +240,9 @@ as: def test_i_cannot_manage_duplicate_concepts_when_the_values_are_different(self): sheerka = self.get_sheerka() - - sheerka.add_in_cache(Concept(name="hello a", body="'hello ' + a").def_prop("a")) - sheerka.add_in_cache(Concept(name="hello foo", body="'hello foo'")) - sheerka.add_in_cache(Concept(name="foo", body="'another value'")) + self.create_concept_lite(sheerka, Concept(name="hello a", body="'hello ' + a"), variables=["a"]) + self.create_concept_lite(sheerka, Concept(name="hello foo", body="'hello foo'")) + self.create_concept_lite(sheerka, Concept(name="foo", body="'another value'")) res = sheerka.evaluate_user_input("hello foo") assert len(res) == 1 @@ -270,8 +259,8 @@ as: sheerka = self.get_sheerka() context = self.get_context(sheerka) - sheerka.create_new_concept(context, 
Concept(name="hello a", body="'hello ' + a").def_prop("a")) - sheerka.create_new_concept(context, Concept(name="hello b", body="'hello ' + b").def_prop("b")) + sheerka.create_new_concept(context, Concept(name="hello a", body="'hello ' + a").def_var("a")) + sheerka.create_new_concept(context, Concept(name="hello b", body="'hello ' + b").def_var("b")) res = sheerka.evaluate_user_input("hello 'foo'") assert len(res) == 1 @@ -283,42 +272,12 @@ as: sheerka = self.get_sheerka() context = self.get_context(sheerka) - sheerka.create_new_concept(context, Concept(name="concepts", body="sheerka.concepts()")) + sheerka.create_new_concept(context, Concept(name="concepts", body="sheerka.test()")) res = sheerka.evaluate_user_input("eval concepts") assert len(res) == 1 assert res[0].status - assert isinstance(res[0].value, list) - - def test_i_can_create_concept_with_bnf_definition(self): - sheerka = self.get_sheerka(use_dict=False, skip_builtins_in_db=False) - a = Concept("a") - sheerka.add_in_cache(a) - sheerka.concepts_definition_cache = {a: OrderedChoice("one", "two")} - - res = sheerka.evaluate_user_input("def concept plus from bnf a ('plus' plus)?") - assert len(res) == 1 - assert res[0].status - assert sheerka.isinstance(res[0].value, BuiltinConcepts.NEW_CONCEPT) - - saved_concept = sheerka.sdp.get_safe(sheerka.CONCEPTS_ENTRY, "plus") - assert saved_concept.key == "plus" - assert saved_concept.metadata.definition == "a ('plus' plus)?" - assert "a" in saved_concept.props - assert "plus" in saved_concept.props - - saved_definitions = sheerka.sdp.get_safe(sheerka.CONCEPTS_DEFINITIONS_ENTRY) - expected_bnf = Sequence( - ConceptExpression(a, rule_name="a"), - Optional(Sequence(StrMatch("plus"), ConceptExpression(saved_concept, rule_name="plus")))) - assert saved_definitions["c:plus|1001:"] == "(c:a:=a ('plus' c:plus|1001:=plus)?)" - - new_concept = res[0].value.body - assert new_concept.metadata.name == "plus" - assert new_concept.metadata.definition == "a ('plus' plus)?" 
- assert new_concept.bnf == expected_bnf - assert "a" in new_concept.props - assert "plus" in new_concept.props + assert res[0].value == sheerka.test() def test_i_can_recognize_bnf_definitions(self): sheerka = self.get_sheerka() @@ -344,41 +303,11 @@ as: assert sheerka.isinstance(return_value, concept_b) # sanity check - evaluated = sheerka.evaluate_concept(self.get_context(eval_body=True), return_value) + evaluated = sheerka.evaluate_concept(self.get_context(sheerka=sheerka, eval_body=True), return_value) assert evaluated.body == "one three" assert evaluated.metadata.is_evaluated - assert evaluated.props["a"] == Property("a", sheerka.new(concept_a.key, body="one").init_key()) - assert evaluated.props["a"].value.metadata.is_evaluated - - def test_i_can_recognize_bnf_definitions_from_separate_instances(self): - """ - Same test then before, - but make sure that the BNF are correctly persisted and loaded - """ - sheerka = self.get_sheerka(use_dict=False) - concept_a = sheerka.evaluate_user_input("def concept a from bnf 'one' 'two'")[0].body.body - - res = self.get_sheerka(use_dict=False).evaluate_user_input("one two") - assert len(res) == 1 - assert res[0].status - assert sheerka.isinstance(res[0].value, concept_a) - - # add another bnf definition - concept_b = sheerka.evaluate_user_input("def concept b from bnf a 'three'")[0].body.body - - res = self.get_sheerka(use_dict=False).evaluate_user_input("one two") # previous one still works - assert len(res) == 1 - assert res[0].status - assert sheerka.isinstance(res[0].value, concept_a) - - res = self.get_sheerka(use_dict=False).evaluate_user_input("one two three") # new one works - assert len(res) == 1 - assert res[0].status - assert sheerka.isinstance(res[0].value, concept_b) - - evaluated = sheerka.evaluate_concept(self.get_context(eval_body=True), res[0].value) - assert evaluated.body == "one two three" - assert evaluated.props["a"] == Property("a", sheerka.new(concept_a.key, body="one two").init_key()) + assert 
evaluated.get_value("a") == sheerka.new(concept_a.key, body="one").init_key() + assert evaluated.get_value("a").metadata.is_evaluated @pytest.mark.parametrize("user_input", [ "def concept greetings from def hello a where a", @@ -393,7 +322,7 @@ as: assert res[0].status concept_found = res[0].value assert sheerka.isinstance(concept_found, greetings) - assert concept_found.get_prop("a") == "foo" + assert concept_found.get_value("a") == "foo" assert concept_found.metadata.need_validation res = sheerka.evaluate_user_input("greetings") @@ -401,10 +330,9 @@ as: assert res[0].status concept_found = res[0].value assert sheerka.isinstance(concept_found, greetings) - assert concept_found.get_prop("a") is None + assert concept_found.get_value("a") is None assert not concept_found.metadata.need_validation - # @pytest.mark.xfail @pytest.mark.parametrize("desc, definitions", [ ("Simple form", [ "def concept one as 1", @@ -452,7 +380,7 @@ as: assert len(res) == 1 assert res[0].status assert sheerka.isinstance(res[0].body, "twenties") - assert sheerka.evaluate_concept(self.get_context(eval_body=True), res[0].body).body == 21 + assert sheerka.evaluate_concept(self.get_context(sheerka=sheerka, eval_body=True), res[0].body).body == 21 res = sheerka.evaluate_user_input("twenty one + 1") assert len(res) == 1 @@ -479,7 +407,6 @@ as: assert res[0].status assert res[0].body == 23 - # @pytest.mark.xfail def test_i_can_mix_bnf_and_isa(self): """ if 'one' isa 'number, twenty number should be recognized @@ -497,7 +424,7 @@ as: assert len(res) == 1 assert res[0].status assert sheerka.isinstance(res[0].body, "twenties") - assert sheerka.evaluate_concept(self.get_context(eval_body=True), res[0].value).body == 21 + assert sheerka.evaluate_concept(self.get_context(sheerka=sheerka, eval_body=True), res[0].value).body == 21 res = sheerka.evaluate_user_input("twenty one + 1") assert len(res) == 1 @@ -524,7 +451,7 @@ as: assert res[0].status assert res[0].body == 23 - def 
test_i_can_mix_bnf_and_isa_when_concept_other_case(self): + def test_i_can_mix_bnf_and_isa_2(self): sheerka = self.get_sheerka() init = [ @@ -544,7 +471,6 @@ as: assert res[0].status assert res[0].body == 21 - # @pytest.mark.xfail def test_i_can_use_concepts_defined_with_from(self): sheerka = self.get_sheerka() @@ -656,31 +582,6 @@ as: assert res[0].status assert res[0].body == 3 - @pytest.mark.xfail - def test_i_can_recognize_composition_of_concept(self): - sheerka = self.get_sheerka() - - definitions = [ - "def concept little a where a", - "def concept blue a where a", - "def concept house" - ] - - for definition in definitions: - sheerka.evaluate_user_input(definition) - - ### CAUTION #### - # this test cannot work !! - # it is just to hint the result that I would like to achieve - - res = sheerka.evaluate_user_input("little blue house") - assert len(res) == 2 - assert res[0].status - assert res[0].body == "little(blue(house))" - - assert res[1].status - assert res[1].body == "little blue(house)" - def test_i_can_say_that_a_concept_isa_another_concept(self): sheerka = self.get_sheerka() sheerka.evaluate_user_input("def concept foo") @@ -779,36 +680,35 @@ as: sheerka.evaluate_user_input(exp) res = sheerka.evaluate_user_input("twenty one") - assert len(res) == 1 and res[0].status and sheerka.isinstance(res[0].body, "twenties") + assert len(res) == 1 + assert res[0].status + assert sheerka.isinstance(res[0].body, "twenties") res = sheerka.evaluate_user_input("eval twenty one") - assert len(res) == 1 and res[0].status and res[0].body == 21 + assert len(res) == 1 + assert res[0].status + assert res[0].body == 21 res = sheerka.evaluate_user_input("twenty two") - assert len(res) == 1 and res[0].status and sheerka.isinstance(res[0].body, "twenties") + assert len(res) == 1 + assert res[0].status + assert sheerka.isinstance(res[0].body, "twenties") res = sheerka.evaluate_user_input("eval twenty two") - assert len(res) == 1 and res[0].status and res[0].body == 22 + assert 
len(res) == 1 + assert res[0].status + assert res[0].body == 22 res = sheerka.evaluate_user_input("twenty three") assert len(res) == 1 assert not res[0].status - assert sheerka.isinstance(res[0].body, BuiltinConcepts.WHERE_CLAUSE_FAILED) + assert sheerka.isinstance(res[0].body, BuiltinConcepts.MULTIPLE_ERRORS) + assert str(BuiltinConcepts.WHERE_CLAUSE_FAILED) in [error.key for error in sheerka.get_error(res[0].body.body)] res = sheerka.evaluate_user_input("eval twenty three") assert len(res) == 1 assert not res[0].status - assert sheerka.isinstance(res[0].body, BuiltinConcepts.WHERE_CLAUSE_FAILED) - - - # def test_i_can_detect_when_only_one_evaluator_is_in_error(self): - # sheerka = self.get_sheerka() - # - # sheerka.evaluate_user_input("def concept 1 as one") - # res = sheerka.evaluate_user_input("eval 1") - # assert len(res) == 1 - # assert not res[0].status - # assert sheerka.isinstance(res[0].body, BuiltinConcepts.CONCEPT_EVAL_ERROR) + assert str(BuiltinConcepts.WHERE_CLAUSE_FAILED) in [error.key for error in sheerka.get_error(res[0].body.body)] def test_i_can_manage_some_type_of_infinite_recursion(self): sheerka = self.get_sheerka() @@ -862,7 +762,7 @@ as: assert not res[0].status assert sheerka.isinstance(res[0].body, BuiltinConcepts.WHERE_CLAUSE_FAILED) - @pytest.mark.xfail + @pytest.mark.skip("Not ready for that") def test_i_can_manage_missing_variables_from_bnf_parsing(self): definitions = [ "def concept one as 1", @@ -894,8 +794,8 @@ as: assert len(res) == 1 assert res[0].status - twenties = sheerka.get("twenties") - number = sheerka.get("number") + twenties = sheerka.get_by_key("twenties") + number = sheerka.get_by_key("number") assert sheerka.isa(twenties, number) def test_i_can_mix_sya_concepts_and_bnf_concept(self): @@ -910,9 +810,9 @@ as: sheerka = self.init_scenario(definitions) context = self.get_context(sheerka) - sheerka.set_sya_def(context, [ - (sheerka.get("mult").id, 20, SyaAssociativity.Right), - (sheerka.get("plus").id, 10, 
SyaAssociativity.Right), + sheerka.force_sya_def(context, [ + (sheerka.get_by_name("mult").id, 20, SyaAssociativity.Right), + (sheerka.get_by_name("plus").id, 10, SyaAssociativity.Right), ]) res = sheerka.evaluate_user_input("eval one plus two mult three") @@ -954,3 +854,71 @@ as: assert len(res) == 1 assert res[0].status assert res[0].body == 64 + + +class TestSheerkaNonRegFile(TestUsingFileBasedSheerka): + def test_i_can_def_several_concepts(self): + sheerka = self.get_sheerka() + sheerka.evaluate_user_input("def concept foo") + + sheerka = self.get_sheerka() + res = sheerka.evaluate_user_input("def concept bar") + + assert len(res) == 1 + assert res[0].status + assert res[0].body.body.id == "1002" + + def test_i_can_create_concept_with_bnf_definition(self): + sheerka = self.get_sheerka() + concept_a = self.bnf_concept("a", expression=OrderedChoice(StrMatch("one"), StrMatch("two"))) + sheerka.create_new_concept(self.get_context(sheerka), concept_a) + + res = sheerka.evaluate_user_input("def concept plus from bnf a ('plus' plus)?") + assert len(res) == 1 + assert res[0].status + assert sheerka.isinstance(res[0].value, BuiltinConcepts.NEW_CONCEPT) + + saved_concept = sheerka.sdp.get(sheerka.CONCEPTS_BY_KEY_ENTRY, "plus") + assert saved_concept.key == "plus" + assert saved_concept.metadata.definition == "a ('plus' plus)?" + assert "a" in saved_concept.values + assert "plus" in saved_concept.values + + expected_bnf = Sequence( + ConceptExpression(concept_a, rule_name="a"), + Optional(Sequence(StrMatch("plus"), ConceptExpression("plus")))) + + new_concept = res[0].value.body + assert new_concept.metadata.name == "plus" + assert new_concept.metadata.definition == "a ('plus' plus)?" 
+ assert new_concept.bnf == expected_bnf + assert "a" in new_concept.values + assert "plus" in new_concept.values + + def test_i_can_recognize_bnf_definitions_from_separate_instances(self): + sheerka = self.get_sheerka() + concept_a = sheerka.evaluate_user_input("def concept a from bnf 'one' 'two'")[0].body.body + + sheerka = self.get_sheerka() + res = sheerka.evaluate_user_input("one two") + assert len(res) == 1 + assert res[0].status + assert sheerka.isinstance(res[0].value, concept_a) + + # add another bnf definition + concept_b = sheerka.evaluate_user_input("def concept b from bnf a 'three'")[0].body.body + + sheerka = self.get_sheerka() + res = sheerka.evaluate_user_input("one two") # previous one still works + assert len(res) == 1 + assert res[0].status + assert sheerka.isinstance(res[0].value, concept_a) + + res = self.get_sheerka().evaluate_user_input("one two three") # new one works + assert len(res) == 1 + assert res[0].status + assert sheerka.isinstance(res[0].value, concept_b) + + evaluated = sheerka.evaluate_concept(self.get_context(eval_body=True), res[0].value) + assert evaluated.body == "one two three" + assert evaluated.get_value("a") == sheerka.new(concept_a.key, body="one two").init_key() diff --git a/tests/parsers/parsers_utils.py b/tests/parsers/parsers_utils.py index 3772c75..50d6901 100644 --- a/tests/parsers/parsers_utils.py +++ b/tests/parsers/parsers_utils.py @@ -1,6 +1,7 @@ -from core.concept import CC, Concept +from core.concept import CC, Concept, ConceptParts, DoNotResolve from core.tokenizer import Tokenizer, TokenKind, Token -from parsers.BaseNodeParser import scnode, utnode, cnode, SCWC, CNC, short_cnode, SourceCodeWithConceptNode, CN, UTN +from parsers.BaseNodeParser import scnode, utnode, cnode, SCWC, CNC, short_cnode, SourceCodeWithConceptNode, CN, UTN, \ + SCN from parsers.SyaNodeParser import SyaConceptParserHelper @@ -43,7 +44,16 @@ def compute_debug_array(res): return to_compare -def get_node(concepts_map, 
expression_as_tokens, sub_expr, concept_key=None, skip=0, is_bnf=False, sya=False): +def get_node( + concepts_map, + expression_as_tokens, + sub_expr, + concept_key=None, + skip=0, + is_bnf=False, + sya=False, + init_empty_body=False, + exclude_body=False): """ Tries to find sub in expression When found, transform it to its correct type @@ -54,6 +64,8 @@ def get_node(concepts_map, expression_as_tokens, sub_expr, concept_key=None, ski :param skip: number of occurrences of sub_expr to skip :param is_bnf: True if the concept to search is a bnf definition :param sya: Return SyaConceptParserHelper instead of a ConceptNode when needed + :param init_empty_body: if True adds the source in the body (actually in compiled.BODY) + :param exclude_body: Ask to not compare body :return: """ if sub_expr == "')'": @@ -80,23 +92,38 @@ def get_node(concepts_map, expression_as_tokens, sub_expr, concept_key=None, ski content = [get_node(concepts_map, expression_as_tokens, c, sya=sya) for c in sub_expr.content] return SourceCodeWithConceptNode(first, last, content).pseudo_fix_source() + if isinstance(sub_expr, SCN): + node = get_node(concepts_map, expression_as_tokens, sub_expr.source, sya=sya) + sub_expr.fix_pos(node) + return sub_expr + if isinstance(sub_expr, (CNC, CC, CN)): concept_node = get_node( concepts_map, expression_as_tokens, sub_expr.source or sub_expr.concept_key, sub_expr.concept_key, sya=sya) + if not hasattr(concept_node, "concept"): + raise Exception(f"'{sub_expr.concept_key}' is not a concept. 
Check your map.") concept_found = concept_node.concept sub_expr.concept_key = concept_found.key sub_expr.concept = concept_found sub_expr.fix_pos((concept_node.start, concept_node.end if hasattr(concept_node, "end") else concept_node.start)) if hasattr(sub_expr, "compiled"): for k, v in sub_expr.compiled.items(): - node = get_node(concepts_map, expression_as_tokens, v, sya=sya) # need to get start and end positions - new_value = CC(Concept().update_from(concepts_map[v])) if (isinstance(v, str) and v in concepts_map) \ - else node + node = get_node(concepts_map, expression_as_tokens, v, sya=sya, exclude_body=exclude_body) # need to get start and end positions + if isinstance(v, str) and v in concepts_map: + new_value_concept = concepts_map[v] + new_value = CC(Concept().update_from(new_value_concept), exclude_body=exclude_body) + if init_empty_body: + init_body(new_value, concept_found, v) + else: + new_value = node + sub_expr.compiled[k] = new_value sub_expr.fix_pos(node) + if init_empty_body: + init_body(sub_expr, concept_found, sub_expr.source) if hasattr(sub_expr, "fix_source"): sub_expr.fix_source(expression_as_tokens[sub_expr.start: sub_expr.end + 1]) @@ -119,32 +146,54 @@ def get_node(concepts_map, expression_as_tokens, sub_expr, concept_key=None, ski # special case of python source code if "+" in sub_expr and sub_expr.strip() != "+": - return scnode(start, start + length - 1, sub_expr) + return SCN(sub_expr, start, start + length - 1) # try to match one of the concept from the map concept_key = concept_key or sub_expr concept_found = concepts_map.get(concept_key, None) if concept_found: concept_found = Concept().update_from(concept_found) # make a copy when massively used in tests - if not sya or len(concept_found.metadata.props) == 0 or is_bnf: - # if it's an atom, then return a ConceptNode - return CN(concept_found, start, start + length - 1, source=sub_expr) - else: - # else return a ParserHelper + if sya and len(concept_found.metadata.variables) > 0 and 
not is_bnf: return SyaConceptParserHelper(concept_found, start) + elif init_empty_body: + node = CNC(concept_found, start, start + length - 1, source=sub_expr, exclude_body=exclude_body) + init_body(node, concept_found, sub_expr) + return node + else: + return CN(concept_found, start, start + length - 1, source=sub_expr) else: # else an UnrecognizedTokensNode return utnode(start, start + length - 1, sub_expr) -def compute_expected_array(concepts_map, expression, expected, sya=False): +def init_body(item, concept, value): + if "body" in item.compiled: + item.compiled[ConceptParts.BODY] = item.compiled["body"] + del (item.compiled["body"]) + return + + if not concept or concept.metadata.body or ConceptParts.BODY in item.compiled: + return + + item.compiled[ConceptParts.BODY] = DoNotResolve(value) + + +def compute_expected_array(concepts_map, expression, expected, sya=False, init_empty_body=False, exclude_body=False): """ Computes a simple but sufficient version of the result of infix_to_postfix() :param concepts_map: :param expression: :param expected: :param sya: if true, generate an SyaConceptParserHelper instead of a cnode + :param init_empty_body: if True adds the source in the body (actually in compiled.BODY) + :param exclude_body: do not include ConceptParts.BODY in comparison :return: """ expression_as_tokens = [token.value for token in Tokenizer(expression) if token.type != TokenKind.EOF] - return [get_node(concepts_map, expression_as_tokens, sub_expr, sya=sya) for sub_expr in expected] + return [get_node( + concepts_map, + expression_as_tokens, + sub_expr, + sya=sya, + init_empty_body=init_empty_body, + exclude_body=exclude_body) for sub_expr in expected] diff --git a/tests/parsers/test_AtomsParser.py b/tests/parsers/test_AtomsParser.py index a18a980..e8d6aca 100644 --- a/tests/parsers/test_AtomsParser.py +++ b/tests/parsers/test_AtomsParser.py @@ -2,18 +2,21 @@ import pytest from core.builtin_concepts import BuiltinConcepts from core.concept import Concept 
from parsers.AtomNodeParser import AtomNodeParser -from parsers.BaseNodeParser import cnode, utnode, CNC +from parsers.BaseNodeParser import cnode, utnode, CNC, scnode, SCN from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka from tests.parsers.parsers_utils import compute_expected_array class TestAtomsParser(TestUsingMemoryBasedSheerka): - def init_parser(self, concepts_map, **kwargs): - sheerka, context, *updated_concepts = self.init_concepts(singleton=True, *concepts_map.values(), **kwargs) + def init_parser(self, my_map, create_new=False, singleton=True): + sheerka, context, *updated_concepts = self.init_concepts( + *my_map.values(), + create_new=create_new, + singleton=singleton) parser = AtomNodeParser() - parser.initialize(context, updated_concepts) + parser.init_from_concepts(context, updated_concepts) return sheerka, context, parser @@ -34,7 +37,7 @@ class TestAtomsParser(TestUsingMemoryBasedSheerka): concepts_map = { "foo": Concept("foo"), "bar": Concept("bar"), - "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_prop("unit"), + "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_var("unit"), } sheerka, context, parser = self.init_parser(concepts_map) @@ -57,7 +60,7 @@ class TestAtomsParser(TestUsingMemoryBasedSheerka): concepts_map = { "foo bar": Concept("foo bar"), "one two three": Concept("one two three"), - "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_prop("unit"), + "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_var("unit"), } sheerka, context, parser = self.init_parser(concepts_map) @@ -75,27 +78,27 @@ class TestAtomsParser(TestUsingMemoryBasedSheerka): ("foo bar suffixed one", False, ["foo bar", " suffixed ", "one"]), ("foo bar one prefixed", False, ["foo bar", "one", " prefixed"]), ("foo bar one infix two", False, ["foo bar", "one", " infix ", "two"]), - ("foo bar 1 + 1", True, ["foo bar", " 1 + 1"]), + ("foo 
bar 1 + 1", False, ["foo bar", " 1 + 1"]), ("foo bar twenty one", False, ["foo bar", " twenty ", "one"]), ("foo bar x$!#", False, ["foo bar", " x$!#"]), ("suffixed one foo bar", False, ["suffixed ", "one", "foo bar"]), ("one prefixed foo bar", False, ["one", " prefixed ", "foo bar"]), ("one infix two foo bar", False, ["one", " infix ", "two", "foo bar"]), - ("1 + 1 foo bar", True, ["1 + 1 ", "foo bar"]), + ("1 + 1 foo bar", False, ["1 + 1 ", "foo bar"]), ("twenty one foo bar", False, ["twenty ", "one", "foo bar"]), ("x$!# foo bar", False, ["x$!# ", "foo bar"]), ("func(one)", False, ["func(", "one", ")"]), ]) def test_i_can_parse_when_unrecognized(self, text, expected_status, expected): concepts_map = { - "prefixed": Concept("a prefixed").def_prop("a"), - "suffixed": Concept("prefixed a").def_prop("a"), - "infix": Concept("a infix b").def_prop("a").def_prop("b"), + "prefixed": Concept("a prefixed").def_var("a"), + "suffixed": Concept("prefixed a").def_var("a"), + "infix": Concept("a infix b").def_var("a").def_var("b"), "foo bar": Concept("foo bar"), "one": Concept("one"), "two": Concept("two"), - "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_prop("unit"), + "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_var("unit"), } sheerka, context, parser = self.init_parser(concepts_map) @@ -116,13 +119,13 @@ class TestAtomsParser(TestUsingMemoryBasedSheerka): ]) def test_i_can_parse_when_surrounded_by_spaces(self, text, expected_status, expected): concepts_map = { - "prefixed": Concept("a prefixed").def_prop("a"), - "suffixed": Concept("prefixed a").def_prop("a"), - "infix": Concept("a infix b").def_prop("a").def_prop("b"), + "prefixed": Concept("a prefixed").def_var("a"), + "suffixed": Concept("prefixed a").def_var("a"), + "infix": Concept("a infix b").def_var("a").def_var("b"), "foo bar": Concept("foo bar"), "one": Concept("one"), "two": Concept("two"), - "twenties": Concept("twenties", definition="'twenty' 
('one'|'two')=unit").def_prop("unit"), + "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_var("unit"), } sheerka, context, parser = self.init_parser(concepts_map) @@ -201,8 +204,8 @@ class TestAtomsParser(TestUsingMemoryBasedSheerka): ]) def test_i_cannot_parse_concepts_with_property_or_bnf_or_unrecognized(self, text): concepts_map = { - "foo": Concept("foo a").def_prop("a"), - "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_prop("unit"), + "foo": Concept("foo a").def_var("a"), + "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_var("unit"), } sheerka, context, parser = self.init_parser(concepts_map) @@ -221,12 +224,12 @@ class TestAtomsParser(TestUsingMemoryBasedSheerka): ]) def test_i_can_parse_when_unrecognized_yield_multiple_values(self, text, expected): concepts_map = { - "hello1": Concept("hello a").def_prop("a"), - "hello2": Concept("hello b").def_prop("b"), + "hello1": Concept("hello a").def_var("a"), + "hello2": Concept("hello b").def_var("b"), "bar": Concept("bar") } - sheerka, context, parser = self.init_parser(concepts_map, create_new=True) + sheerka, context, parser = self.init_parser(concepts_map, create_new=True, singleton=False) list_of_res = parser.parse(context, text) assert len(list_of_res) == len(expected) @@ -239,3 +242,23 @@ class TestAtomsParser(TestUsingMemoryBasedSheerka): expected_array = compute_expected_array(concepts_map, text, expected[1]) assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) assert lexer_nodes == expected_array + + @pytest.mark.parametrize("text, expected", [ + ("1 + twenty one", [SCN("1 + twenty "), "one"]), + ("one + twenty one", ["one", SCN(" + twenty "), ("one", 1)]), + ]) + def test_source_code_found_must_be_considered_as_potential_false_positive(self, text, expected): + concepts_map = { + "one": Concept("one") + } + + sheerka, context, parser = self.init_parser(concepts_map) + res = parser.parse(context, text) + 
wrapper = res.body + lexer_nodes = res.body.body + + assert not res.status + + expected_array = compute_expected_array(concepts_map, text, expected) + assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert lexer_nodes == expected_array diff --git a/tests/parsers/test_BaseNodeParser.py b/tests/parsers/test_BaseNodeParser.py new file mode 100644 index 0000000..2de01c6 --- /dev/null +++ b/tests/parsers/test_BaseNodeParser.py @@ -0,0 +1,257 @@ +import pytest +from core.concept import Concept +from parsers.BaseNodeParser import BaseNodeParser +from parsers.BnfNodeParser import StrMatch, Sequence, OrderedChoice, Optional, ZeroOrMore, OneOrMore, ConceptExpression + +from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka + + +class TestBaseNodeParser(TestUsingMemoryBasedSheerka): + @pytest.mark.parametrize("concept, expected", [ + (Concept("foo"), {"foo": ["1001"]}), + (Concept("foo a").def_var("a"), {"foo": ["1001"]}), + (Concept("a b foo").def_var("a").def_var("b"), {"foo": ["1001"]}), + ]) + def test_i_can_get_concepts_by_first_keyword(self, concept, expected): + """ + Given a concept, i can find the first know token + example: + Concept("a foo b").def_var("a").def_var("b") + 'a' and 'b' are properties + the first 'real' token is foo + :return: + """ + + sheerka, context, *updated = self.init_concepts(concept) + + res = BaseNodeParser.get_concepts_by_first_keyword(context, updated) + + assert res.status + assert res.body == expected + + @pytest.mark.parametrize("bnf, expected", [ + (StrMatch("foo"), {"foo": ["1002"]}), + (StrMatch("bar"), {"bar": ["1002"]}), + (ConceptExpression("bar"), {"c:|1001:": ["1002"]}), + (Sequence(StrMatch("foo"), StrMatch("bar")), {"foo": ["1002"]}), + (Sequence(StrMatch("foo"), ConceptExpression("bar")), {"foo": ["1002"]}), + (Sequence(ConceptExpression("bar"), StrMatch("foo")), {"c:|1001:": ["1002"]}), + (OrderedChoice(StrMatch("foo"), StrMatch("bar")), {"foo": ["1002"], "bar": ["1002"]}), + 
(Optional(StrMatch("foo")), {"foo": ["1002"]}), + (ZeroOrMore(StrMatch("foo")), {"foo": ["1002"]}), + (OneOrMore(StrMatch("foo")), {"foo": ["1002"]}), + ]) + def test_i_can_get_concepts_by_first_keyword_with_bnf(self, bnf, expected): + sheerka = self.get_sheerka() + context = self.get_context(sheerka) + + bar = Concept("bar").init_key() + sheerka.set_id_if_needed(bar, False) + sheerka.add_in_cache(bar) + + concept = Concept("foo").init_key() + concept.bnf = bnf + sheerka.set_id_if_needed(concept, False) + + res = BaseNodeParser.get_concepts_by_first_keyword(context, [concept]) + + assert res.status + assert res.body == expected + + def test_i_can_get_concepts_by_first_keyword_when_multiple_concepts(self): + sheerka = self.get_sheerka() + context = self.get_context(sheerka) + + bar = Concept("bar").init_key() + sheerka.set_id_if_needed(bar, False) + sheerka.add_in_cache(bar) + + baz = Concept("baz").init_key() + sheerka.set_id_if_needed(baz, False) + sheerka.add_in_cache(baz) + + foo = Concept("foo").init_key() + foo.bnf = OrderedChoice(ConceptExpression("bar"), ConceptExpression("baz"), StrMatch("qux")) + sheerka.set_id_if_needed(foo, False) + + res = BaseNodeParser.get_concepts_by_first_keyword(context, [bar, baz, foo]) + + assert res.status + assert res.body == { + "bar": ["1001"], + "baz": ["1002"], + "c:|1001:": ["1003"], + "c:|1002:": ["1003"], + "qux": ["1003"], + } + + def test_i_can_get_concepts_by_first_keyword_using_sheerka(self): + sheerka, context, *updated = self.init_concepts( + "one", + "two", + Concept("twenty", definition="'twenty' (one|two)"), + create_new=True + ) + + bar = Concept("bar").init_key() + sheerka.set_id_if_needed(bar, False) + sheerka.add_in_cache(bar) + + foo = Concept("foo").init_key() + foo.bnf = OrderedChoice(ConceptExpression("one"), ConceptExpression("bar"), StrMatch("qux")) + sheerka.set_id_if_needed(foo, False) + + res = BaseNodeParser.get_concepts_by_first_keyword(context, [bar, foo], use_sheerka=True) + + assert res.status 
+ assert res.body == { + "one": ["1001"], + "two": ["1002"], + "twenty": ["1003"], + "bar": ["1004"], + "c:|1001:": ["1005"], + "c:|1004:": ["1005"], + "qux": ["1005"], + } + + def test_i_can_resolve_concepts_by_first_keyword(self): + sheerka, context, *updated = self.init_concepts( + "one", + Concept("two", definition="one"), + Concept("three", definition="two"), + create_new=False + ) + + concepts_by_first_keywords = { + "one": ["1001"], + "c:|1001:": ["1002"], + "c:|1002:": ["1003"], + } + + resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(context, concepts_by_first_keywords) + + assert resolved_ret_val.status + assert resolved_ret_val.body == { + "one": ["1001", "1002", "1003"], + } + + def test_i_can_resolve_when_concepts_are_sets(self): + sheerka, context, one, two, three, number, foo = self.init_concepts( + "one", + "two", + "three", + "number", + Concept("foo", definition="number three"), + create_new=True # mandatory because set_isa() needs it + ) + + sheerka.set_isa(context, sheerka.new("one"), number) + sheerka.set_isa(context, sheerka.new("two"), number) + + cbfk = BaseNodeParser.get_concepts_by_first_keyword(context, [one, two, three, number, foo]).body + + resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(context, cbfk) + + assert resolved_ret_val.status + assert resolved_ret_val.body == { + "one": ["1001", "1005"], + "two": ["1002", "1005"], + "three": ["1003"], + "number": ["1004"], + } + + def test_concepts_are_defined_once(self): + sheerka = self.get_sheerka() + context = self.get_context(sheerka) + good = self.create_concept_lite(sheerka, "good") + foo = self.create_concept_lite(sheerka, "foo", bnf=ConceptExpression("good")) + bar = self.create_concept_lite(sheerka, "bar", bnf=ConceptExpression("good")) + baz = self.create_concept_lite(sheerka, "baz", bnf=OrderedChoice( + ConceptExpression("foo"), + ConceptExpression("bar"))) + + concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_keyword( + 
context, [good, foo, bar, baz]).body + + resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(context, concepts_by_first_keywords) + assert resolved_ret_val.status + assert resolved_ret_val.body == { + "good": ["1001", "1002", "1003", "1004"], + } + + def test_i_can_resolve_more_complex(self): + sheerka = self.get_sheerka() + context = self.get_context(sheerka) + + a = self.create_concept_lite(sheerka, "a", bnf=Sequence("one", "two")) + b = self.create_concept_lite(sheerka, "b", bnf=Sequence(ConceptExpression("a"), "two")) + + concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_keyword( + context, [a, b]).body + + resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(context, concepts_by_first_keywords) + assert resolved_ret_val.status + assert resolved_ret_val.body == { + "one": ["1001", "1002"], + } + + # def tests_i_can_detect_direct_recursion(self): + # sheerka = self.get_sheerka() + # good = self.get_concept(sheerka, "good") + # foo = self.get_concept(sheerka, "foo", ConceptExpression("bar")) + # bar = self.get_concept(sheerka, "bar", ConceptExpression("foo")) + # + # concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_keyword(sheerka, [good, foo, bar]).body + # + # resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(sheerka, concepts_by_first_keywords) + # assert resolved_ret_val.status + # assert resolved_ret_val.body == { + # "good": ["1001"], + # BuiltinConcepts.CHICKEN_AND_EGG: ["1002", "1003"] + # } + # + # def test_i_can_detect_indirect_infinite_recursion(self): + # sheerka = self.get_sheerka() + # good = self.get_concept(sheerka, "good") + # one = self.get_concept(sheerka, "one", ConceptExpression("two")) + # two = self.get_concept(sheerka, "two", ConceptExpression("three")) + # three = self.get_concept(sheerka, "three", ConceptExpression("two")) + # + # concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_keyword(sheerka, [good, one, two, three]).body + # + # 
resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(sheerka, concepts_by_first_keywords) + # assert resolved_ret_val.status + # assert resolved_ret_val.body == { + # "good": ["1001"], + # BuiltinConcepts.CHICKEN_AND_EGG: ["1002", "1004", "1003"] + # } + # + # def test_i_can_detect_infinite_recursion_from_ordered_choice(self): + # sheerka = self.get_sheerka() + # good = self.get_concept(sheerka, "good") + # one = self.get_concept(sheerka, "one", ConceptExpression("two")) + # two = self.get_concept(sheerka, "two", OrderedChoice(ConceptExpression("one"), ConceptExpression("two"))) + # + # concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_keyword(sheerka, [good, one, two]).body + # + # resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(sheerka, concepts_by_first_keywords) + # assert resolved_ret_val.status + # assert resolved_ret_val.body == { + # "good": ["1001"], + # BuiltinConcepts.CHICKEN_AND_EGG: ["1002", "1003"] + # } + # + # def test_i_can_detect_infinite_recursion_with_sequence(self): + # sheerka = self.get_sheerka() + # good = self.get_concept(sheerka, "good") + # one = self.get_concept(sheerka, "one", ConceptExpression("two")) + # two = self.get_concept(sheerka, "two", Sequence(StrMatch("yes"), ConceptExpression("one"))) + # + # concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_keyword(sheerka, [good, one, two]).body + # + # resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(sheerka, concepts_by_first_keywords) + # assert resolved_ret_val.status + # assert resolved_ret_val.body == { + # "good": ["1001"], + # BuiltinConcepts.CHICKEN_AND_EGG: ["1002", "1003"] + # } diff --git a/tests/parsers/test_BnfNodeParser.py b/tests/parsers/test_BnfNodeParser.py index c52d979..ef92c69 100644 --- a/tests/parsers/test_BnfNodeParser.py +++ b/tests/parsers/test_BnfNodeParser.py @@ -1,24 +1,25 @@ -from ast import Str - import pytest from core.builtin_concepts import BuiltinConcepts from 
core.concept import Concept, ConceptParts, DoNotResolve -from core.tokenizer import Tokenizer, TokenKind, Token -from parsers.BaseNodeParser import cnode, short_cnode -from parsers.BnfParser import BnfParser -from parsers.BnfNodeParser import BnfNodeParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \ - ParsingExpressionVisitor, TerminalNode, NonTerminalNode, ZeroOrMore, OneOrMore, \ - UnrecognizedTokensNode, ConceptExpression, ConceptGroupExpression +from parsers.BaseNodeParser import CNC, UTN, CN +from parsers.BnfNodeParser import BnfNodeParser, StrMatch, TerminalNode, NonTerminalNode, Sequence, OrderedChoice, \ + Optional, ZeroOrMore, OneOrMore, ConceptExpression +import tests.parsers.parsers_utils from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka - -class ConceptVisitor(ParsingExpressionVisitor): - def __init__(self): - self.concepts = set() - - def visit_ConceptExpression(self, node): - self.concepts.add(node.concept) +cmap = { + "one": Concept("one"), + "two": Concept("two"), + "three": Concept("three"), + "plus": Concept(name="a plus b").def_var("a").def_var("b"), + "bnf one": Concept("bnf_one", definition="'one'"), + 'one and two': Concept("one and two", definition="one two"), + 'one or more three': Concept("one or more three", definition="three+"), + 'two or four': Concept("two or four", definition="two | 'four'"), + "twenties": Concept("twenties", definition="'twenty' c:two or four:=unit"), + "one or more plus": Concept("one or more plus", definition="c:a plus b:+"), +} def u(parsing_expression, start, end, children=None): @@ -39,1252 +40,738 @@ def u(parsing_expression, start, end, children=None): return NonTerminalNode(parsing_expression, start, end, [], children) -def evaluated(concept): - c = Concept(name=concept.name, body=concept.name) - - -def t(text): - if text.startswith("'") or text.startswith('"'): - return Token(TokenKind.STRING, text, 0, 0, 0) - - if text.startswith(" "): - return 
Token(TokenKind.WHITESPACE, text, 0, 0, 0) - - return Token(TokenKind.IDENTIFIER, text, 0, 0, 0) - - -def get_expected(concept, text=None): - c = Concept(name=concept.name) - c.compiled[ConceptParts.BODY] = DoNotResolve(text or concept.name) - c.init_key() - c.metadata.id = concept.id - return c - - -def cbody(concept): - """cbody stands for compiled body""" - if not ConceptParts.BODY in concept.compiled: - return None - return concept.compiled[ConceptParts.BODY] - - -def cprop(concept, prop_name): - """cbody stands for compiled property""" - return concept.compiled[prop_name] +def compute_expected_array(my_concepts_map, expression, expected): + return tests.parsers.parsers_utils.compute_expected_array( + my_concepts_map, + expression, + expected, + init_empty_body=True) class TestBnfNodeParser(TestUsingMemoryBasedSheerka): + sheerka = None - def init(self, concepts, grammar): - sheerka = self.get_sheerka(singleton=True) - context = self.get_context(sheerka) - for c in concepts: - context.sheerka.add_in_cache(c) - context.sheerka.set_id_if_needed(c, False) + @classmethod + def setup_class(cls): + t = TestBnfNodeParser() + TestBnfNodeParser.sheerka, context, _ = t.init_parser( + cmap, + singleton=False, + create_new=True, + init_from_sheerka=True) + + def init_parser(self, my_concepts_map=None, init_from_sheerka=False, **kwargs): + if my_concepts_map is not None: + sheerka, context, *updated = self.init_concepts(*my_concepts_map.values(), **kwargs) + for i, pair in enumerate(my_concepts_map): + my_concepts_map[pair] = updated[i] + else: + sheerka = TestBnfNodeParser.sheerka + context = self.get_context(sheerka) + + parser = BnfNodeParser(sheerka=sheerka) if init_from_sheerka else BnfNodeParser() + return sheerka, context, parser + + def exec_get_concepts_sequences(self, my_map, text, expected, multiple_result=False, post_init_concepts=None): + sheerka, context, *updated = self.init_concepts(*my_map.values(), create_new=False, singleton=True) + if not 
multiple_result: + expected_array = [compute_expected_array(my_map, text, expected)] + else: + expected_array = [compute_expected_array(my_map, text, e) for e in expected] + + if post_init_concepts: + post_init_concepts(sheerka, context) parser = BnfNodeParser() - parser.initialize(context, grammar) + parser.init_from_concepts(context, updated) + parser.reset_parser(context, text) - return context, parser + bnf_parsers_helpers = parser.get_concepts_sequences() - def execute(self, concepts, grammar, text): - context, parser = self.init(concepts, grammar) + assert len(bnf_parsers_helpers) == len(expected_array) + for parser_helper, expected_sequence in zip(bnf_parsers_helpers, expected_array): + assert parser_helper.sequence == expected_sequence - res = parser.parse(context, text) - wrapper = res.value - return_value = res.value.value + if len(bnf_parsers_helpers) == 1: + return sheerka, context, bnf_parsers_helpers[0].sequence + else: + return sheerka, context, [pe.sequence for pe in bnf_parsers_helpers] - return context, res, wrapper, return_value + def validate_get_concepts_sequences(self, my_map, text, expected, multiple_result=False, post_init_concepts=None): + sheerka, context, sequences = self.exec_get_concepts_sequences( + my_map, text, expected, multiple_result, post_init_concepts + ) + return sequences + # sheerka, context, *updated = self.init_concepts(*my_map.values(), create_new=False, singleton=True) + # if not multiple_result: + # expected_array = [compute_expected_array(my_map, text, expected)] + # else: + # expected_array = [compute_expected_array(my_map, text, e) for e in expected] + # + # if post_init_concepts: + # post_init_concepts(sheerka, context) + # + # parser = BnfNodeParser() + # parser.init_from_concepts(context, updated) + # parser.reset_parser(context, text) + # + # bnf_parsers_helpers = parser.get_concepts_sequences() + # + # assert len(bnf_parsers_helpers) == len(expected_array) + # for parser_helper, expected_sequence in 
zip(bnf_parsers_helpers, expected_array): + # assert parser_helper.sequence == expected_sequence + # + # if len(bnf_parsers_helpers) == 1: + # return bnf_parsers_helpers[0].sequence + # else: + # return [pe.sequence for pe in bnf_parsers_helpers] - @pytest.mark.parametrize("match, text", [ - ("foo", "foo"), - ("'foo'", "'foo'"), - ("1", "1"), - ("3.14", "3.14"), - ("+", "+"), + def test_i_cannot_parse_empty_strings(self): + sheerka, context, parser = self.init_parser({}, singleton=True) + + res = parser.parse(context, "") + + assert not res.status + assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) + assert res.body.reason == BuiltinConcepts.IS_EMPTY + + @pytest.mark.parametrize("expr, text", [ (StrMatch("foo"), "foo"), (StrMatch("'foo'"), "'foo'"), (StrMatch("1"), "1"), (StrMatch("3.14"), "3.14"), (StrMatch("+"), "+"), ]) - def test_i_can_match_simple_tokens(self, match, text): - foo = Concept(name="foo") - grammar = {foo: match} + def test_i_can_match_simple_bnf(self, expr, text): + my_map = { + text: self.bnf_concept("foo", expr) + } - context, res, wrapper, return_value = self.execute([foo], grammar, text) - - assert res.status - assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - assert return_value == [ConceptNode(get_expected(foo, text), 0, 0, source=text, underlying=u(match, 0, 0))] + sequence = self.validate_get_concepts_sequences(my_map, text, [text]) + assert sequence[0].underlying == u(expr, 0, 0) def test_i_can_match_multiple_concepts_in_one_input(self): - one = Concept(name="one") - two = Concept(name="two") - grammar = {one: "one", two: "two"} + my_map = { + "one": self.bnf_concept("one"), + "two": self.bnf_concept("two"), + } - context, res, wrapper, return_value = self.execute([one, two], grammar, "one two one") + text = "one two one" + expected = ["one", "two", ("one", 1)] + self.validate_get_concepts_sequences(my_map, text, expected) - assert res.status - assert context.sheerka.isinstance(wrapper, 
BuiltinConcepts.PARSER_RESULT) - assert return_value == [ - ConceptNode(get_expected(one), 0, 0, source="one", underlying=u("one", 0, 0)), - ConceptNode(get_expected(two), 2, 2, source="two", underlying=u("two", 2, 2)), - ConceptNode(get_expected(one), 4, 4, source="one", underlying=u("one", 4, 4)), - ] + @pytest.mark.parametrize("text, expected", [ + ("one two three", [CNC("foo", source="one two three")]), + ("one two", []), + ("one two four", []), + ]) + def test_i_can_match_sequence(self, text, expected): + my_map = { + "foo": self.bnf_concept("foo", Sequence("one", "two", "three")), + } - def test_i_can_match_sequence(self): - foo = Concept(name="foo") - grammar = {foo: Sequence("one", "two", "three")} - - context, res, wrapper, return_value = self.execute([foo], grammar, "one two three") - - assert res.status - assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - assert return_value == [ - ConceptNode( - get_expected(foo, "one two three"), - 0, - 4, - source="one two three", - underlying=u(grammar[foo], 0, 4, [ - u("one", 0, 0), - u("two", 2, 2), - u("three", 4, 4)]))] + self.validate_get_concepts_sequences(my_map, text, expected) def test_i_always_choose_the_longest_match(self): - foo = Concept(name="foo") - bar = Concept(name="bar") - grammar = {bar: Sequence("one", "two"), foo: Sequence("one", "two", "three")} + my_map = { + "foo": self.bnf_concept("foo", Sequence("one", "two", "three")), + "bar": self.bnf_concept("bar", Sequence("one", "two")), + } - context, res, wrapper, return_value = self.execute([foo, bar], grammar, "one two three") + text = "one two three" + expected = [CNC("foo", source=text)] + self.validate_get_concepts_sequences(my_map, text, expected) - assert res.status - assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - assert return_value == [cnode("foo", 0, 4, "one two three")] + def test_i_can_match_multiple_sequences(self): + my_map = { + "foo": self.bnf_concept("foo", 
Sequence(StrMatch("one"), StrMatch("two"), StrMatch("three"))), + "bar": self.bnf_concept("bar", Sequence(StrMatch("one"), StrMatch("two"))), + } - def test_i_can_match_several_sequences(self): - foo = Concept(name="foo") - bar = Concept(name="bar") - grammar = {bar: Sequence("one", "two"), foo: Sequence("one", "two", "three")} + text = "one two three one two" + expected = [ + CNC("foo", source="one two three"), + CNC("bar", source="one two", start=6, end=8)] - context, res, wrapper, return_value = self.execute([foo, bar], grammar, "one two three one two") + self.validate_get_concepts_sequences(my_map, text, expected) - assert res.status - assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - assert return_value == [ - cnode("foo", 0, 4, "one two three"), - cnode("bar", 6, 8, "one two"), + @pytest.mark.parametrize("text, expected", [ + ("one", [CNC("foo", source="one")]), + ("two", [CNC("foo", source="two")]), + ("three", []), + + ]) + def test_i_can_match_ordered_choice(self, text, expected): + my_map = { + "foo": self.bnf_concept("foo", OrderedChoice(StrMatch("one"), StrMatch("two"))) + } + + self.validate_get_concepts_sequences(my_map, text, expected) + + def test_i_do_not_match_ordered_choice_with_empty_alternative(self): + my_map = { + "foo": self.bnf_concept("foo", OrderedChoice(StrMatch("one"), StrMatch(""))) + } + + text = "" + expected = [] + + self.validate_get_concepts_sequences(my_map, text, expected) + + @pytest.mark.parametrize("text, expected", [ + ("thirty one ok", [CNC("foo", source="thirty one ok")]), + ("twenty one ok", [CNC("foo", source="twenty one ok")]), + ]) + def test_i_can_mix_sequence_and_ordered(self, text, expected): + my_map = { + "foo": self.bnf_concept("foo", + Sequence( + OrderedChoice(StrMatch("twenty"), StrMatch("thirty")), + StrMatch("one"), + StrMatch("ok")) + )} + self.validate_get_concepts_sequences(my_map, text, expected) + + @pytest.mark.parametrize("text, expected", [ + ("twenty thirty", [CNC("foo", 
source="twenty thirty")]), + ("one", [CNC("foo", source="one")]), + ]) + def test_i_can_mix_ordered_choices_and_sequences(self, text, expected): + my_map = { + "foo": self.bnf_concept("foo", + OrderedChoice( + Sequence(StrMatch("twenty"), StrMatch("thirty")), + StrMatch("one")))} + + self.validate_get_concepts_sequences(my_map, text, expected) + + @pytest.mark.parametrize("text, expected", [ + ("one", [CNC("foo", source="one")]), + ("", []), + ("two", []), + ]) + def test_i_can_parse_optional(self, text, expected): + my_map = { + "foo": self.bnf_concept("foo", Optional(StrMatch("one"))) + } + + self.validate_get_concepts_sequences(my_map, text, expected) + + @pytest.mark.parametrize("text, expected", [ + ("twenty one", [CNC("foo", source="twenty one")]), + ("one", [CNC("foo", source="one")]), + ]) + def test_i_can_parse_sequence_starting_with_optional(self, text, expected): + my_map = { + "foo": self.bnf_concept("foo", + Sequence( + Optional(StrMatch("twenty")), + StrMatch("one"))) + } + + self.validate_get_concepts_sequences(my_map, text, expected) + + @pytest.mark.parametrize("text, expected", [ + ("one two three", [CNC("foo", source="one two three")]), + ("one two", [CNC("foo", source="one two")]), + ]) + def test_i_can_parse_sequence_ending_with_optional(self, text, expected): + my_map = { + "foo": self.bnf_concept("foo", + Sequence( + StrMatch("one"), + StrMatch("two"), + Optional(StrMatch("three")))) + } + + self.validate_get_concepts_sequences(my_map, text, expected) + + @pytest.mark.parametrize("text, expected", [ + ("one two three", [CNC("foo", source="one two three")]), + ("one three", [CNC("foo", source="one three")]), + ]) + def test_i_can_parse_sequence_with_optional_in_between(self, text, expected): + my_map = { + "foo": self.bnf_concept("foo", + Sequence( + StrMatch("one"), + Optional(StrMatch("two")), + StrMatch("three"))) + } + + self.validate_get_concepts_sequences(my_map, text, expected) + + @pytest.mark.parametrize("text, expected", [ + ("", 
[]), + ("two", []), + ("one", [CNC("foo", source="one")]), + ("one one", [CNC("foo", source="one one")]), + ]) + def test_i_can_parse_zero_or_more(self, text, expected): + my_map = { + "foo": self.bnf_concept("foo", ZeroOrMore(StrMatch("one"))) + } + + self.validate_get_concepts_sequences(my_map, text, expected) + + @pytest.mark.parametrize("text, expected", [ + ("two", [CNC("foo", source="two")]), + ("one two", [CNC("foo", source="one two")]), + ("one one two", [CNC("foo", source="one one two")]), + ]) + def test_i_can_parse_sequence_and_zero_or_more(self, text, expected): + my_map = { + "foo": self.bnf_concept("foo", + Sequence( + ZeroOrMore(StrMatch("one")), + StrMatch("two") + )) + } + + self.validate_get_concepts_sequences(my_map, text, expected) + + @pytest.mark.parametrize("text, expected", [ + ("one, one , one", [CNC("foo", source="one, one , one")]), + ]) + def test_i_can_parse_zero_or_more_with_separator(self, text, expected): + my_map = { + "foo": self.bnf_concept("foo", ZeroOrMore(StrMatch("one"), sep=",")) + } + + self.validate_get_concepts_sequences(my_map, text, expected) + + def test_that_zero_or_more_is_greedy(self): + my_map = { + "foo": self.bnf_concept("foo", ZeroOrMore(StrMatch("one"))), + "bar": self.bnf_concept("foo", StrMatch("one")) + } + + text = "one one one" + expected = [CNC("foo", source=text)] + self.validate_get_concepts_sequences(my_map, text, expected) + + @pytest.mark.parametrize("text, expected", [ + ("", []), + ("two", []), + ("one", [CNC("foo", source="one")]), + ("one one one", [CNC("foo", source="one one one")]), + ]) + def test_i_can_parse_one_or_more(self, text, expected): + my_map = { + "foo": self.bnf_concept("foo", OneOrMore(StrMatch("one"))), + } + + self.validate_get_concepts_sequences(my_map, text, expected) + + @pytest.mark.parametrize("text, expected", [ + ("two", []), + ("one two", [CNC("foo", source="one two")]), + ("one one two", [CNC("foo", source="one one two")]), + ]) + def 
test_i_can_parse_sequence_one_and_or_more(self, text, expected): + my_map = { + "foo": self.bnf_concept("foo", + Sequence( + OneOrMore(StrMatch("one")), + StrMatch("two") + )) + } + + self.validate_get_concepts_sequences(my_map, text, expected) + + @pytest.mark.parametrize("text, expected", [ + ("one, one , one", [CNC("foo", source="one, one , one")]), + ]) + def test_i_can_parse_one_or_more_with_separator(self, text, expected): + my_map = { + "foo": self.bnf_concept("foo", OneOrMore(StrMatch("one"), sep=",")) + } + + self.validate_get_concepts_sequences(my_map, text, expected) + + def test_that_one_or_more_is_greedy(self): + my_map = { + "foo": self.bnf_concept("foo", OneOrMore(StrMatch("one"))), + "bar": self.bnf_concept("foo", StrMatch("one")) + } + + text = "one one one" + expected = [CNC("foo", source=text)] + self.validate_get_concepts_sequences(my_map, text, expected) + + @pytest.mark.parametrize("text, expected", [ + ("one two", [ + [CNC("foo", source="one two")], + [CNC("bar", source="one two")]]), + ("one two one two", [ + [CNC("bar", source="one two"), CNC("bar", source="one two")], + [CNC("foo", source="one two"), CNC("bar", source="one two")], + [CNC("bar", source="one two"), CNC("foo", source="one two")], + [CNC("foo", source="one two"), CNC("foo", source="one two")]]), + ]) + def test_i_can_have_multiple_results(self, text, expected): + my_map = { + "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), StrMatch("two"))), + "bar": self.bnf_concept("bar", Sequence( + StrMatch("one"), + OrderedChoice(StrMatch("two"), StrMatch("three")))), + } + + text = "one two" + expected = [[CNC("foo", source=text)], [CNC("bar", source=text)]] + self.validate_get_concepts_sequences(my_map, text, expected, multiple_result=True) + + def test_i_can_refer_to_other_concepts(self): + my_map = { + "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), StrMatch("two"))), + "bar": self.bnf_concept("bar", ConceptExpression("foo")) + } + + text = "one two" + expected = [ + 
[CNC("foo", source=text)], + [CN("bar", source=text)] # Do not check the compiled part ] + sequences = self.validate_get_concepts_sequences(my_map, text, expected, multiple_result=True) - def test_i_can_match_ordered_choice(self): - foo = Concept(name="foo") - grammar = {foo: OrderedChoice("one", "two")} - context, parser = self.init([foo], grammar) + # explicit validations of the compiled + concept_foo = sequences[0][0].concept + assert concept_foo.body is None + assert concept_foo.compiled == {ConceptParts.BODY: DoNotResolve("one two")} - res1 = parser.parse(context, "one") - assert res1.status - assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT) - assert res1.value.body == [cnode("foo", 0, 0, "one")] - assert res1.value.body[0].underlying == u(grammar[foo], 0, 0, [u("one", 0, 0)]) + concept_bar = sequences[1][0].concept + assert concept_bar.body is None + assert concept_bar.compiled == { + ConceptParts.BODY: concept_foo, + "foo": concept_foo + } + assert id(concept_bar.compiled[ConceptParts.BODY]) == id(concept_bar.compiled["foo"]) - res2 = parser.parse(context, "two") - assert res2.status - assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT) - assert res2.value.body == [cnode("foo", 0, 0, "two")] - assert res2.value.body[0].underlying == u(grammar[foo], 0, 0, [u("two", 0, 0)]) + def test_i_can_refer_to_other_concepts_with_body(self): + my_map = { + "foo": self.bnf_concept(Concept("foo", body="'foo'"), Sequence(StrMatch("one"), StrMatch("two"))), + "bar": self.bnf_concept("bar", ConceptExpression("foo")) + } - res3 = parser.parse(context, "three") - assert not res3.status - assert context.sheerka.isinstance(res3.value, BuiltinConcepts.PARSER_RESULT) - assert res3.value.value == [ - UnrecognizedTokensNode(0, 0, [t("three")]) + text = "one two" + expected = [ + [CNC("foo", source=text)], + [CN("bar", source=text)] # Do not check the compiled part ] + sequences = self.validate_get_concepts_sequences(my_map, text, 
expected, multiple_result=True) - def test_i_cannot_match_ordered_choice_with_empty_alternative(self): - foo = Concept(name="foo") - grammar = {foo: Sequence(OrderedChoice("one", ""), "two")} + # explicit validations of the compiled + concept_foo = sequences[0][0].concept + assert concept_foo.body is None + assert len(concept_foo.compiled) == 0 # because there is a body defined in the metadata - context, res, wrapper, return_value = self.execute([foo], grammar, "ok") - - assert not res.status - assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - assert return_value == [ - UnrecognizedTokensNode(0, 0, [t("ok")]) - ] - - def test_i_can_mix_sequences_and_ordered_choices(self): - foo = Concept(name="foo") - grammar = {foo: Sequence(OrderedChoice("twenty", "thirty"), "one", "ok")} - - context, parser = self.init([foo], grammar) - - res1 = parser.parse(context, "twenty one ok") - assert res1.status - assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT) - assert res1.value.body == [ConceptNode(get_expected(foo, "twenty one ok"), 0, 4, source="twenty one ok", - underlying=u(grammar[foo], 0, 4, [ - u(OrderedChoice("twenty", "thirty"), 0, 0, [u("twenty", 0, 0)]), - u("one", 2, 2), - u("ok", 4, 4)]))] - - res2 = parser.parse(context, "thirty one ok") - assert res2.status - assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT) - assert res2.value.body == [ConceptNode(get_expected(foo, "thirty one ok"), 0, 4, source="thirty one ok", - underlying=u(grammar[foo], 0, 4, [ - u(OrderedChoice("twenty", "thirty"), 0, 0, [u("thirty", 0, 0)]), - u("one", 2, 2), - u("ok", 4, 4)]))] - - res3 = parser.parse(context, "twenty one") - assert not res3.status - assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT) - assert res3.value.value == [ - UnrecognizedTokensNode(0, 2, [t("twenty"), t(" "), t("one")]) - ] - - def test_i_can_mix_ordered_choices_and_sequences(self): - foo = Concept(name="foo") - 
grammar = {foo: OrderedChoice(Sequence("twenty", "thirty"), "one")} - - context, parser = self.init([foo], grammar) - - res = parser.parse(context, "twenty thirty") - assert res.status - assert res.value.value == [cnode("foo", 0, 2, "twenty thirty")] - - res = parser.parse(context, "one") - assert res.status - assert res.value.value == [cnode("foo", 0, 0, "one")] - - def test_i_cannot_parse_empty_optional(self): - foo = Concept(name="foo") - grammar = {foo: Optional("one")} - context, parser = self.init([foo], grammar) - - res = parser.parse(context, "") - return_value = res.value - - assert not res.status - assert context.sheerka.isinstance(return_value, BuiltinConcepts.IS_EMPTY) - - def test_i_can_parse_optional(self): - foo = Concept(name="foo") - grammar = {foo: Optional("one")} - - context, res, wrapper, return_value = self.execute([foo], grammar, "one") - - assert res.status - assert return_value == [ConceptNode(get_expected(foo, "one"), 0, 0, source="one", - underlying=u(grammar[foo], 0, 0, [u("one", 0, 0)]))] - - def test_i_can_parse_sequence_starting_with_optional(self): - foo = Concept(name="foo") - grammar = {foo: Sequence(Optional("twenty"), "one")} - context, parser = self.init([foo], grammar) - - res = parser.parse(context, "twenty one") - assert res.status - assert res.value.body == [ConceptNode( - get_expected(foo, "twenty one"), 0, 2, - source="twenty one", - underlying=u(grammar[foo], 0, 2, - [ - u(Optional("twenty"), 0, 0, [u("twenty", 0, 0)]), - u("one", 2, 2)] - ))] - - res = parser.parse(context, "one") - assert res.status - assert res.value.body == [ConceptNode(get_expected(foo, "one"), 0, 0, source="one", - underlying=u(grammar[foo], 0, 0, [u("one", 0, 0)]))] - - def test_i_can_parse_sequence_ending_with_optional(self): - foo = Concept(name="foo") - grammar = {foo: Sequence("one", "two", Optional("three"))} - - context, parser = self.init([foo], grammar) - - res = parser.parse(context, "one two three") - assert res.status - assert 
res.value.body == [cnode("foo", 0, 4, "one two three")] - - res = parser.parse(context, "one two") - assert res.status - assert res.value.body == [cnode("foo", 0, 2, "one two")] - - def test_i_can_parse_sequence_with_optional_in_between(self): - foo = Concept(name="foo") - - grammar = {foo: Sequence("one", Optional("two"), "three")} - - context, parser = self.init([foo], grammar) - - res = parser.parse(context, "one two three") - assert res.status - assert res.value.body == [cnode("foo", 0, 4, "one two three")] - - res = parser.parse(context, "one three") - assert res.status - assert res.value.body == [cnode("foo", 0, 2, "one three")] - - def test_i_cannot_parse_wrong_input_with_optional(self): - foo = Concept(name="foo") - grammar = {foo: Optional("one")} - - context, res, wrapper, return_value = self.execute([foo], grammar, "two") - - assert not res.status - assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - assert return_value == [ - UnrecognizedTokensNode(0, 0, [t("two")]) - ] - - def test_i_can_use_reference(self): - # when there are multiple matches for the same input - # Do I need to create a choice concept ? 
- # No, create a return value for every possible graph - - foo = Concept(name="foo") - bar = Concept(name="bar") - grammar = {foo: Sequence("one", "two"), bar: foo} - context, parser = self.init([foo, bar], grammar) - res = parser.parse(context, "one two") - - assert len(res) == 2 - - assert res[0].status - assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) - assert res[0].value.body == [cnode("foo", 0, 2, "one two")] - concept_found_1 = res[0].value.body[0].concept - assert cbody(concept_found_1) == DoNotResolve("one two") - - assert res[1].status - assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) - assert res[1].value.body == [cnode("bar", 0, 2, "one two")] - concept_found_2 = res[1].value.body[0].concept - # the body and the prop['foo'] are the same concept 'foo' - assert cbody(concept_found_2) == get_expected(foo, "one two") - assert id(cprop(concept_found_2, "foo")) == id(cbody(concept_found_2)) - - def test_i_can_use_a_reference_with_a_body(self): - """ - Same test than before (test_i_can_use_reference()) - but this time, the concept 'foo' already has a body. 
- :return: - """ - - foo = Concept(name="foo", body="'foo'") - bar = Concept(name="bar") - grammar = {foo: Sequence("one", "two"), bar: foo} - context, parser = self.init([foo, bar], grammar) - res = parser.parse(context, "one two") - - assert len(res) == 2 - - assert res[0].status - assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) - assert res[0].value.body == [cnode("foo", 0, 2, "one two")] - concept_found_1 = res[0].value.body[0].concept - assert concept_found_1.metadata.body == "'foo'" - assert cbody(concept_found_1) is None - - assert res[1].status - assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) - assert res[1].value.body == [cnode("bar", 0, 2, "one two")] - concept_found_2 = res[1].value.body[0].concept - assert cbody(concept_found_2) == foo - # the body and the prop['foo'] are the same concept 'foo' - assert id(cprop(concept_found_2, "foo")) == id(cbody(concept_found_2)) + concept_bar = sequences[1][0].concept + assert concept_bar.body is None + assert concept_bar.compiled == { + ConceptParts.BODY: concept_foo, + "foo": concept_foo + } def test_i_can_use_context_reference_with_multiple_levels(self): - """ - Same than previous one, but with reference of reference - :return: - """ - - foo = Concept(name="foo") - bar = Concept(name="bar") - baz = Concept(name="baz") - grammar = {foo: Sequence("one", "two"), bar: foo, baz: bar} - context, parser = self.init([foo, bar, baz], grammar) - - res = parser.parse(context, "one two") - assert len(res) == 3 - - assert res[0].status - assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) - assert res[0].value.body == [cnode("foo", 0, 2, "one two")] - concept_found_1 = res[0].value.body[0].concept - assert cbody(concept_found_1) == DoNotResolve("one two") - - assert res[1].status - assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) - assert res[1].value.body == [cnode("bar", 0, 2, "one two")] - concept_found_2 
= res[1].value.body[0].concept - assert cbody(concept_found_2) == get_expected(foo, "one two") - assert id(cprop(concept_found_2, "foo")) == id(cbody(concept_found_2)) - - assert res[2].status - assert context.sheerka.isinstance(res[2].value, BuiltinConcepts.PARSER_RESULT) - assert res[2].value.body == [cnode("baz", 0, 2, "one two")] - concept_found_3 = res[2].value.body[0].concept - expected_foo = get_expected(foo, "one two") - assert cbody(concept_found_3) == get_expected(bar, expected_foo) - assert cprop(concept_found_3, "foo") == expected_foo - assert id(cprop(concept_found_3, "bar")) == id(cbody(concept_found_3)) - - def test_order_is_not_important_when_using_references(self): - """ - Same test than test_i_can_use_reference(), - but this time, 'bar' is declared before 'foo' - So the order of the result is different - :return: - """ - foo = Concept(name="foo") - bar = Concept(name="bar") - grammar = {bar: foo, foo: Sequence("one", "two")} - context, parser = self.init([foo, bar], grammar) - - res = parser.parse(context, "one two") - assert len(res) == 2 - assert res[0].value.body == [cnode("bar", 0, 2, "one two")] - assert res[1].value.body == [cnode("foo", 0, 2, "one two")] - - def test_i_can_parse_when_reference(self): - foo = Concept(name="foo") - bar = Concept(name="bar") - grammar = {bar: Sequence(foo, OrderedChoice("one", "two")), foo: OrderedChoice("twenty", "thirty")} - context, parser = self.init([foo, bar], grammar) - - res = parser.parse(context, "twenty two") - assert res.status - assert res.value.body == [cnode("bar", 0, 2, "twenty two")] - concept_found = res.value.body[0].concept - assert cbody(concept_found) == DoNotResolve("twenty two") - assert cprop(concept_found, "foo") == get_expected(foo, "twenty") - - res = parser.parse(context, "thirty one") - assert res.status - assert res.value.body == [cnode("bar", 0, 2, "thirty one")] - concept_found = res.value.body[0].concept - assert cbody(concept_found) == DoNotResolve("thirty one") - assert 
cprop(concept_found, "foo") == get_expected(foo, "thirty") - - res = parser.parse(context, "twenty") - assert res.status - assert res.value.body == [cnode("foo", 0, 0, "twenty")] - concept_found = res.value.body[0].concept - assert cbody(concept_found) == DoNotResolve("twenty") - - def test_i_can_parse_when_reference_has_a_body(self): - foo = Concept(name="foo", body="'one'") - bar = Concept(name="bar") - grammar = {bar: Sequence(foo, OrderedChoice("one", "two")), foo: OrderedChoice("twenty", "thirty")} - context, parser = self.init([foo, bar], grammar) - - res = parser.parse(context, "twenty two") - assert res.status - assert res.value.body == [cnode("bar", 0, 2, "twenty two")] - concept_found = res.value.body[0].concept - assert cbody(concept_found) == DoNotResolve("twenty two") - assert cprop(concept_found, "foo") == foo - - res = parser.parse(context, "twenty") - assert res.status - assert res.value.body == [cnode("foo", 0, 0, "twenty")] - concept_found = res.value.body[0].concept - assert concept_found.metadata.body == "'one'" - - def test_i_can_parse_multiple_results(self): - foo = Concept(name="foo") - bar = Concept(name="bar") - grammar = { - bar: Sequence("one", "two"), - foo: Sequence("one", OrderedChoice("two", "three")) + my_map = { + "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), StrMatch("two"))), + "bar": self.bnf_concept("bar", ConceptExpression("foo")), + "baz": self.bnf_concept("baz", ConceptExpression("bar")), } - context, parser = self.init([foo, bar], grammar) - res = parser.parse(context, "one two") - assert len(res) == 2 - assert res[0].status - assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) - assert res[0].value.body == [cnode("bar", 0, 2, "one two")] - concept_found_0 = res[0].value.body[0].concept - assert cbody(concept_found_0) == DoNotResolve("one two") - assert len(concept_found_0.props) == 0 + text = "one two" + expected = [ + [CNC("foo", source=text)], + [CN("bar", source=text)], # Do not 
check the compiled part + [CN("baz", source=text)], # Do not check the compiled part + ] + sequences = self.validate_get_concepts_sequences(my_map, text, expected, multiple_result=True) - assert res[1].status - assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) - assert res[1].value.body == [cnode("foo", 0, 2, "one two")] - concept_found_1 = res[1].value.body[0].concept - assert cbody(concept_found_1) == DoNotResolve("one two") - assert len(concept_found_1.props) == 0 + # explicit validations of the compiled + concept_foo = sequences[0][0].concept + assert concept_foo.body is None + assert concept_foo.compiled == {ConceptParts.BODY: DoNotResolve("one two")} - def test_i_can_parse_multiple_results_times_two(self): - foo = Concept(name="foo") - bar = Concept(name="bar") - grammar = { - bar: Sequence("one", "two"), - foo: Sequence("one", OrderedChoice("two", "three")) + concept_bar = sequences[1][0].concept + assert concept_bar.body is None + assert concept_bar.compiled == { + ConceptParts.BODY: concept_foo, + "foo": concept_foo } - context, parser = self.init([foo, bar], grammar) - res = parser.parse(context, "one two one two") - assert len(res) == 4 - assert res[0].status - assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) - assert res[0].value.body == [short_cnode("bar", "one two"), short_cnode("bar", "one two")] - - assert res[1].status - assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) - assert res[1].value.body == [short_cnode("foo", "one two"), short_cnode("bar", "one two")] - - assert res[2].status - assert context.sheerka.isinstance(res[2].value, BuiltinConcepts.PARSER_RESULT) - assert res[2].value.body == [short_cnode("bar", "one two"), short_cnode("foo", "one two")] - - assert res[3].status - assert context.sheerka.isinstance(res[3].value, BuiltinConcepts.PARSER_RESULT) - assert res[3].value.body == [short_cnode("foo", "one two"), short_cnode("foo", "one two")] - - def 
test_i_can_parse_multiple_results_when_reference(self): - """ - TODO : There should no be two answer, has the one with bar is totally useless - Note that bar = Sequence(foo, OrderedChoice("one", "two")) does not match - - :return: - """ - foo = Concept(name="foo") - bar = Concept(name="bar") - grammar = { - bar: Sequence(foo, Optional(OrderedChoice("one", "two"))), - foo: OrderedChoice("twenty", "thirty") + concept_baz = sequences[2][0].concept + assert concept_baz.body is None + assert concept_baz.compiled == { + ConceptParts.BODY: concept_bar, + "bar": concept_bar, + "foo": concept_foo, } - context, parser = self.init([foo, bar], grammar) - res = parser.parse(context, "twenty") - assert len(res) == 2 - assert res[0].status - assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) - assert res[0].value.body == [cnode("bar", 0, 0, "twenty")] + def test_i_can_mix_reference_to_other_concepts(self): + my_map = { + "foo": self.bnf_concept("foo", OrderedChoice(StrMatch("twenty"), StrMatch("thirty"))), + "bar": self.bnf_concept("bar", Sequence( + ConceptExpression("foo"), + OrderedChoice(StrMatch("one"), StrMatch("two")))), + } - assert res[1].status - assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) - assert res[1].value.body == [cnode("foo", 0, 0, "twenty")] + text = "twenty two" + expected = [CN("bar", source="twenty two")] + sequences = self.validate_get_concepts_sequences(my_map, text, expected) + concept_bar = sequences[0].concept + assert concept_bar.compiled == { + ConceptParts.BODY: DoNotResolve("twenty two"), + "foo": my_map["foo"], + } + assert concept_bar.compiled["foo"].compiled == {ConceptParts.BODY: DoNotResolve("twenty")} + + text = "thirty one" + expected = [CN("bar", source="thirty one")] + sequences = self.validate_get_concepts_sequences(my_map, text, expected) + concept_bar = sequences[0].concept + assert concept_bar.compiled == { + ConceptParts.BODY: DoNotResolve("thirty one"), + "foo": 
my_map["foo"], + } + assert concept_bar.compiled["foo"].compiled == {ConceptParts.BODY: DoNotResolve("thirty")} + + def test_i_can_mix_reference_to_other_concepts_when_body(self): + my_map = { + "foo": self.bnf_concept(Concept("foo", body="'foo'"), + OrderedChoice(StrMatch("twenty"), StrMatch("thirty"))), + "bar": self.bnf_concept("bar", Sequence( + ConceptExpression("foo"), + OrderedChoice(StrMatch("one"), StrMatch("two")))), + } + + text = "twenty two" + expected = [CN("bar", source="twenty two")] + sheerka, context, sequences = self.exec_get_concepts_sequences(my_map, text, expected) + + concept_bar = sequences[0].concept + assert concept_bar.compiled == { + ConceptParts.BODY: DoNotResolve("twenty two"), + "foo": sheerka.new("foo"), + } + assert concept_bar.compiled["foo"].compiled == {} # as foo as a body + + text = "thirty one" + expected = [CN("bar", source="thirty one")] + sequences = self.validate_get_concepts_sequences(my_map, text, expected) + concept_bar = sequences[0].concept + assert concept_bar.compiled == { + ConceptParts.BODY: DoNotResolve("thirty one"), + "foo": sheerka.new("foo"), + } + assert concept_bar.compiled["foo"].compiled == {} + + def test_i_can_mix_zero_and_more_and_reference_to_other_concepts(self): + my_map = { + "foo": self.bnf_concept("foo", OrderedChoice(StrMatch("one"), StrMatch("two"), StrMatch("three"))), + "bar": self.bnf_concept("bar", ZeroOrMore(ConceptExpression("foo"))), + } + + text = "one two three" + expected = [CN("bar", source="one two three")] + sequences = self.validate_get_concepts_sequences(my_map, text, expected) + concept_bar = sequences[0].concept + assert concept_bar.compiled == { + ConceptParts.BODY: DoNotResolve("one two three"), + "foo": [my_map["foo"], my_map["foo"], my_map["foo"]] + } + assert concept_bar.compiled["foo"][0].compiled == {ConceptParts.BODY: DoNotResolve("one")} + assert concept_bar.compiled["foo"][1].compiled == {ConceptParts.BODY: DoNotResolve("two")} + assert 
concept_bar.compiled["foo"][2].compiled == {ConceptParts.BODY: DoNotResolve("three")} def test_i_can_parse_concept_reference_that_is_not_in_grammar(self): - one = Concept(name="one") - two = Concept(name="two") - foo = Concept(name="foo") - grammar = {foo: Sequence("twenty", OrderedChoice(one, two))} - context, parser = self.init([one, two, foo], grammar) - - res = parser.parse(context, "twenty two") - assert res.status - assert res.value.body == [cnode("foo", 0, 2, "twenty two")] - concept_found = res.value.body[0].concept - assert cbody(concept_found) == DoNotResolve("twenty two") - assert cprop(concept_found, "two") == get_expected(two, "two") - - res = parser.parse(context, "twenty one") - assert res.status - assert res.value.body == [cnode("foo", 0, 2, "twenty one")] - - def test_i_can_initialize_when_cyclic_reference(self): - foo = Concept(name="foo") - grammar = {foo: Optional("one", ConceptExpression("foo"))} - context, parser = self.init([foo], grammar) - - assert parser.concepts_grammars[foo] == Optional("one", ConceptExpression(foo, rule_name="foo")) - - def test_i_cannot_initialize_when_cyclic_reference_when_concept_is_under_construction_and_not_known(self): - foo = Concept(name="foo").init_key() - grammar = {foo: Optional("one", ConceptExpression("foo"))} - - context = self.get_context() - parser = BnfNodeParser() - parser.initialize(context, grammar) - assert parser.concepts_grammars[foo] == Optional("one", ConceptExpression("foo", rule_name="foo")) - - def test_i_can_initialize_when_cyclic_reference_when_concept_is_under_construction_and_known(self): - foo = Concept(name="foo").init_key() - grammar = {foo: Optional("one", ConceptExpression("foo"))} - - context = self.get_context() - context.concepts["foo"] = foo - parser = BnfNodeParser() - parser.initialize(context, grammar) - assert parser.concepts_grammars[foo] == Optional("one", ConceptExpression(foo, rule_name="foo")) - - def test_i_can_parse_concept_reference_that_is_group(self): - """ - if one 
is number, then number is a 'group' - a group can be found under the sdp entry 'all_' - """ - - context = self.get_context() - one = Concept(name="one") - two = Concept(name="two") - number = Concept(name="number") - foo = Concept(name="foo") - for c in [one, two, number, foo]: - context.sheerka.set_id_if_needed(c, False) - context.sheerka.add_in_cache(c) - - context.sheerka.add_concept_to_set(context, one, number) - context.sheerka.add_concept_to_set(context, two, number) - - grammar = {foo: Sequence("twenty", number)} - - parser = BnfNodeParser() - parser.initialize(context, grammar) - - res = parser.parse(context, "twenty two") - assert res.status - assert res.value.body == [cnode("foo", 0, 2, "twenty two")] - concept_found = res.value.body[0].concept - assert cbody(concept_found) == DoNotResolve("twenty two") - assert cprop(concept_found, "two") == get_expected(two, "two") - assert cprop(concept_found, "number") == get_expected(number, get_expected(two, "two")) - - res = parser.parse(context, "twenty one") - assert res.status - assert res.value.body == [cnode("foo", 0, 2, "twenty one")] - concept_found = res.value.body[0].concept - assert cbody(concept_found) == DoNotResolve("twenty one") - assert cprop(concept_found, "one") == get_expected(one, "one") - assert cprop(concept_found, "number") == get_expected(number, get_expected(one, "one")) - - def test_i_can_parse_zero_or_more(self): - foo = Concept(name="foo") - grammar = {foo: ZeroOrMore("one")} - - context, res, wrapper, return_value = self.execute([foo], grammar, "one one") - - assert res.status - assert return_value == [cnode("foo", 0, 2, "one one")] - assert return_value[0].underlying == u(grammar[foo], 0, 2, [u("one", 0, 0), u("one", 2, 2)]) - - concept_found = return_value[0].concept - assert cbody(concept_found) == DoNotResolve("one one") - - def test_i_can_parse_sequence_and_zero_or_more(self): - foo = Concept(name="foo") - grammar = {foo: Sequence(ZeroOrMore("one"), "two")} - context, parser = 
self.init([foo], grammar) - - res = parser.parse(context, "one one two") - assert res.status - assert res.value.value == [cnode("foo", 0, 4, "one one two")] - - res = parser.parse(context, "two") - assert res.status - assert res.value.value == [cnode("foo", 0, 0, "two")] - - def test_i_cannot_parse_zero_and_more_when_wrong_entry(self): - foo = Concept(name="foo") - grammar = {foo: ZeroOrMore("one")} - context, parser = self.init([foo], grammar) - - parser = BnfNodeParser() - parser.initialize(context, grammar) - - res = parser.parse(context, "one two") - assert not res.status - assert res.value.value == [ - cnode("foo", 0, 0, "one"), - UnrecognizedTokensNode(1, 2, [t(" "), t("two")]) - ] - - res = parser.parse(context, "two") - assert not res.status - assert res.value.value == [ - UnrecognizedTokensNode(0, 0, [t("two")]) - ] - - def test_i_can_parse_zero_and_more_with_separator(self): - foo = Concept(name="foo") - grammar = {foo: ZeroOrMore("one", sep=",")} - - context, res, wrapper, return_value = self.execute([foo], grammar, "one, one , one") - - assert res.status - assert return_value == [cnode("foo", 0, 7, "one, one , one")] - - def test_that_zero_and_more_is_greedy(self): - foo = Concept(name="foo") - bar = Concept(name="bar") - grammar = {foo: ZeroOrMore("one"), bar: "one"} - - context, res, wrapper, return_value = self.execute([foo], grammar, "one one one") - - assert res.status - assert return_value == [cnode("foo", 0, 4, "one one one")] - - def test_i_can_parse_one_and_more(self): - foo = Concept(name="foo") - grammar = {foo: OneOrMore("one")} - - context, res, wrapper, return_value = self.execute([foo], grammar, "one one") - - assert res.status - assert return_value == [cnode("foo", 0, 2, "one one")] - assert return_value[0].underlying == u(grammar[foo], 0, 2, [ - u("one", 0, 0), - u("one", 2, 2)]) - - def test_i_can_parse_sequence_and_one_or_more(self): - foo = Concept(name="foo") - grammar = {foo: Sequence(OneOrMore("one"), "two")} - context, parser = 
self.init([foo], grammar) - - res = parser.parse(context, "one one two") - assert res.status - assert res.value.value == [cnode("foo", 0, 4, "one one two")] - - res = parser.parse(context, "two") - assert not res.status - assert res.value.value == [ - UnrecognizedTokensNode(0, 0, [t("two")]) - ] - - def test_i_can_parse_one_and_more_with_separator(self): - foo = Concept(name="foo") - grammar = {foo: OneOrMore("one", sep=",")} - - context, res, wrapper, return_value = self.execute([foo], grammar, "one, one , one") - - assert res.status - assert return_value == [cnode("foo", 0, 7, "one, one , one")] - assert return_value[0].underlying == u(grammar[foo], 0, 7, [ - u("one", 0, 0), - u("one", 3, 3), - u("one", 7, 7)]) - - def test_that_one_and_more_is_greedy(self): - foo = Concept(name="foo") - bar = Concept(name="bar") - grammar = {foo: OneOrMore("one"), bar: "one"} - - context, res, wrapper, return_value = self.execute([foo], grammar, "one one one") - - assert res.status - assert return_value == [cnode("foo", 0, 4, "one one one")] - - def test_i_can_detect_infinite_recursion(self): - foo = Concept(name="foo") - bar = Concept(name="bar") - - grammar = { - bar: foo, - foo: bar - } - parser = BnfNodeParser() - parser.initialize(self.get_context(), grammar) - - assert bar not in parser.concepts_grammars - assert foo not in parser.concepts_grammars - - def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice(self): - foo = Concept(name="foo") - bar = Concept(name="bar") - grammar = { - bar: foo, - foo: OrderedChoice(bar, "foo") + my_map = { + "one": Concept("one"), + "two": Concept("two"), + "foo": self.bnf_concept("foo", + Sequence( + StrMatch("twenty"), + OrderedChoice(ConceptExpression("one"), ConceptExpression("two")))), } - parser = BnfNodeParser() - parser.initialize(self.get_context(), grammar) - - assert foo not in parser.concepts_grammars # removed because of the infinite recursion - assert bar not in parser.concepts_grammars # removed because of the 
infinite recursion - - # the other way around is possible - grammar = { - bar: foo, - foo: OrderedChoice("foo", bar) + text = "twenty one" + expected = [CN("foo", source="twenty one")] + sequences = self.validate_get_concepts_sequences(my_map, text, expected) + concept_foo = sequences[0].concept + assert concept_foo.compiled == { + ConceptParts.BODY: DoNotResolve("twenty one"), + "one": my_map["one"], } - context, parser = self.init([foo, bar], grammar) - assert foo in parser.concepts_grammars - assert bar in parser.concepts_grammars - - res = parser.parse(context, "foo") - assert len(res) == 2 - assert res[0].status - assert res[0].value.body == [cnode("bar", 0, 0, "foo")] - assert res[1].status - assert res[1].value.body == [cnode("foo", 0, 0, "foo")] - - def test_i_can_detect_indirect_infinite_recursion_with_sequence(self): - foo = Concept(name="foo") - bar = Concept(name="bar") - - grammar = { - bar: foo, - foo: Sequence("one", bar, "two") + def test_i_can_refer_to_group_concepts(self): + my_map = { + "one": Concept("one"), + "two": Concept("two"), + "number": Concept("number"), + "foo": self.bnf_concept("foo", Sequence("twenty", ConceptExpression("number"))) } - parser = BnfNodeParser() - parser.initialize(self.get_context(), grammar) - assert foo not in parser.concepts_grammars # removed because of the infinite recursion - assert bar not in parser.concepts_grammars # removed because of the infinite recursion + def pic(s, c): + s.add_concept_to_set(c, my_map["one"], my_map["number"]) + s.add_concept_to_set(c, my_map["two"], my_map["number"]) - def test_i_can_detect_indirect_infinite_recursion_with_sequence_or_ordered_choice(self): - foo = Concept(name="foo") - bar = Concept(name="bar") + text = "twenty two" + expected = [CN("foo", source="twenty two")] + sequences = self.validate_get_concepts_sequences(my_map, text, expected, post_init_concepts=pic) - grammar = { - bar: foo, - foo: Sequence("one", OrderedChoice(bar, "other"), "two") - } - parser = 
BnfNodeParser() - parser.initialize(self.get_context(), grammar) + # explicit validations of the compiled + concept_foo = sequences[0].concept + assert concept_foo.body is None + assert concept_foo.compiled == {'number': my_map["number"], + 'two': my_map["two"], + ConceptParts.BODY: DoNotResolve(value='twenty two')} - assert foo not in parser.concepts_grammars # removed because of the infinite recursion - assert bar not in parser.concepts_grammars # removed because of the infinite recursion + text = "twenty one" + expected = [CN("foo", source="twenty one")] + sequences = self.validate_get_concepts_sequences(my_map, text, expected, post_init_concepts=pic) - def test_infinite_recursion_does_not_fail_if_a_concept_is_missing(self): - foo = Concept(name="foo") - bar = Concept(name="bar") + # explicit validations of the compiled + concept_foo = sequences[0].concept + assert concept_foo.body is None + assert concept_foo.compiled == {'number': my_map["number"], + 'one': my_map["one"], + ConceptParts.BODY: DoNotResolve(value='twenty one')} - grammar = { - foo: bar - } - parser = BnfNodeParser() - parser.initialize(self.get_context(), grammar) - - assert foo in parser.concepts_grammars - - def test_i_can_detect_indirect_infinite_recursion_with_optional(self): - # TODO infinite recursion with optional - pass - - def test_i_can_detect_indirect_infinite_recursion_with_zero_and_more(self): - # TODO infinite recursion with optional - pass - - def test_i_can_detect_indirect_infinite_recursion_with_one_and_more(self): - # TODO infinite recursion with optional - pass - - def test_i_can_visit_parsing_expression(self): - mult = Concept(name="mult") - add = Concept(name="add") - - visitor = ConceptVisitor() - visitor.visit(Sequence(mult, Optional(Sequence("+", add)))) - - assert sorted(list(visitor.concepts)) == ["add", "mult"] - - def test_i_can_initialize_rule_names(self): - context = self.get_context() - foo = Concept(name="foo") - bar = Concept(name="bar") - - grammar = {foo: 
Sequence("one", "two"), bar: foo} - parser = BnfNodeParser() - ret = parser.initialize(context, grammar) - return_value = ret.body - - assert return_value[foo].rule_name == "" - assert return_value[bar].rule_name == "foo" - - @pytest.mark.parametrize("text, end_position", [ - ("foo", 0), - ("foo bar", 2), - ("foo bar ", 3), - (" foo bar ", 4) + @pytest.mark.parametrize("bar_expr", [ + ConceptExpression("foo"), + OrderedChoice(ConceptExpression("foo"), StrMatch("one")), + Sequence(StrMatch("one"), ConceptExpression("foo"), StrMatch("two")) ]) - def test_cannot_parser_unknown_concepts(self, text, end_position): - context, res, wrapper, return_value = self.execute([], {}, text) - tokens = list(Tokenizer(text))[:-1] - - assert not res.status - assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - assert return_value == [UnrecognizedTokensNode(0, end_position, tokens)] - - def test_i_cannot_parse_when_part_of_the_input_is_unrecognized(self): - one = Concept(name="one") - two = Concept(name="two") - grammar = {one: "one", two: "two"} - - context, res, wrapper, return_value = self.execute([one, two], grammar, "one two three") - - assert not res.status - assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - assert return_value == [ - ConceptNode(get_expected(one, "one"), 0, 0, source="one", underlying=u("one", 0, 0)), - ConceptNode(get_expected(two, "two"), 2, 2, source="two", underlying=u("two", 2, 2)), - UnrecognizedTokensNode(3, 4, [t(" "), t("three")]) - ] - - def test_i_cannot_parse_when_wrong_sequence(self): - foo = Concept(name="foo") - grammar = {foo: Sequence("one", "two", "three")} - - context, res, wrapper, return_value = self.execute([foo], grammar, "one two three one") - - assert not res.status - assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - assert return_value == [ - short_cnode("foo", "one two three"), - UnrecognizedTokensNode(5, 6, [t(" "), t("one")]) - ] - - def 
test_i_cannot_parse_when_sequence_cannot_match_because_of_end_of_file(self): - foo = Concept(name="foo") - grammar = {foo: Sequence("one", "two", "three")} - - context, res, wrapper, return_value = self.execute([foo], grammar, "one two") - - assert not res.status - assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - assert return_value == [ - UnrecognizedTokensNode(0, 2, [t("one"), t(" "), t("two")]) - ] - - def test_i_cannot_parse_multiple_results_when_unknown_tokens_at_the_end(self): - foo = Concept(name="foo") - bar = Concept(name="bar") - grammar = { - bar: Sequence("one", "two"), - foo: Sequence("one", OrderedChoice("two", "three")) + def test_i_can_detect_infinite_recursion(self, bar_expr): + my_map = { + "foo": self.bnf_concept("foo", ConceptExpression("bar")), + "bar": self.bnf_concept("bar", bar_expr), } - context, parser = self.init([foo, bar], grammar) - res = parser.parse(context, "one two four five") + sheerka, context, parser = self.init_parser(my_map, singleton=True) + parser.context = context + parser.sheerka = sheerka - assert len(res) == 2 - assert not res[0].status - assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) - assert res[0].value.body == [ - cnode("bar", 0, 2, "one two"), - UnrecognizedTokensNode(3, 6, [t(" "), t("four"), t(" "), t("five")]) - ] + parsing_expression = parser.get_parsing_expression(my_map["foo"]) + assert sheerka.isinstance(parsing_expression, BuiltinConcepts.CHICKEN_AND_EGG) + assert sheerka.isinstance(parser.concepts_grammars.get(my_map["foo"].id), BuiltinConcepts.CHICKEN_AND_EGG) - assert not res[1].status - assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) - assert res[1].value.body == [ - cnode("foo", 0, 2, "one two"), - UnrecognizedTokensNode(3, 6, [t(" "), t("four"), t(" "), t("five")]) - ] + parsing_expression = parser.get_parsing_expression(my_map["bar"]) + assert sheerka.isinstance(parsing_expression, 
BuiltinConcepts.CHICKEN_AND_EGG) + assert sheerka.isinstance(parser.concepts_grammars.get(my_map["bar"].id), BuiltinConcepts.CHICKEN_AND_EGG) - def test_i_cannot_parse_multiple_results_when_beginning_by_unknown_tokens(self): - foo = Concept(name="foo") - bar = Concept(name="bar") - grammar = { - bar: Sequence("one", "two"), - foo: Sequence("one", OrderedChoice("two", "three")) + def test_i_can_get_parsing_expression_when_concept_isa(self): + my_map = { + "one": Concept("one"), + "twenty": Concept("twenty"), + "number": Concept("number"), + "twenties": self.bnf_concept("twenties", Sequence(ConceptExpression("twenty"), ConceptExpression("number"))) } - context, parser = self.init([foo, bar], grammar) + sheerka, context, parser = self.init_parser(my_map, singleton=True) + parser.context = context + parser.sheerka = sheerka + sheerka.set_isa(context, sheerka.new("one"), my_map["number"]) + sheerka.set_isa(context, sheerka.new("twenty"), my_map["number"]) - res = parser.parse(context, "four five one two") + parsing_expression = parser.get_parsing_expression(my_map["twenties"]) + assert parsing_expression == Sequence( + ConceptExpression(my_map["twenty"], rule_name="twenty"), + ConceptExpression(my_map["number"], rule_name="number")) - assert len(res) == 2 - assert not res[0].status - assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) - assert res[0].value.body == [ - UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]), - cnode("bar", 4, 6, "one two"), - ] + assert parsing_expression.nodes[0].nodes == [StrMatch("twenty")] + assert isinstance(parsing_expression.nodes[1].nodes[0], OrderedChoice) + assert ConceptExpression(my_map["one"], rule_name="one") in parsing_expression.nodes[1].nodes[0].elements + assert ConceptExpression(my_map["twenty"], rule_name="twenty") in parsing_expression.nodes[1].nodes[0].elements - assert not res[1].status - assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) - assert 
res[1].value.body == [ - UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]), - cnode("foo", 4, 6, "one two"), - ] - - def test_i_cannot_parse_multiple_results_when_surrounded_by_unknown_tokens(self): - foo = Concept(name="foo") - bar = Concept(name="bar") - grammar = { - bar: Sequence("one", "two"), - foo: Sequence("one", OrderedChoice("two", "three")) + def test_i_can_get_parsing_expression_when_sequence_of_concept(self): + my_map = { + "one": Concept("one"), + "two_ones": self.bnf_concept("two_ones", Sequence(ConceptExpression("one"), ConceptExpression("one"))) } - context, parser = self.init([foo, bar], grammar) + sheerka, context, parser = self.init_parser(my_map, singleton=True) + parser.context = context + parser.sheerka = sheerka - res = parser.parse(context, "four five one two six seven") - assert len(res) == 2 - assert not res[0].status - assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) - assert res[0].value.body == [ - UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]), - cnode("bar", 4, 6, "one two"), - UnrecognizedTokensNode(7, 10, [t(" "), t("six"), t(" "), t("seven")]), - ] + parsing_expression = parser.get_parsing_expression(my_map["two_ones"]) + assert parsing_expression == Sequence( + ConceptExpression(my_map["one"], rule_name="one"), + ConceptExpression(my_map["one"], rule_name="one")) - assert not res[1].status - assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) - assert res[1].value.body == [ - UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]), - cnode("foo", 4, 6, "one two"), - UnrecognizedTokensNode(7, 10, [t(" "), t("six"), t(" "), t("seven")]), - ] - - def test_i_cannot_parse_multiple_results_when_unknown_tokens_in_the_middle(self): - context = self.get_context() - foo = Concept(name="foo") - bar = Concept(name="bar") - baz = Concept(name="baz") - grammar = { - bar: Sequence("one", "two"), - foo: Sequence("one", 
OrderedChoice("two", "three")), - baz: StrMatch("six"), - } - context, parser = self.init([foo, bar, baz], grammar) - - res = parser.parse(context, "one two four five six") - assert len(res) == 2 - assert not res[0].status - assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) - assert res[0].value.body == [ - cnode("bar", 0, 2, "one two"), - UnrecognizedTokensNode(3, 7, [t(" "), t("four"), t(" "), t("five"), t(" ")]), - cnode("baz", 8, 8, "six"), - ] - - assert not res[1].status - assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) - assert res[1].value.body == [ - cnode("foo", 0, 2, "one two"), - UnrecognizedTokensNode(3, 7, [t(" "), t("four"), t(" "), t("five"), t(" ")]), - cnode("baz", 8, 8, "six"), - ] - - def test_i_can_get_the_inner_concept_when_possible(self): - foo = Concept(name="foo") - one = Concept(name="one") - grammar = {foo: Sequence(Optional(ZeroOrMore(one)), ZeroOrMore("one"))} - - context, res, wrapper, return_value = self.execute([foo, one], grammar, "one") - - assert res.status - assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - assert return_value == [cnode("foo", 0, 0, "one")] - concept_found = return_value[0].concept - assert cbody(concept_found) == get_expected(one, "one") - assert id(cprop(concept_found, "one")) == id(cbody(concept_found)) - - def test_i_can_get_the_inner_concept_when_possible_with_rule_name(self): - foo = Concept(name="foo") - one = Concept(name="one") - grammar = {foo: Sequence( - Optional(ZeroOrMore(one, rule_name="zero"), rule_name="opt"), - ZeroOrMore("one"), rule_name="seq")} - - context, res, wrapper, return_value = self.execute([foo, one], grammar, "one") - - assert res.status - assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - assert return_value == [cnode("foo", 0, 0, "one")] - concept_found = return_value[0].concept - assert cbody(concept_found) == get_expected(one, "one") - assert id(cprop(concept_found, 
"one")) == id(cbody(concept_found)) - assert id(cprop(concept_found, "zero")) == id(cbody(concept_found)) - assert id(cprop(concept_found, "opt")) == id(cbody(concept_found)) - assert id(cprop(concept_found, "seq")) == id(cbody(concept_found)) - - def test_i_get_multiple_props_when_zero_or_more(self): - foo = Concept(name="foo") - one = Concept(name="one") - grammar = {foo: ZeroOrMore(one)} - - context, res, wrapper, return_value = self.execute([foo, one], grammar, "one one one") - assert res.status - assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - assert return_value == [cnode("foo", 0, 4, "one one one")] - concept_found = return_value[0].concept - assert cbody(concept_found) == DoNotResolve("one one one") - assert len(concept_found.compiled["one"]) == 3 - assert cprop(concept_found, "one")[0] == get_expected(one) - assert cprop(concept_found, "one")[1] == get_expected(one) - assert cprop(concept_found, "one")[2] == get_expected(one) - assert id(cprop(concept_found, "one")[0]) != id(cprop(concept_found, "one")[1]) - assert id(cprop(concept_found, "one")[1]) != id(cprop(concept_found, "one")[2]) - assert id(cprop(concept_found, "one")[2]) != id(cprop(concept_found, "one")[0]) - - def test_i_get_multiple_props_when_zero_or_more_and_different_values(self): - foo = Concept(name="foo") - one = Concept(name="one") - grammar = {foo: ZeroOrMore(Sequence(one, "ok", rule_name="seq")), one: OrderedChoice("one", "un", "uno")} - - context, res, wrapper, return_value = self.execute([foo, one], grammar, "one ok un ok uno ok") - assert res.status - assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - assert return_value == [short_cnode("foo", "one ok un ok uno ok")] - concept_found = return_value[0].concept - assert cprop(concept_found, "one")[0] == get_expected(one, "one") - assert cprop(concept_found, "one")[1] == get_expected(one, "un") - assert cprop(concept_found, "one")[2] == get_expected(one, "uno") - assert 
cprop(concept_found, "seq")[0] == DoNotResolve("one ok") - assert cprop(concept_found, "seq")[1] == DoNotResolve("un ok") - assert cprop(concept_found, "seq")[2] == DoNotResolve("uno ok") - - @pytest.mark.parametrize("rule, expected", [ - (StrMatch("string"), "'string'"), - (StrMatch("string", rule_name="rule_name"), "'string'=rule_name"), - (Sequence(StrMatch("foo"), StrMatch("bar")), "('foo' 'bar')"), - (Sequence(StrMatch("foo"), StrMatch("bar"), rule_name="rule_name"), "('foo' 'bar')=rule_name"), - (OrderedChoice(StrMatch("foo"), StrMatch("bar")), "('foo'|'bar')"), - (OrderedChoice(StrMatch("foo"), StrMatch("bar"), rule_name="rule_name"), "('foo'|'bar')=rule_name"), - (Optional(StrMatch("foo")), "'foo'?"), - (Optional(StrMatch("foo"), rule_name="rule_name"), "'foo'?=rule_name"), - (ZeroOrMore(StrMatch("foo")), "'foo'*"), - (ZeroOrMore(StrMatch("foo"), rule_name="rule_name"), "'foo'*=rule_name"), - (OneOrMore(StrMatch("foo")), "'foo'+"), - (OneOrMore(StrMatch("foo"), rule_name="rule_name"), "'foo'+=rule_name"), - (Sequence( - Optional(StrMatch("foo"), rule_name="a"), - ZeroOrMore(StrMatch("bar"), rule_name="b"), - OneOrMore(StrMatch("baz"), rule_name="c"), - rule_name="d"), "('foo'?=a 'bar'*=b 'baz'+=c)=d"), - (OrderedChoice( - Optional(StrMatch("foo"), rule_name="a"), - ZeroOrMore(StrMatch("bar"), rule_name="b"), - OneOrMore(StrMatch("baz"), rule_name="c"), - rule_name="d"), "('foo'?=a|'bar'*=b|'baz'+=c)=d"), - (Sequence( - OrderedChoice(StrMatch("foo"), StrMatch("bar"), rule_name="a"), - OrderedChoice(StrMatch("x"), StrMatch("y"), rule_name="b"), - rule_name="c"), "(('foo'|'bar')=a ('x'|'y')=b)=c") + @pytest.mark.parametrize("expr, text, expected", [ + (ZeroOrMore(StrMatch("one"), sep=","), "one,", [CNC("foo", source="one"), UTN(",")]), + (StrMatch("one"), "one two", [CNC("foo", source="one"), UTN(" two")]), + (StrMatch("one"), "two one", [UTN("two "), CNC("foo", source="one")]), ]) - def test_i_can_encode_grammar(self, rule, expected): - foo = 
Concept(name="foo") - grammar = {foo: rule} - context, parser = self.init([foo], grammar) + def test_i_can_recognize_unknown_concepts(self, expr, text, expected): + my_map = { + "foo": self.bnf_concept("foo", expr) + } - encoded = parser.encode_grammar(parser.concepts_grammars) - assert encoded["c:foo|1001:"] == expected + self.validate_get_concepts_sequences(my_map, text, expected) - bnf_parser = BnfParser() - parse_res = bnf_parser.parse(context, encoded["c:foo|1001:"]) - assert parse_res.status - assert parse_res.value.value == rule + def test_i_can_recognize_unknown_then_they_look_like_known(self): + my_map = { + "one two": self.bnf_concept("one two", Sequence("one", "two")), + "three": self.bnf_concept("three") + } - def test_i_can_encode_grammar_when_concept_simple(self): - foo = Concept(name="foo") - bar = Concept(name="bar") - grammar = {foo: ConceptExpression(bar)} - context, parser = self.init([foo, bar], grammar) + text = "one three" + expected = [UTN("one "), CNC("three", source="three")] + self.validate_get_concepts_sequences(my_map, text, expected) - encoded = parser.encode_grammar(parser.concepts_grammars) - assert encoded["c:foo|1001:"] == "c:bar|1002:=bar" + def test_i_can_remove_duplicates(self): + my_map = { + "one two": self.bnf_concept("one two", Sequence("one", "two")), + "one four": self.bnf_concept("one four", Sequence("one", "four")), + "three": self.bnf_concept("three") + } + sheerka, context, parser = self.init_parser(my_map, singleton=True) + parser.init_from_concepts(context, my_map.values()) - bnf_parser = BnfParser() - parse_res = bnf_parser.parse(context, encoded["c:foo|1001:"]) - assert parse_res.status - assert parse_res.value.value == grammar[foo] + parser.reset_parser(context, "one three") + sequences = parser.get_concepts_sequences() + sequence = parser.get_valid(sequences) - def test_i_can_encode_grammar_when_concepts(self): - foo = Concept(name="foo") - bar = Concept(name="bar") - baz = Concept(name="baz") - grammar = {foo: 
Sequence( - StrMatch("a"), - OrderedChoice(ConceptExpression(bar), - OneOrMore(ConceptExpression(baz)), rule_name="oc"), rule_name="s")} - context, parser = self.init([foo, bar, baz], grammar) + assert len(sequence) == 1 - encoded = parser.encode_grammar(parser.concepts_grammars) - assert encoded["c:foo|1001:"] == "('a' (c:bar|1002:=bar|c:baz|1003:=baz+)=oc)=s" + @pytest.mark.parametrize("parser_input, expected_status, expected", [ + ("one", True, [CNC("bnf one", source="one")]), # the bnf one is chosen + ("one two", True, [CN("one and two", source="one two")]), + ("three three three", True, [CN("one or more three", source="three three three")]), + ("twenty two", True, [CN("twenties", source="twenty two")]), + ("twenty four", True, [CN("twenties", source="twenty four")]), + ("twenty one", False, [UTN("twenty "), CN("bnf one", source="one")]), + ("twenty two + 1", True, [CN("twenties", source="twenty two"), " + 1"]), - bnf_parser = BnfParser() - parse_res = bnf_parser.parse(context, encoded["c:foo|1001:"]) - assert parse_res.status - assert parse_res.value.value == grammar[foo] + ]) + def test_i_can_parse(self, parser_input, expected_status, expected): + sheerka, context, parser = self.init_parser(init_from_sheerka=True) - def test_i_can_encode_grammar_when_set_concepts(self): - foo = Concept(name="foo") - bar = Concept(name="bar") - baz = Concept(name="baz") - grammar = {foo: Sequence( - StrMatch("a"), - OrderedChoice(bar, - OneOrMore(ConceptExpression(baz)), rule_name="oc"), rule_name="s")} - context = self.get_context() - for c in [foo, bar, baz]: - context.sheerka.add_in_cache(c) - context.sheerka.set_id_if_needed(c, False) - context.sheerka.add_concept_to_set(context, baz, bar) - - parser = BnfNodeParser() - parser.initialize(context, grammar) - - encoded = parser.encode_grammar(parser.concepts_grammars) - assert encoded["c:foo|1001:"] == "('a' (c:bar|1002:=bar|c:baz|1003:=baz+)=oc)=s" - - bnf_parser = BnfParser() - parse_res = bnf_parser.parse(context, 
encoded["c:foo|1001:"]) - assert parse_res.status - - expected = Sequence( - StrMatch("a"), - OrderedChoice(ConceptGroupExpression(bar, rule_name="bar"), - OneOrMore(ConceptExpression(baz, rule_name="baz")), rule_name="oc"), rule_name="s") - assert parse_res.value.value == expected - - def test_i_concept_validation_is_not_set_when_no_variables(self): - foo = Concept(name="foo") - grammar = {foo: "foo"} - - context, res, wrapper, return_value = self.execute([foo], grammar, "foo") - assert not return_value[0].concept.metadata.need_validation - - def test_i_concept_validation_is_set_when_unnamed_variables_are_found(self): - foo = Concept(name="foo") - grammar = {foo: Sequence("foo", OrderedChoice("a", "b"))} - - context, res, wrapper, return_value = self.execute([foo], grammar, "foo a") - assert not return_value[0].concept.metadata.need_validation - - def test_i_concept_validation_is_set_when_named_variables_are_found(self): - foo = Concept(name="foo") - grammar = {foo: Sequence("foo", OrderedChoice("a", "b", rule_name="var"))} - - context, res, wrapper, return_value = self.execute([foo], grammar, "foo a") - assert return_value[0].concept.metadata.need_validation + res = parser.parse(context, parser_input) + expected_array = compute_expected_array(cmap, parser_input, expected) + parser_result = res.value + concepts_nodes = res.value.value + assert res.status == expected_status + assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) + assert concepts_nodes == expected_array + # @pytest.mark.parametrize("parser_input, expected", [ + # ("one", [ + # (True, [CNC("bnf_one", source="one", one="one", body="one")]), + # (True, [CNC("one_or_two", source="one", one="one", body="one")]), + # ]), + # ("two plus two", [ + # (False, [CN("bnf_one"), UTN(" plus "), CN("one_or_two")]), + # (False, [CN("one_or_two"), UTN(" plus "), CN("one_or_two")]), + # ]) + # ]) + # def test_i_can_parse_when_multiple_results(self, parser_input, expected): + # sheerka, context, 
parser = self.init_parser(init_from_sheerka=True) # - # def test_i_can_parse_basic_arithmetic_operations_and_resolve_properties(self): - # context = self.get_context() - # add = Concept(name="add") - # mult = Concept(name="mult") - # atom = Concept(name="atom") + # res = parser.parse(context, parser_input) + # assert len(res) == len(expected) # - # grammar = { - # add: Sequence(mult, Optional(Sequence(OrderedChoice('+', '-', rule_name="sign"), add))), - # mult: Sequence(atom, Optional(Sequence(OrderedChoice('*', '/'), mult))), - # atom: OrderedChoice(OrderedChoice('1', '2', '3'), Sequence('(', add, ')')), - # } - # - # parser = BnfNodeParser() - # parser.register(grammar) - # - # # res = parser.parse(context, "1") - # # assert len(res) == 3 # add, mult, atom - # # - # # res = parser.parse(context, "1 * 2") - # # assert len(res) == 2 # add and mult - # # - # # res = parser.parse(context, "1 + 2") - # # assert res.status - # # assert return_value == [ConceptNode(add, 0, 4, source="1 + 2")] - # - # res = parser.parse(context, "1 * 2 + 3") - # assert res.status - # assert return_value == [ConceptNode(add, 0, 4, source="1 + 2 + 3")] - - def test_i_can_register_concepts_with_the_same_name(self): - # TODO : concepts are registered by name, - # what when two concepts have the same name ? 
- pass - - def test_i_can_parse_very_very_long_input(self): - # TODO: In the current implementation, all the tokens are loaded in memory - # It's clearly not the good approach - pass + # for res_i, expected_i in zip(res, expected): + # assert res_i.status == expected_i[0] + # expected_array = compute_expected_array(cmap, parser_input, expected_i[1]) + # assert res_i.value.value == expected_array diff --git a/tests/parsers/test_BnfNodeParser_Old.py b/tests/parsers/test_BnfNodeParser_Old.py new file mode 100644 index 0000000..d3855e5 --- /dev/null +++ b/tests/parsers/test_BnfNodeParser_Old.py @@ -0,0 +1,1305 @@ +# from ast import Str +# +# import pytest +# from core.builtin_concepts import BuiltinConcepts +# from core.concept import Concept, ConceptParts, DoNotResolve +# from core.tokenizer import Tokenizer, TokenKind, Token +# from parsers.BaseNodeParser import cnode, short_cnode +# from parsers.BnfParser import BnfParser +# from parsers.BnfNodeParser_Old import BnfNodeParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \ +# ParsingExpressionVisitor, TerminalNode, NonTerminalNode, ZeroOrMore, OneOrMore, \ +# UnrecognizedTokensNode, ConceptExpression, ConceptGroupExpression +# +# from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka +# +# +# class ConceptVisitor(ParsingExpressionVisitor): +# def __init__(self): +# self.concepts = set() +# +# def visit_ConceptExpression(self, node): +# self.concepts.add(node.concept) +# +# +# def u(parsing_expression, start, end, children=None): +# """ +# u stands for underlying +# :param parsing_expression: +# :param start: +# :param end: +# :param children: +# :return: +# """ +# if isinstance(parsing_expression, str): +# parsing_expression = StrMatch(parsing_expression) +# +# if isinstance(parsing_expression, StrMatch): +# return TerminalNode(parsing_expression, start, end, parsing_expression.to_match) +# +# return NonTerminalNode(parsing_expression, start, end, [], children) +# +# +# def 
evaluated(concept): +# c = Concept(name=concept.name, body=concept.name) +# +# +# def t(text): +# if text.startswith("'") or text.startswith('"'): +# return Token(TokenKind.STRING, text, 0, 0, 0) +# +# if text.startswith(" "): +# return Token(TokenKind.WHITESPACE, text, 0, 0, 0) +# +# return Token(TokenKind.IDENTIFIER, text, 0, 0, 0) +# +# +# def get_expected(concept, text=None): +# c = Concept(name=concept.name) +# c.compiled[ConceptParts.BODY] = DoNotResolve(text or concept.name) +# c.init_key() +# c.metadata.id = concept.id +# return c +# +# +# def cbody(concept): +# """cbody stands for compiled body""" +# if not ConceptParts.BODY in concept.compiled: +# return None +# return concept.compiled[ConceptParts.BODY] +# +# +# def cprop(concept, prop_name): +# """cbody stands for compiled property""" +# return concept.compiled[prop_name] +# +# +# class TestBnfNodeParser(TestUsingMemoryBasedSheerka): +# +# def init(self, concepts, grammar): +# sheerka = self.get_sheerka(singleton=True) +# context = self.get_context(sheerka) +# for c in concepts: +# context.sheerka.add_in_cache(c) +# context.sheerka.set_id_if_needed(c, False) +# +# parser = BnfNodeParser() +# parser.initialize(context, grammar) +# +# return context, parser +# +# def execute(self, concepts, grammar, text): +# context, parser = self.init(concepts, grammar) +# +# res = parser.parse(context, text) +# wrapper = res.value +# return_value = res.value.value +# +# return context, res, wrapper, return_value +# +# +# @pytest.mark.parametrize("match, text", [ +# ("foo", "foo"), +# ("'foo'", "'foo'"), +# ("1", "1"), +# ("3.14", "3.14"), +# ("+", "+"), +# (StrMatch("foo"), "foo"), +# (StrMatch("'foo'"), "'foo'"), +# (StrMatch("1"), "1"), +# (StrMatch("3.14"), "3.14"), +# (StrMatch("+"), "+"), +# ]) +# def test_i_can_match_simple_tokens(self, match, text): +# foo = Concept(name="foo") +# grammar = {foo: match} +# +# context, res, wrapper, return_value = self.execute([foo], grammar, text) +# +# assert res.status +# 
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) +# assert return_value == [ConceptNode(get_expected(foo, text), 0, 0, source=text, underlying=u(match, 0, 0))] +# +# +# def test_i_can_match_multiple_concepts_in_one_input(self): +# one = Concept(name="one") +# two = Concept(name="two") +# grammar = {one: "one", two: "two"} +# +# context, res, wrapper, return_value = self.execute([one, two], grammar, "one two one") +# +# assert res.status +# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) +# assert return_value == [ +# ConceptNode(get_expected(one), 0, 0, source="one", underlying=u("one", 0, 0)), +# ConceptNode(get_expected(two), 2, 2, source="two", underlying=u("two", 2, 2)), +# ConceptNode(get_expected(one), 4, 4, source="one", underlying=u("one", 4, 4)), +# ] +# +# +# def test_i_can_match_sequence(self): +# foo = Concept(name="foo") +# grammar = {foo: Sequence("one", "two", "three")} +# +# context, res, wrapper, return_value = self.execute([foo], grammar, "one two three") +# +# assert res.status +# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) +# assert return_value == [ +# ConceptNode( +# get_expected(foo, "one two three"), +# 0, +# 4, +# source="one two three", +# underlying=u(grammar[foo], 0, 4, [ +# u("one", 0, 0), +# u("two", 2, 2), +# u("three", 4, 4)]))] +# +# +# def test_i_always_choose_the_longest_match(self): +# foo = Concept(name="foo") +# bar = Concept(name="bar") +# grammar = {bar: Sequence("one", "two"), foo: Sequence("one", "two", "three")} +# +# context, res, wrapper, return_value = self.execute([foo, bar], grammar, "one two three") +# +# assert res.status +# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) +# assert return_value == [cnode("foo", 0, 4, "one two three")] +# +# def test_i_can_match_several_sequences(self): +# foo = Concept(name="foo") +# bar = Concept(name="bar") +# grammar = {bar: Sequence("one", "two"), foo: Sequence("one", 
"two", "three")} +# +# context, res, wrapper, return_value = self.execute([foo, bar], grammar, "one two three one two") +# +# assert res.status +# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) +# assert return_value == [ +# cnode("foo", 0, 4, "one two three"), +# cnode("bar", 6, 8, "one two"), +# ] +# +# def test_i_can_match_ordered_choice(self): +# foo = Concept(name="foo") +# grammar = {foo: OrderedChoice("one", "two")} +# context, parser = self.init([foo], grammar) +# +# res1 = parser.parse(context, "one") +# assert res1.status +# assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT) +# assert res1.value.body == [cnode("foo", 0, 0, "one")] +# assert res1.value.body[0].underlying == u(grammar[foo], 0, 0, [u("one", 0, 0)]) +# +# res2 = parser.parse(context, "two") +# assert res2.status +# assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT) +# assert res2.value.body == [cnode("foo", 0, 0, "two")] +# assert res2.value.body[0].underlying == u(grammar[foo], 0, 0, [u("two", 0, 0)]) +# +# res3 = parser.parse(context, "three") +# assert not res3.status +# assert context.sheerka.isinstance(res3.value, BuiltinConcepts.PARSER_RESULT) +# assert res3.value.value == [ +# UnrecognizedTokensNode(0, 0, [t("three")]) +# ] +# +# def test_i_cannot_match_ordered_choice_with_empty_alternative(self): +# foo = Concept(name="foo") +# grammar = {foo: Sequence(OrderedChoice("one", ""), "two")} +# +# context, res, wrapper, return_value = self.execute([foo], grammar, "ok") +# +# assert not res.status +# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) +# assert return_value == [ +# UnrecognizedTokensNode(0, 0, [t("ok")]) +# ] +# +# def test_i_can_mix_sequences_and_ordered_choices(self): +# foo = Concept(name="foo") +# grammar = {foo: Sequence(OrderedChoice("twenty", "thirty"), "one", "ok")} +# +# context, parser = self.init([foo], grammar) +# +# res1 = parser.parse(context, "twenty one ok") 
+# assert res1.status +# assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT) +# assert res1.value.body == [ConceptNode(get_expected(foo, "twenty one ok"), 0, 4, source="twenty one ok", +# underlying=u(grammar[foo], 0, 4, [ +# u(OrderedChoice("twenty", "thirty"), 0, 0, [u("twenty", 0, 0)]), +# u("one", 2, 2), +# u("ok", 4, 4)]))] +# +# res2 = parser.parse(context, "thirty one ok") +# assert res2.status +# assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT) +# assert res2.value.body == [ConceptNode(get_expected(foo, "thirty one ok"), 0, 4, source="thirty one ok", +# underlying=u(grammar[foo], 0, 4, [ +# u(OrderedChoice("twenty", "thirty"), 0, 0, [u("thirty", 0, 0)]), +# u("one", 2, 2), +# u("ok", 4, 4)]))] +# +# res3 = parser.parse(context, "twenty one") +# assert not res3.status +# assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT) +# assert res3.value.value == [ +# UnrecognizedTokensNode(0, 2, [t("twenty"), t(" "), t("one")]) +# ] +# +# def test_i_can_mix_ordered_choices_and_sequences(self): +# foo = Concept(name="foo") +# grammar = {foo: OrderedChoice(Sequence("twenty", "thirty"), "one")} +# +# context, parser = self.init([foo], grammar) +# +# res = parser.parse(context, "twenty thirty") +# assert res.status +# assert res.value.value == [cnode("foo", 0, 2, "twenty thirty")] +# +# res = parser.parse(context, "one") +# assert res.status +# assert res.value.value == [cnode("foo", 0, 0, "one")] +# +# def test_i_cannot_parse_empty_optional(self): +# foo = Concept(name="foo") +# grammar = {foo: Optional("one")} +# context, parser = self.init([foo], grammar) +# +# res = parser.parse(context, "") +# return_value = res.value +# +# assert not res.status +# assert context.sheerka.isinstance(return_value, BuiltinConcepts.IS_EMPTY) +# +# def test_i_can_parse_optional(self): +# foo = Concept(name="foo") +# grammar = {foo: Optional("one")} +# +# context, res, wrapper, return_value = self.execute([foo], 
grammar, "one") +# +# assert res.status +# assert return_value == [ConceptNode(get_expected(foo, "one"), 0, 0, source="one", +# underlying=u(grammar[foo], 0, 0, [u("one", 0, 0)]))] +# +# def test_i_can_parse_sequence_starting_with_optional(self): +# foo = Concept(name="foo") +# grammar = {foo: Sequence(Optional("twenty"), "one")} +# context, parser = self.init([foo], grammar) +# +# res = parser.parse(context, "twenty one") +# assert res.status +# assert res.value.body == [ConceptNode( +# get_expected(foo, "twenty one"), 0, 2, +# source="twenty one", +# underlying=u(grammar[foo], 0, 2, +# [ +# u(Optional("twenty"), 0, 0, [u("twenty", 0, 0)]), +# u("one", 2, 2)] +# ))] +# +# res = parser.parse(context, "one") +# assert res.status +# assert res.value.body == [ConceptNode(get_expected(foo, "one"), 0, 0, source="one", +# underlying=u(grammar[foo], 0, 0, [u("one", 0, 0)]))] +# +# def test_i_can_parse_sequence_ending_with_optional(self): +# foo = Concept(name="foo") +# grammar = {foo: Sequence("one", "two", Optional("three"))} +# +# context, parser = self.init([foo], grammar) +# +# res = parser.parse(context, "one two three") +# assert res.status +# assert res.value.body == [cnode("foo", 0, 4, "one two three")] +# +# res = parser.parse(context, "one two") +# assert res.status +# assert res.value.body == [cnode("foo", 0, 2, "one two")] +# +# def test_i_can_parse_sequence_with_optional_in_between(self): +# foo = Concept(name="foo") +# +# grammar = {foo: Sequence("one", Optional("two"), "three")} +# +# context, parser = self.init([foo], grammar) +# +# res = parser.parse(context, "one two three") +# assert res.status +# assert res.value.body == [cnode("foo", 0, 4, "one two three")] +# +# res = parser.parse(context, "one three") +# assert res.status +# assert res.value.body == [cnode("foo", 0, 2, "one three")] +# +# def test_i_cannot_parse_wrong_input_with_optional(self): +# foo = Concept(name="foo") +# grammar = {foo: Optional("one")} +# +# context, res, wrapper, return_value 
= self.execute([foo], grammar, "two") +# +# assert not res.status +# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) +# assert return_value == [ +# UnrecognizedTokensNode(0, 0, [t("two")]) +# ] +# +# def test_i_can_use_reference(self): +# # when there are multiple matches for the same input +# # Do I need to create a choice concept ? +# # No, create a return value for every possible graph +# +# foo = Concept(name="foo") +# bar = Concept(name="bar") +# grammar = {foo: Sequence("one", "two"), bar: foo} +# context, parser = self.init([foo, bar], grammar) +# res = parser.parse(context, "one two") +# +# assert len(res) == 2 +# +# assert res[0].status +# assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) +# assert res[0].value.body == [cnode("foo", 0, 2, "one two")] +# concept_found_1 = res[0].value.body[0].concept +# assert cbody(concept_found_1) == DoNotResolve("one two") +# +# assert res[1].status +# assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) +# assert res[1].value.body == [cnode("bar", 0, 2, "one two")] +# concept_found_2 = res[1].value.body[0].concept +# # the body and the prop['foo'] are the same concept 'foo' +# assert cbody(concept_found_2) == get_expected(foo, "one two") +# assert id(cprop(concept_found_2, "foo")) == id(cbody(concept_found_2)) +# +# def test_i_can_use_a_reference_with_a_body(self): +# """ +# Same test than before (test_i_can_use_reference()) +# but this time, the concept 'foo' already has a body. 
+# :return: +# """ +# +# foo = Concept(name="foo", body="'foo'") +# bar = Concept(name="bar") +# grammar = {foo: Sequence("one", "two"), bar: foo} +# context, parser = self.init([foo, bar], grammar) +# res = parser.parse(context, "one two") +# +# assert len(res) == 2 +# +# assert res[0].status +# assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) +# assert res[0].value.body == [cnode("foo", 0, 2, "one two")] +# concept_found_1 = res[0].value.body[0].concept +# assert concept_found_1.metadata.body == "'foo'" +# assert cbody(concept_found_1) is None +# +# assert res[1].status +# assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) +# assert res[1].value.body == [cnode("bar", 0, 2, "one two")] +# concept_found_2 = res[1].value.body[0].concept +# assert cbody(concept_found_2) == foo +# # the body and the prop['foo'] are the same concept 'foo' +# assert id(cprop(concept_found_2, "foo")) == id(cbody(concept_found_2)) +# +# def test_i_can_use_context_reference_with_multiple_levels(self): +# """ +# Same than previous one, but with reference of reference +# :return: +# """ +# +# foo = Concept(name="foo") +# bar = Concept(name="bar") +# baz = Concept(name="baz") +# grammar = {foo: Sequence("one", "two"), bar: foo, baz: bar} +# context, parser = self.init([foo, bar, baz], grammar) +# +# res = parser.parse(context, "one two") +# assert len(res) == 3 +# +# assert res[0].status +# assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) +# assert res[0].value.body == [cnode("foo", 0, 2, "one two")] +# concept_found_1 = res[0].value.body[0].concept +# assert cbody(concept_found_1) == DoNotResolve("one two") +# +# assert res[1].status +# assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) +# assert res[1].value.body == [cnode("bar", 0, 2, "one two")] +# concept_found_2 = res[1].value.body[0].concept +# assert cbody(concept_found_2) == get_expected(foo, "one two") +# assert 
id(cprop(concept_found_2, "foo")) == id(cbody(concept_found_2)) +# +# assert res[2].status +# assert context.sheerka.isinstance(res[2].value, BuiltinConcepts.PARSER_RESULT) +# assert res[2].value.body == [cnode("baz", 0, 2, "one two")] +# concept_found_3 = res[2].value.body[0].concept +# expected_foo = get_expected(foo, "one two") +# assert cbody(concept_found_3) == get_expected(bar, expected_foo) +# assert cprop(concept_found_3, "foo") == expected_foo +# assert id(cprop(concept_found_3, "bar")) == id(cbody(concept_found_3)) +# +# def test_order_is_not_important_when_using_references(self): +# """ +# Same test than test_i_can_use_reference(), +# but this time, 'bar' is declared before 'foo' +# So the order of the result is different +# :return: +# """ +# foo = Concept(name="foo") +# bar = Concept(name="bar") +# grammar = {bar: foo, foo: Sequence("one", "two")} +# context, parser = self.init([foo, bar], grammar) +# +# res = parser.parse(context, "one two") +# assert len(res) == 2 +# assert res[0].value.body == [cnode("bar", 0, 2, "one two")] +# assert res[1].value.body == [cnode("foo", 0, 2, "one two")] +# +# def test_i_can_parse_when_reference(self): +# foo = Concept(name="foo") +# bar = Concept(name="bar") +# grammar = {bar: Sequence(foo, OrderedChoice("one", "two")), foo: OrderedChoice("twenty", "thirty")} +# context, parser = self.init([foo, bar], grammar) +# +# res = parser.parse(context, "twenty two") +# assert res.status +# assert res.value.body == [cnode("bar", 0, 2, "twenty two")] +# concept_found = res.value.body[0].concept +# assert cbody(concept_found) == DoNotResolve("twenty two") +# assert cprop(concept_found, "foo") == get_expected(foo, "twenty") +# +# res = parser.parse(context, "thirty one") +# assert res.status +# assert res.value.body == [cnode("bar", 0, 2, "thirty one")] +# concept_found = res.value.body[0].concept +# assert cbody(concept_found) == DoNotResolve("thirty one") +# assert cprop(concept_found, "foo") == get_expected(foo, "thirty") +# 
+# res = parser.parse(context, "twenty") +# assert res.status +# assert res.value.body == [cnode("foo", 0, 0, "twenty")] +# concept_found = res.value.body[0].concept +# assert cbody(concept_found) == DoNotResolve("twenty") +# +# def test_i_can_parse_when_reference_has_a_body(self): +# foo = Concept(name="foo", body="'one'") +# bar = Concept(name="bar") +# grammar = {bar: Sequence(foo, OrderedChoice("one", "two")), foo: OrderedChoice("twenty", "thirty")} +# context, parser = self.init([foo, bar], grammar) +# +# res = parser.parse(context, "twenty two") +# assert res.status +# assert res.value.body == [cnode("bar", 0, 2, "twenty two")] +# concept_found = res.value.body[0].concept +# assert cbody(concept_found) == DoNotResolve("twenty two") +# assert cprop(concept_found, "foo") == foo +# +# res = parser.parse(context, "twenty") +# assert res.status +# assert res.value.body == [cnode("foo", 0, 0, "twenty")] +# concept_found = res.value.body[0].concept +# assert concept_found.metadata.body == "'one'" +# +# def test_i_can_parse_multiple_results(self): +# foo = Concept(name="foo") +# bar = Concept(name="bar") +# grammar = { +# bar: Sequence("one", "two"), +# foo: Sequence("one", OrderedChoice("two", "three")) +# } +# context, parser = self.init([foo, bar], grammar) +# +# res = parser.parse(context, "one two") +# assert len(res) == 2 +# assert res[0].status +# assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) +# assert res[0].value.body == [cnode("bar", 0, 2, "one two")] +# concept_found_0 = res[0].value.body[0].concept +# assert cbody(concept_found_0) == DoNotResolve("one two") +# assert len(concept_found_0.props) == 0 +# +# assert res[1].status +# assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) +# assert res[1].value.body == [cnode("foo", 0, 2, "one two")] +# concept_found_1 = res[1].value.body[0].concept +# assert cbody(concept_found_1) == DoNotResolve("one two") +# assert len(concept_found_1.props) == 0 +# +# 
def test_i_can_parse_multiple_results_times_two(self): +# foo = Concept(name="foo") +# bar = Concept(name="bar") +# grammar = { +# bar: Sequence("one", "two"), +# foo: Sequence("one", OrderedChoice("two", "three")) +# } +# context, parser = self.init([foo, bar], grammar) +# +# res = parser.parse(context, "one two one two") +# assert len(res) == 4 +# assert res[0].status +# assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) +# assert res[0].value.body == [short_cnode("bar", "one two"), short_cnode("bar", "one two")] +# +# assert res[1].status +# assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) +# assert res[1].value.body == [short_cnode("foo", "one two"), short_cnode("bar", "one two")] +# +# assert res[2].status +# assert context.sheerka.isinstance(res[2].value, BuiltinConcepts.PARSER_RESULT) +# assert res[2].value.body == [short_cnode("bar", "one two"), short_cnode("foo", "one two")] +# +# assert res[3].status +# assert context.sheerka.isinstance(res[3].value, BuiltinConcepts.PARSER_RESULT) +# assert res[3].value.body == [short_cnode("foo", "one two"), short_cnode("foo", "one two")] +# +# def test_i_can_parse_multiple_results_when_reference(self): +# """ +# TODO : There should no be two answer, has the one with bar is totally useless +# Note that bar = Sequence(foo, OrderedChoice("one", "two")) does not match +# +# :return: +# """ +# foo = Concept(name="foo") +# bar = Concept(name="bar") +# grammar = { +# bar: Sequence(foo, Optional(OrderedChoice("one", "two"))), +# foo: OrderedChoice("twenty", "thirty") +# } +# context, parser = self.init([foo, bar], grammar) +# +# res = parser.parse(context, "twenty") +# assert len(res) == 2 +# assert res[0].status +# assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) +# assert res[0].value.body == [cnode("bar", 0, 0, "twenty")] +# +# assert res[1].status +# assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) +# assert 
res[1].value.body == [cnode("foo", 0, 0, "twenty")] +# +# def test_i_can_parse_concept_reference_that_is_not_in_grammar(self): +# one = Concept(name="one") +# two = Concept(name="two") +# foo = Concept(name="foo") +# grammar = {foo: Sequence("twenty", OrderedChoice(one, two))} +# context, parser = self.init([one, two, foo], grammar) +# +# res = parser.parse(context, "twenty two") +# assert res.status +# assert res.value.body == [cnode("foo", 0, 2, "twenty two")] +# concept_found = res.value.body[0].concept +# assert cbody(concept_found) == DoNotResolve("twenty two") +# assert cprop(concept_found, "two") == get_expected(two, "two") +# +# res = parser.parse(context, "twenty one") +# assert res.status +# assert res.value.body == [cnode("foo", 0, 2, "twenty one")] +# +# def test_i_can_initialize_when_cyclic_reference(self): +# foo = Concept(name="foo") +# grammar = {foo: Optional("one", ConceptExpression("foo"))} +# context, parser = self.init([foo], grammar) +# +# assert parser.concepts_grammars[foo] == Optional("one", ConceptExpression(foo, rule_name="foo")) +# +# def test_i_cannot_initialize_when_cyclic_reference_when_concept_is_under_construction_and_not_known(self): +# foo = Concept(name="foo").init_key() +# grammar = {foo: Optional("one", ConceptExpression("foo"))} +# +# context = self.get_context() +# parser = BnfNodeParser() +# parser.initialize(context, grammar) +# assert parser.concepts_grammars[foo] == Optional("one", ConceptExpression("foo", rule_name="foo")) +# +# def test_i_can_initialize_when_cyclic_reference_when_concept_is_under_construction_and_known(self): +# foo = Concept(name="foo").init_key() +# grammar = {foo: Optional("one", ConceptExpression("foo"))} +# +# context = self.get_context() +# context.concepts["foo"] = foo +# parser = BnfNodeParser() +# parser.initialize(context, grammar) +# assert parser.concepts_grammars[foo] == Optional("one", ConceptExpression(foo, rule_name="foo")) +# +# def 
test_i_can_parse_concept_reference_that_is_group(self): +# """ +# if one is number, then number is a 'group' +# a group can be found under the sdp entry 'all_' +# """ +# +# context = self.get_context() +# one = Concept(name="one") +# two = Concept(name="two") +# number = Concept(name="number") +# foo = Concept(name="foo") +# for c in [one, two, number, foo]: +# context.sheerka.set_id_if_needed(c, False) +# context.sheerka.add_in_cache(c) +# +# context.sheerka.add_concept_to_set(context, one, number) +# context.sheerka.add_concept_to_set(context, two, number) +# +# grammar = {foo: Sequence("twenty", number)} +# +# parser = BnfNodeParser() +# parser.initialize(context, grammar) +# +# res = parser.parse(context, "twenty two") +# assert res.status +# assert res.value.body == [cnode("foo", 0, 2, "twenty two")] +# concept_found = res.value.body[0].concept +# assert cbody(concept_found) == DoNotResolve("twenty two") +# assert cprop(concept_found, "two") == get_expected(two, "two") +# assert cprop(concept_found, "number") == get_expected(number, get_expected(two, "two")) +# +# res = parser.parse(context, "twenty one") +# assert res.status +# assert res.value.body == [cnode("foo", 0, 2, "twenty one")] +# concept_found = res.value.body[0].concept +# assert cbody(concept_found) == DoNotResolve("twenty one") +# assert cprop(concept_found, "one") == get_expected(one, "one") +# assert cprop(concept_found, "number") == get_expected(number, get_expected(one, "one")) +# +# def test_i_can_parse_zero_or_more(self): +# foo = Concept(name="foo") +# grammar = {foo: ZeroOrMore("one")} +# +# context, res, wrapper, return_value = self.execute([foo], grammar, "one one") +# +# assert res.status +# assert return_value == [cnode("foo", 0, 2, "one one")] +# assert return_value[0].underlying == u(grammar[foo], 0, 2, [u("one", 0, 0), u("one", 2, 2)]) +# +# concept_found = return_value[0].concept +# assert cbody(concept_found) == DoNotResolve("one one") +# +# def 
test_i_can_parse_sequence_and_zero_or_more(self): +# foo = Concept(name="foo") +# grammar = {foo: Sequence(ZeroOrMore("one"), "two")} +# context, parser = self.init([foo], grammar) +# +# res = parser.parse(context, "one one two") +# assert res.status +# assert res.value.value == [cnode("foo", 0, 4, "one one two")] +# +# res = parser.parse(context, "two") +# assert res.status +# assert res.value.value == [cnode("foo", 0, 0, "two")] +# +# def test_i_cannot_parse_zero_and_more_when_wrong_entry(self): +# # TEST WITH UNRECOGNIZED +# foo = Concept(name="foo") +# grammar = {foo: ZeroOrMore("one")} +# context, parser = self.init([foo], grammar) +# +# parser = BnfNodeParser() +# parser.initialize(context, grammar) +# +# res = parser.parse(context, "one two") +# assert not res.status +# assert res.value.value == [ +# cnode("foo", 0, 0, "one"), +# UnrecognizedTokensNode(1, 2, [t(" "), t("two")]) +# ] +# +# res = parser.parse(context, "two") +# assert not res.status +# assert res.value.value == [ +# UnrecognizedTokensNode(0, 0, [t("two")]) +# ] +# +# def test_i_can_parse_zero_and_more_with_separator(self): +# foo = Concept(name="foo") +# grammar = {foo: ZeroOrMore("one", sep=",")} +# +# context, res, wrapper, return_value = self.execute([foo], grammar, "one, one , one") +# +# assert res.status +# assert return_value == [cnode("foo", 0, 7, "one, one , one")] +# +# def test_that_zero_and_more_is_greedy(self): +# foo = Concept(name="foo") +# bar = Concept(name="bar") +# grammar = {foo: ZeroOrMore("one"), bar: "one"} +# +# context, res, wrapper, return_value = self.execute([foo], grammar, "one one one") +# +# assert res.status +# assert return_value == [cnode("foo", 0, 4, "one one one")] +# +# ############## +# ## YOU STOPPED HERE +# +# # next one to do is below +# ############# +# +# +# def test_i_can_parse_one_and_more(self): +# foo = Concept(name="foo") +# grammar = {foo: OneOrMore("one")} +# +# context, res, wrapper, return_value = self.execute([foo], grammar, "one one") +# +# 
assert res.status +# assert return_value == [cnode("foo", 0, 2, "one one")] +# assert return_value[0].underlying == u(grammar[foo], 0, 2, [ +# u("one", 0, 0), +# u("one", 2, 2)]) +# +# +# def test_i_can_parse_sequence_and_one_or_more(self): +# foo = Concept(name="foo") +# grammar = {foo: Sequence(OneOrMore("one"), "two")} +# context, parser = self.init([foo], grammar) +# +# res = parser.parse(context, "one one two") +# assert res.status +# assert res.value.value == [cnode("foo", 0, 4, "one one two")] +# +# res = parser.parse(context, "two") +# assert not res.status +# assert res.value.value == [ +# UnrecognizedTokensNode(0, 0, [t("two")]) +# ] +# +# def test_i_can_parse_one_and_more_with_separator(self): +# foo = Concept(name="foo") +# grammar = {foo: OneOrMore("one", sep=",")} +# +# context, res, wrapper, return_value = self.execute([foo], grammar, "one, one , one") +# +# assert res.status +# assert return_value == [cnode("foo", 0, 7, "one, one , one")] +# assert return_value[0].underlying == u(grammar[foo], 0, 7, [ +# u("one", 0, 0), +# u("one", 3, 3), +# u("one", 7, 7)]) +# +# def test_that_one_and_more_is_greedy(self): +# foo = Concept(name="foo") +# bar = Concept(name="bar") +# grammar = {foo: OneOrMore("one"), bar: "one"} +# +# context, res, wrapper, return_value = self.execute([foo], grammar, "one one one") +# +# assert res.status +# assert return_value == [cnode("foo", 0, 4, "one one one")] +# +# @pytest.mark.skip("Done in BaseNode") +# def test_i_can_detect_infinite_recursion(self): +# foo = Concept(name="foo") +# bar = Concept(name="bar") +# +# grammar = { +# bar: foo, +# foo: bar +# } +# parser = BnfNodeParser() +# parser.initialize(self.get_context(), grammar) +# +# assert bar not in parser.concepts_grammars +# assert foo not in parser.concepts_grammars +# +# @pytest.mark.skip("Done in BaseNode") +# def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice(self): +# foo = Concept(name="foo") +# bar = Concept(name="bar") +# grammar = { +# 
bar: foo, +# foo: OrderedChoice(bar, "foo") +# } +# +# parser = BnfNodeParser() +# parser.initialize(self.get_context(), grammar) +# +# assert foo not in parser.concepts_grammars # removed because of the infinite recursion +# assert bar not in parser.concepts_grammars # removed because of the infinite recursion +# +# # the other way around is possible +# grammar = { +# bar: foo, +# foo: OrderedChoice("foo", bar) +# } +# context, parser = self.init([foo, bar], grammar) +# +# assert foo in parser.concepts_grammars +# assert bar in parser.concepts_grammars +# +# res = parser.parse(context, "foo") +# assert len(res) == 2 +# assert res[0].status +# assert res[0].value.body == [cnode("bar", 0, 0, "foo")] +# assert res[1].status +# assert res[1].value.body == [cnode("foo", 0, 0, "foo")] +# +# def test_i_can_detect_indirect_infinite_recursion_with_sequence(self): +# foo = Concept(name="foo") +# bar = Concept(name="bar") +# +# grammar = { +# bar: foo, +# foo: Sequence("one", bar, "two") +# } +# parser = BnfNodeParser() +# parser.initialize(self.get_context(), grammar) +# +# assert foo not in parser.concepts_grammars # removed because of the infinite recursion +# assert bar not in parser.concepts_grammars # removed because of the infinite recursion +# +# def test_i_can_detect_indirect_infinite_recursion_with_sequence_or_ordered_choice(self): +# foo = Concept(name="foo") +# bar = Concept(name="bar") +# +# grammar = { +# bar: foo, +# foo: Sequence("one", OrderedChoice(bar, "other"), "two") +# } +# parser = BnfNodeParser() +# parser.initialize(self.get_context(), grammar) +# +# assert foo not in parser.concepts_grammars # removed because of the infinite recursion +# assert bar not in parser.concepts_grammars # removed because of the infinite recursion +# +# def test_infinite_recursion_does_not_fail_if_a_concept_is_missing(self): +# foo = Concept(name="foo") +# bar = Concept(name="bar") +# +# grammar = { +# foo: bar +# } +# parser = BnfNodeParser() +# 
parser.initialize(self.get_context(), grammar) +# +# assert foo in parser.concepts_grammars +# +# def test_i_can_detect_indirect_infinite_recursion_with_optional(self): +# # TODO infinite recursion with optional +# pass +# +# def test_i_can_detect_indirect_infinite_recursion_with_zero_and_more(self): +# # TODO infinite recursion with optional +# pass +# +# def test_i_can_detect_indirect_infinite_recursion_with_one_and_more(self): +# # TODO infinite recursion with optional +# pass +# +# def test_i_can_visit_parsing_expression(self): +# mult = Concept(name="mult") +# add = Concept(name="add") +# +# visitor = ConceptVisitor() +# visitor.visit(Sequence(mult, Optional(Sequence("+", add)))) +# +# assert sorted(list(visitor.concepts)) == ["add", "mult"] +# +# def test_i_can_initialize_rule_names(self): +# context = self.get_context() +# foo = Concept(name="foo") +# bar = Concept(name="bar") +# +# grammar = {foo: Sequence("one", "two"), bar: foo} +# parser = BnfNodeParser() +# ret = parser.initialize(context, grammar) +# return_value = ret.body +# +# assert return_value[foo].rule_name == "" +# assert return_value[bar].rule_name == "foo" +# +# @pytest.mark.parametrize("text, end_position", [ +# ("foo", 0), +# ("foo bar", 2), +# ("foo bar ", 3), +# (" foo bar ", 4) +# ]) +# def test_cannot_parser_unknown_concepts(self, text, end_position): +# context, res, wrapper, return_value = self.execute([], {}, text) +# tokens = list(Tokenizer(text))[:-1] +# +# assert not res.status +# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) +# assert return_value == [UnrecognizedTokensNode(0, end_position, tokens)] +# +# def test_i_cannot_parse_when_part_of_the_input_is_unrecognized(self): +# one = Concept(name="one") +# two = Concept(name="two") +# grammar = {one: "one", two: "two"} +# +# context, res, wrapper, return_value = self.execute([one, two], grammar, "one two three") +# +# assert not res.status +# assert context.sheerka.isinstance(wrapper, 
BuiltinConcepts.PARSER_RESULT) +# assert return_value == [ +# ConceptNode(get_expected(one, "one"), 0, 0, source="one", underlying=u("one", 0, 0)), +# ConceptNode(get_expected(two, "two"), 2, 2, source="two", underlying=u("two", 2, 2)), +# UnrecognizedTokensNode(3, 4, [t(" "), t("three")]) +# ] +# +# # def test_i_cannot_parse_when_wrong_sequence(self): +# # foo = Concept(name="foo") +# # grammar = {foo: Sequence("one", "two", "three")} +# # +# # context, res, wrapper, return_value = self.execute([foo], grammar, "one two three one") +# # +# # assert not res.status +# # assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) +# # assert return_value == [ +# # short_cnode("foo", "one two three"), +# # UnrecognizedTokensNode(5, 6, [t(" "), t("one")]) +# # ] +# +# # def test_i_cannot_parse_when_sequence_cannot_match_because_of_end_of_file(self): +# # foo = Concept(name="foo") +# # grammar = {foo: Sequence("one", "two", "three")} +# # +# # context, res, wrapper, return_value = self.execute([foo], grammar, "one two") +# # +# # assert not res.status +# # assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) +# # assert return_value == [ +# # UnrecognizedTokensNode(0, 2, [t("one"), t(" "), t("two")]) +# # ] +# +# def test_i_cannot_parse_multiple_results_when_unknown_tokens_at_the_end(self): +# foo = Concept(name="foo") +# bar = Concept(name="bar") +# grammar = { +# bar: Sequence("one", "two"), +# foo: Sequence("one", OrderedChoice("two", "three")) +# } +# context, parser = self.init([foo, bar], grammar) +# +# res = parser.parse(context, "one two four five") +# +# assert len(res) == 2 +# assert not res[0].status +# assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) +# assert res[0].value.body == [ +# cnode("bar", 0, 2, "one two"), +# UnrecognizedTokensNode(3, 6, [t(" "), t("four"), t(" "), t("five")]) +# ] +# +# assert not res[1].status +# assert context.sheerka.isinstance(res[1].value, 
BuiltinConcepts.PARSER_RESULT) +# assert res[1].value.body == [ +# cnode("foo", 0, 2, "one two"), +# UnrecognizedTokensNode(3, 6, [t(" "), t("four"), t(" "), t("five")]) +# ] +# +# def test_i_cannot_parse_multiple_results_when_beginning_by_unknown_tokens(self): +# foo = Concept(name="foo") +# bar = Concept(name="bar") +# grammar = { +# bar: Sequence("one", "two"), +# foo: Sequence("one", OrderedChoice("two", "three")) +# } +# context, parser = self.init([foo, bar], grammar) +# +# res = parser.parse(context, "four five one two") +# +# assert len(res) == 2 +# assert not res[0].status +# assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) +# assert res[0].value.body == [ +# UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]), +# cnode("bar", 4, 6, "one two"), +# ] +# +# assert not res[1].status +# assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) +# assert res[1].value.body == [ +# UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]), +# cnode("foo", 4, 6, "one two"), +# ] +# +# def test_i_cannot_parse_multiple_results_when_surrounded_by_unknown_tokens(self): +# foo = Concept(name="foo") +# bar = Concept(name="bar") +# grammar = { +# bar: Sequence("one", "two"), +# foo: Sequence("one", OrderedChoice("two", "three")) +# } +# context, parser = self.init([foo, bar], grammar) +# +# res = parser.parse(context, "four five one two six seven") +# assert len(res) == 2 +# assert not res[0].status +# assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) +# assert res[0].value.body == [ +# UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]), +# cnode("bar", 4, 6, "one two"), +# UnrecognizedTokensNode(7, 10, [t(" "), t("six"), t(" "), t("seven")]), +# ] +# +# assert not res[1].status +# assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) +# assert res[1].value.body == [ +# UnrecognizedTokensNode(0, 3, [t("four"), t(" "), 
t("five"), t(" ")]), +# cnode("foo", 4, 6, "one two"), +# UnrecognizedTokensNode(7, 10, [t(" "), t("six"), t(" "), t("seven")]), +# ] +# +# def test_i_cannot_parse_multiple_results_when_unknown_tokens_in_the_middle(self): +# context = self.get_context() +# foo = Concept(name="foo") +# bar = Concept(name="bar") +# baz = Concept(name="baz") +# grammar = { +# bar: Sequence("one", "two"), +# foo: Sequence("one", OrderedChoice("two", "three")), +# baz: StrMatch("six"), +# } +# context, parser = self.init([foo, bar, baz], grammar) +# +# res = parser.parse(context, "one two four five six") +# assert len(res) == 2 +# assert not res[0].status +# assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) +# assert res[0].value.body == [ +# cnode("bar", 0, 2, "one two"), +# UnrecognizedTokensNode(3, 7, [t(" "), t("four"), t(" "), t("five"), t(" ")]), +# cnode("baz", 8, 8, "six"), +# ] +# +# assert not res[1].status +# assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) +# assert res[1].value.body == [ +# cnode("foo", 0, 2, "one two"), +# UnrecognizedTokensNode(3, 7, [t(" "), t("four"), t(" "), t("five"), t(" ")]), +# cnode("baz", 8, 8, "six"), +# ] +# +# def test_i_can_get_the_inner_concept_when_possible(self): +# foo = Concept(name="foo") +# one = Concept(name="one") +# grammar = {foo: Sequence(Optional(ZeroOrMore(one)), ZeroOrMore("one"))} +# +# context, res, wrapper, return_value = self.execute([foo, one], grammar, "one") +# +# assert res.status +# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) +# assert return_value == [cnode("foo", 0, 0, "one")] +# concept_found = return_value[0].concept +# assert cbody(concept_found) == get_expected(one, "one") +# assert id(cprop(concept_found, "one")) == id(cbody(concept_found)) +# +# def test_i_can_get_the_inner_concept_when_possible_with_rule_name(self): +# foo = Concept(name="foo") +# one = Concept(name="one") +# grammar = {foo: Sequence( +# 
Optional(ZeroOrMore(one, rule_name="zero"), rule_name="opt"), +# ZeroOrMore("one"), rule_name="seq")} +# +# context, res, wrapper, return_value = self.execute([foo, one], grammar, "one") +# +# assert res.status +# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) +# assert return_value == [cnode("foo", 0, 0, "one")] +# concept_found = return_value[0].concept +# assert cbody(concept_found) == get_expected(one, "one") +# assert id(cprop(concept_found, "one")) == id(cbody(concept_found)) +# assert id(cprop(concept_found, "zero")) == id(cbody(concept_found)) +# assert id(cprop(concept_found, "opt")) == id(cbody(concept_found)) +# assert id(cprop(concept_found, "seq")) == id(cbody(concept_found)) +# +# def test_i_get_multiple_props_when_zero_or_more(self): +# foo = Concept(name="foo") +# one = Concept(name="one") +# grammar = {foo: ZeroOrMore(one)} +# +# context, res, wrapper, return_value = self.execute([foo, one], grammar, "one one one") +# assert res.status +# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) +# assert return_value == [cnode("foo", 0, 4, "one one one")] +# concept_found = return_value[0].concept +# assert cbody(concept_found) == DoNotResolve("one one one") +# assert len(concept_found.compiled["one"]) == 3 +# assert cprop(concept_found, "one")[0] == get_expected(one) +# assert cprop(concept_found, "one")[1] == get_expected(one) +# assert cprop(concept_found, "one")[2] == get_expected(one) +# assert id(cprop(concept_found, "one")[0]) != id(cprop(concept_found, "one")[1]) +# assert id(cprop(concept_found, "one")[1]) != id(cprop(concept_found, "one")[2]) +# assert id(cprop(concept_found, "one")[2]) != id(cprop(concept_found, "one")[0]) +# +# def test_i_get_multiple_props_when_zero_or_more_and_different_values(self): +# foo = Concept(name="foo") +# one = Concept(name="one") +# grammar = {foo: ZeroOrMore(Sequence(one, "ok", rule_name="seq")), one: OrderedChoice("one", "un", "uno")} +# +# context, res, 
wrapper, return_value = self.execute([foo, one], grammar, "one ok un ok uno ok") +# assert res.status +# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) +# assert return_value == [short_cnode("foo", "one ok un ok uno ok")] +# concept_found = return_value[0].concept +# assert cprop(concept_found, "one")[0] == get_expected(one, "one") +# assert cprop(concept_found, "one")[1] == get_expected(one, "un") +# assert cprop(concept_found, "one")[2] == get_expected(one, "uno") +# assert cprop(concept_found, "seq")[0] == DoNotResolve("one ok") +# assert cprop(concept_found, "seq")[1] == DoNotResolve("un ok") +# assert cprop(concept_found, "seq")[2] == DoNotResolve("uno ok") +# +# @pytest.mark.parametrize("rule, expected", [ +# (StrMatch("string"), "'string'"), +# (StrMatch("string", rule_name="rule_name"), "'string'=rule_name"), +# (Sequence(StrMatch("foo"), StrMatch("bar")), "('foo' 'bar')"), +# (Sequence(StrMatch("foo"), StrMatch("bar"), rule_name="rule_name"), "('foo' 'bar')=rule_name"), +# (OrderedChoice(StrMatch("foo"), StrMatch("bar")), "('foo'|'bar')"), +# (OrderedChoice(StrMatch("foo"), StrMatch("bar"), rule_name="rule_name"), "('foo'|'bar')=rule_name"), +# (Optional(StrMatch("foo")), "'foo'?"), +# (Optional(StrMatch("foo"), rule_name="rule_name"), "'foo'?=rule_name"), +# (ZeroOrMore(StrMatch("foo")), "'foo'*"), +# (ZeroOrMore(StrMatch("foo"), rule_name="rule_name"), "'foo'*=rule_name"), +# (OneOrMore(StrMatch("foo")), "'foo'+"), +# (OneOrMore(StrMatch("foo"), rule_name="rule_name"), "'foo'+=rule_name"), +# (Sequence( +# Optional(StrMatch("foo"), rule_name="a"), +# ZeroOrMore(StrMatch("bar"), rule_name="b"), +# OneOrMore(StrMatch("baz"), rule_name="c"), +# rule_name="d"), "('foo'?=a 'bar'*=b 'baz'+=c)=d"), +# (OrderedChoice( +# Optional(StrMatch("foo"), rule_name="a"), +# ZeroOrMore(StrMatch("bar"), rule_name="b"), +# OneOrMore(StrMatch("baz"), rule_name="c"), +# rule_name="d"), "('foo'?=a|'bar'*=b|'baz'+=c)=d"), +# (Sequence( +# 
OrderedChoice(StrMatch("foo"), StrMatch("bar"), rule_name="a"), +# OrderedChoice(StrMatch("x"), StrMatch("y"), rule_name="b"), +# rule_name="c"), "(('foo'|'bar')=a ('x'|'y')=b)=c") +# ]) +# def test_i_can_encode_grammar(self, rule, expected): +# foo = Concept(name="foo") +# grammar = {foo: rule} +# context, parser = self.init([foo], grammar) +# +# encoded = parser.encode_grammar(parser.concepts_grammars) +# assert encoded["c:foo|1001:"] == expected +# +# bnf_parser = BnfParser() +# parse_res = bnf_parser.parse(context, encoded["c:foo|1001:"]) +# assert parse_res.status +# assert parse_res.value.value == rule +# +# def test_i_can_encode_grammar_when_concept_simple(self): +# foo = Concept(name="foo") +# bar = Concept(name="bar") +# grammar = {foo: ConceptExpression(bar)} +# context, parser = self.init([foo, bar], grammar) +# +# encoded = parser.encode_grammar(parser.concepts_grammars) +# assert encoded["c:foo|1001:"] == "c:bar|1002:=bar" +# +# bnf_parser = BnfParser() +# parse_res = bnf_parser.parse(context, encoded["c:foo|1001:"]) +# assert parse_res.status +# assert parse_res.value.value == grammar[foo] +# +# def test_i_can_encode_grammar_when_concepts(self): +# foo = Concept(name="foo") +# bar = Concept(name="bar") +# baz = Concept(name="baz") +# grammar = {foo: Sequence( +# StrMatch("a"), +# OrderedChoice(ConceptExpression(bar), +# OneOrMore(ConceptExpression(baz)), rule_name="oc"), rule_name="s")} +# context, parser = self.init([foo, bar, baz], grammar) +# +# encoded = parser.encode_grammar(parser.concepts_grammars) +# assert encoded["c:foo|1001:"] == "('a' (c:bar|1002:=bar|c:baz|1003:=baz+)=oc)=s" +# +# bnf_parser = BnfParser() +# parse_res = bnf_parser.parse(context, encoded["c:foo|1001:"]) +# assert parse_res.status +# assert parse_res.value.value == grammar[foo] +# +# def test_i_can_encode_grammar_when_set_concepts(self): +# foo = Concept(name="foo") +# bar = Concept(name="bar") +# baz = Concept(name="baz") +# grammar = {foo: Sequence( +# StrMatch("a"), +# 
OrderedChoice(bar, +# OneOrMore(ConceptExpression(baz)), rule_name="oc"), rule_name="s")} +# context = self.get_context() +# for c in [foo, bar, baz]: +# context.sheerka.add_in_cache(c) +# context.sheerka.set_id_if_needed(c, False) +# context.sheerka.add_concept_to_set(context, baz, bar) +# +# parser = BnfNodeParser() +# parser.initialize(context, grammar) +# +# encoded = parser.encode_grammar(parser.concepts_grammars) +# assert encoded["c:foo|1001:"] == "('a' (c:bar|1002:=bar|c:baz|1003:=baz+)=oc)=s" +# +# bnf_parser = BnfParser() +# parse_res = bnf_parser.parse(context, encoded["c:foo|1001:"]) +# assert parse_res.status +# +# expected = Sequence( +# StrMatch("a"), +# OrderedChoice(ConceptGroupExpression(bar, rule_name="bar"), +# OneOrMore(ConceptExpression(baz, rule_name="baz")), rule_name="oc"), rule_name="s") +# assert parse_res.value.value == expected +# +# def test_i_concept_validation_is_not_set_when_no_variables(self): +# foo = Concept(name="foo") +# grammar = {foo: "foo"} +# +# context, res, wrapper, return_value = self.execute([foo], grammar, "foo") +# assert not return_value[0].concept.metadata.need_validation +# +# def test_i_concept_validation_is_set_when_unnamed_variables_are_found(self): +# foo = Concept(name="foo") +# grammar = {foo: Sequence("foo", OrderedChoice("a", "b"))} +# +# context, res, wrapper, return_value = self.execute([foo], grammar, "foo a") +# assert not return_value[0].concept.metadata.need_validation +# +# def test_i_concept_validation_is_set_when_named_variables_are_found(self): +# foo = Concept(name="foo") +# grammar = {foo: Sequence("foo", OrderedChoice("a", "b", rule_name="var"))} +# +# context, res, wrapper, return_value = self.execute([foo], grammar, "foo a") +# assert return_value[0].concept.metadata.need_validation +# +# +# # +# # def test_i_can_parse_basic_arithmetic_operations_and_resolve_properties(self): +# # context = self.get_context() +# # add = Concept(name="add") +# # mult = Concept(name="mult") +# # atom = 
Concept(name="atom") +# # +# # grammar = { +# # add: Sequence(mult, Optional(Sequence(OrderedChoice('+', '-', rule_name="sign"), add))), +# # mult: Sequence(atom, Optional(Sequence(OrderedChoice('*', '/'), mult))), +# # atom: OrderedChoice(OrderedChoice('1', '2', '3'), Sequence('(', add, ')')), +# # } +# # +# # parser = BnfNodeParser() +# # parser.register(grammar) +# # +# # # res = parser.parse(context, "1") +# # # assert len(res) == 3 # add, mult, atom +# # # +# # # res = parser.parse(context, "1 * 2") +# # # assert len(res) == 2 # add and mult +# # # +# # # res = parser.parse(context, "1 + 2") +# # # assert res.status +# # # assert return_value == [ConceptNode(add, 0, 4, source="1 + 2")] +# # +# # res = parser.parse(context, "1 * 2 + 3") +# # assert res.status +# # assert return_value == [ConceptNode(add, 0, 4, source="1 + 2 + 3")] +# +# def test_i_can_register_concepts_with_the_same_name(self): +# # TODO : concepts are registered by name, +# # what when two concepts have the same name ? 
+# pass +# +# def test_i_can_parse_very_very_long_input(self): +# # TODO: In the current implementation, all the tokens are loaded in memory +# # It's clearly not the good approach +# pass diff --git a/tests/parsers/test_BnfParser.py b/tests/parsers/test_BnfParser.py index b0ffc3b..7a5230e 100644 --- a/tests/parsers/test_BnfParser.py +++ b/tests/parsers/test_BnfParser.py @@ -1,13 +1,12 @@ import pytest - from core.builtin_concepts import BuiltinConcepts -from core.concept import Concept +from core.concept import Concept, DEFINITION_TYPE_BNF from core.tokenizer import Tokenizer, TokenKind, LexerError, Token from parsers.BaseNodeParser import cnode from parsers.BaseParser import UnexpectedTokenErrorNode -from parsers.BnfParser import BnfParser, UnexpectedEndOfFileError from parsers.BnfNodeParser import StrMatch, Optional, ZeroOrMore, OrderedChoice, Sequence, OneOrMore, \ BnfNodeParser, ConceptExpression +from parsers.BnfParser import BnfParser, UnexpectedEndOfFileError from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka @@ -27,6 +26,12 @@ eof_token = Token(TokenKind.EOF, "", 0, 0, 0) class TestBnfParser(TestUsingMemoryBasedSheerka): + def init_parser(self, *concepts): + sheerka, context, *updated = self.init_concepts(*concepts, singleton=True) + parser = BnfParser() + + return sheerka, context, parser, *updated + @pytest.mark.parametrize("expression, expected", [ ("'str'", StrMatch("str")), ("1", StrMatch("1")), @@ -80,8 +85,9 @@ class TestBnfParser(TestUsingMemoryBasedSheerka): OneOrMore(StrMatch("b"), rule_name="y"), rule_name="z")), ]) def test_i_can_parse_regex(self, expression, expected): - parser = BnfParser() - res = parser.parse(self.get_context(), Tokenizer(expression)) + sheerka, context, parser = self.init_parser() + + res = parser.parse(context, Tokenizer(expression)) assert not parser.has_error assert res.status @@ -96,16 +102,11 @@ class TestBnfParser(TestUsingMemoryBasedSheerka): ("'str' = var", Sequence(StrMatch("str"), 
StrMatch("="), c("var"))), ("'str''='var", Sequence(StrMatch("str"), StrMatch("="), c("var"))), ("foo=f", c("foo", "f")), + ("foo=f 'constant'", Sequence(c("foo", "f"), StrMatch("constant"))), ]) def test_i_can_parse_regex_with_concept(self, expression, expected): - foo = Concept("foo") - bar = Concept("bar") - var = Concept("var") - context = self.get_context() + sheerka, context, parser, foo, bar, var = self.init_parser("foo", "bar", "var") - for c in (foo, bar, var): - context.sheerka.add_in_cache(c) - parser = BnfParser() res = parser.parse(context, Tokenizer(expression)) assert not parser.has_error @@ -118,10 +119,9 @@ class TestBnfParser(TestUsingMemoryBasedSheerka): ("foo=f", ConceptExpression("foo", rule_name="f")), ]) def test_i_can_parse_regex_with_concept_when_the_concept_is_still_under_definition(self, expression, expected): - context = self.get_context() + sheerka, context, parser = self.init_parser() context.obj = ClassWithName("foo") - parser = BnfParser() res = parser.parse(context, Tokenizer(expression)) assert not parser.has_error @@ -137,47 +137,43 @@ class TestBnfParser(TestUsingMemoryBasedSheerka): ("'name", LexerError("Missing Trailing quote", "'name", 5, 1, 6)) ]) def test_i_can_detect_errors(self, expression, error): - parser = BnfParser() - res = parser.parse(self.get_context(), Tokenizer(expression)) + sheerka, context, parser = self.init_parser() + + res = parser.parse(context, Tokenizer(expression)) ret_value = res.value.value assert parser.has_error assert not res.status assert ret_value[0] == error def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text(self): - foo = Concept(name="foo") - bar = Concept(name="bar") - context = self.get_context() - context.sheerka.add_in_cache(foo) - context.sheerka.add_in_cache(bar) + sheerka, context, regex_parser, foo, bar = self.init_parser("foo", "bar") - regex_parser = BnfParser() - foo_definition = regex_parser.parse(context, "'twenty' | 'thirty'").value.value - bar_definition = 
regex_parser.parse(context, "foo ('one' | 'two')").value.value + for concept in [foo, bar]: + concept.metadata.definition_type = DEFINITION_TYPE_BNF - concepts = {bar: bar_definition, foo: foo_definition} - concept_parser = BnfNodeParser() - concept_parser.initialize(context, concepts) + foo.bnf = regex_parser.parse(context, "'twenty' | 'thirty'").value.value + bar.bnf = regex_parser.parse(context, "foo ('one' | 'two')").value.value - res = concept_parser.parse(context, "twenty two") + bnf_parser = BnfNodeParser() + bnf_parser.init_from_concepts(context, [foo, bar]) + + res = bnf_parser.parse(context, "twenty two") assert res.status assert res.value.body == [cnode("bar", 0, 2, "twenty two")] - res = concept_parser.parse(context, "thirty one") + res = bnf_parser.parse(context, "thirty one") assert res.status assert res.value.body == [cnode("bar", 0, 2, "thirty one")] - res = concept_parser.parse(context, "twenty") + res = bnf_parser.parse(context, "twenty") assert res.status assert res.value.body == [cnode("foo", 0, 0, "twenty")] def test_i_cannot_parse_when_too_many_concepts(self): - foo1 = Concept(name="foo", body="1") - foo2 = Concept(name="foo", body="2") - context = self.get_context() - context.sheerka.cache_by_key["foo"] = [foo1, foo2] + sheerka, context, regex_parser, foo1, foo2 = self.init_parser( + Concept(name="foo", body="1"), + Concept(name="foo", body="2")) - regex_parser = BnfParser() res = regex_parser.parse(context, "foo") assert not res.status @@ -185,9 +181,7 @@ class TestBnfParser(TestUsingMemoryBasedSheerka): assert res.value.body == ('key', 'foo') def test_i_cannot_parse_when_unknown_concept(self): - context = self.get_context() - - regex_parser = BnfParser() + sheerka, context, regex_parser = self.init_parser() res = regex_parser.parse(self.get_context(), "foo") assert not res.status diff --git a/tests/parsers/test_ConceptsWithConceptsParser.py b/tests/parsers/test_ConceptsWithConceptsParser.py index 97cc2d3..caaa378 100644 --- 
a/tests/parsers/test_ConceptsWithConceptsParser.py +++ b/tests/parsers/test_ConceptsWithConceptsParser.py @@ -1,194 +1,193 @@ -import ast - -import pytest - -from core.builtin_concepts import ParserResultConcept, ReturnValueConcept, BuiltinConcepts -from core.concept import Concept -from core.tokenizer import Token, TokenKind, Tokenizer -from parsers.BaseNodeParser import SourceCodeNode -from parsers.BnfNodeParser import ConceptNode, UnrecognizedTokensNode -from parsers.ConceptsWithConceptsParser import ConceptsWithConceptsParser -from parsers.MultipleConceptsParser import MultipleConceptsParser -from parsers.PythonParser import PythonNode - -from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka - -multiple_concepts_parser = MultipleConceptsParser() - - -def ret_val(*args): - result = [] - index = 0 - source = "" - for item in args: - if isinstance(item, Concept): - tokens = [Token(TokenKind.IDENTIFIER, item.name, 0, 0, 0)] - result.append(ConceptNode(item, index, index, tokens, item.name)) - index += 1 - source += item.name - elif isinstance(item, PythonNode): - tokens = list(Tokenizer(item.source))[:-1] # strip trailing EOF - result.append(SourceCodeNode(item, index, index + len(tokens) - 1, tokens, item.source)) - index += len(tokens) - source += item.source - else: - tokens = list(Tokenizer(item))[:-1] # strip trailing EOF - result.append(UnrecognizedTokensNode(index, index + len(tokens) - 1, tokens)) - index += len(tokens) - source += item - - return ReturnValueConcept( - "who", - False, - ParserResultConcept(parser=multiple_concepts_parser, value=result, source=source)) - - -class TestConceptsWithConceptsParser(TestUsingMemoryBasedSheerka): - - def init(self, concepts, inputs): - context = self.get_context() - for concept in concepts: - context.sheerka.create_new_concept(context, concept) - - return context, ret_val(*inputs) - - def execute(self, concepts, inputs): - context, input_return_values = self.init(concepts, inputs) - - parser = 
ConceptsWithConceptsParser() - result = parser.parse(context, input_return_values.body) - - wrapper = result.body - return_value = result.body.body - - return context, parser, result, wrapper, return_value - - @pytest.mark.parametrize("text, interested", [ - ("not parser result", False), - (ParserResultConcept(parser="not multiple_concepts_parser"), False), - (ParserResultConcept(parser=multiple_concepts_parser, value=[UnrecognizedTokensNode(0, 0, [])]), True), - ]) - def test_not_interested(self, text, interested): - context = self.get_context() - - res = ConceptsWithConceptsParser().parse(context, text) - if interested: - assert res is not None - else: - assert res is None - - def test_i_can_parse_composition_of_concepts(self): - foo = Concept("foo") - bar = Concept("bar") - plus = Concept("a plus b").def_prop("a").def_prop("b") - - context, parser, result, wrapper, return_value = self.execute([foo, bar, plus], [foo, " plus ", bar]) - - assert result.status - assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - assert result.who == wrapper.parser.name - assert wrapper.source == "foo plus bar" - assert context.sheerka.isinstance(return_value, plus) - - assert return_value.compiled["a"] == foo - assert return_value.compiled["b"] == bar - - # sanity check, I can evaluate the result - evaluated = context.sheerka.evaluate_concept(self.get_context(context.sheerka, True), return_value) - assert evaluated.key == return_value.key - assert evaluated.get_prop("a") == foo.init_key() - assert evaluated.get_prop("b") == bar.init_key() - - def test_i_can_parse_when_composition_of_source_code(self): - plus = Concept("a plus b", body="a + b").def_prop("a").def_prop("b") - left = PythonNode("1+1", ast.parse("1+1", mode="eval")) - right = PythonNode("2+2", ast.parse("2+2", mode="eval")) - context, parser, result, wrapper, return_value = self.execute([plus], [left, " plus ", right]) - - assert result.status - assert context.sheerka.isinstance(wrapper, 
BuiltinConcepts.PARSER_RESULT) - assert result.who == wrapper.parser.name - assert wrapper.source == "1+1 plus 2+2" - assert context.sheerka.isinstance(return_value, plus) - - left_parser_result = ParserResultConcept(parser=parser, source="1+1", value=left) - right_parser_result = ParserResultConcept(parser=parser, source="2+2", value=right) - assert return_value.compiled["a"] == [ReturnValueConcept(parser.name, True, left_parser_result)] - assert return_value.compiled["b"] == [ReturnValueConcept(parser.name, True, right_parser_result)] - - # sanity check, I can evaluate the result - evaluated = context.sheerka.evaluate_concept(self.get_context(context.sheerka, True), return_value) - assert evaluated.key == return_value.key - assert evaluated.get_prop("a") == 2 - assert evaluated.get_prop("b") == 4 - assert evaluated.body == 6 - - def test_i_can_parse_when_mix_of_concept_and_code(self): - plus = Concept("a plus b").def_prop("a").def_prop("b") - code = PythonNode("1+1", ast.parse("1+1", mode="eval")) - foo = Concept("foo") - context, parser, result, wrapper, return_value = self.execute([plus, foo], [foo, " plus ", code]) - - assert result.status - assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - assert result.who == wrapper.parser.name - assert wrapper.source == "foo plus 1+1" - assert context.sheerka.isinstance(return_value, plus) - - code_parser_result = ParserResultConcept(parser=parser, source="1+1", value=code) - assert return_value.compiled["a"] == foo - assert return_value.compiled["b"] == [ReturnValueConcept(parser.name, True, code_parser_result)] - - # sanity check, I can evaluate the result - evaluated = context.sheerka.evaluate_concept(self.get_context(context.sheerka, True), return_value) - assert evaluated.key == return_value.key - assert evaluated.get_prop("a") == foo.init_key() - assert evaluated.get_prop("b") == 2 - - def test_i_can_parse_when_multiple_concepts_are_recognized(self): - foo = Concept("foo") - bar = 
Concept("bar") - plus_1 = Concept("a plus b", body="body1").def_prop("a").def_prop("b") - plus_2 = Concept("a plus b", body="body2").def_prop("a").def_prop("b") - - context, input_return_values = self.init([foo, bar, plus_1, plus_2], [foo, " plus ", bar]) - parser = ConceptsWithConceptsParser() - result = parser.parse(context, input_return_values.body) - - assert len(result) == 2 - - res = result[0] - wrapper = res.value - return_value = res.value.value - assert res.status - assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - assert res.who == wrapper.parser.name - assert wrapper.source == "foo plus bar" - assert context.sheerka.isinstance(return_value, plus_1) - assert return_value.compiled["a"] == foo - assert return_value.compiled["b"] == bar - - res = result[1] - wrapper = res.value - return_value = res.value.value - assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - assert res.who == wrapper.parser.name - assert wrapper.source == "foo plus bar" - assert context.sheerka.isinstance(return_value, plus_2) - assert return_value.compiled["a"] == foo - assert return_value.compiled["b"] == bar - - def test_i_cannot_parse_when_unknown_concept(self): - foo = Concept("foo") - bar = Concept("bar") - - context, input_return_values = self.init([foo, bar], [foo, " plus ", bar]) - parser = ConceptsWithConceptsParser() - result = parser.parse(context, input_return_values.body) - wrapper = result.body - return_value = result.body.body - - assert not result.status - assert context.sheerka.isinstance(wrapper, BuiltinConcepts.NOT_FOR_ME) - assert result.who == parser.name - assert return_value == input_return_values.body.body +# import ast +# +# import pytest +# +# from core.builtin_concepts import ParserResultConcept, ReturnValueConcept, BuiltinConcepts +# from core.concept import Concept +# from core.tokenizer import Token, TokenKind, Tokenizer +# from parsers.BaseNodeParser import SourceCodeNode, ConceptNode, 
UnrecognizedTokensNode +# from parsers.ConceptsWithConceptsParser import ConceptsWithConceptsParser +# from parsers.MultipleConceptsParser import MultipleConceptsParser +# from parsers.PythonParser import PythonNode +# +# from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka +# +# multiple_concepts_parser = MultipleConceptsParser() +# +# +# def ret_val(*args): +# result = [] +# index = 0 +# source = "" +# for item in args: +# if isinstance(item, Concept): +# tokens = [Token(TokenKind.IDENTIFIER, item.name, 0, 0, 0)] +# result.append(ConceptNode(item, index, index, tokens, item.name)) +# index += 1 +# source += item.name +# elif isinstance(item, PythonNode): +# tokens = list(Tokenizer(item.source))[:-1] # strip trailing EOF +# result.append(SourceCodeNode(item, index, index + len(tokens) - 1, tokens, item.source)) +# index += len(tokens) +# source += item.source +# else: +# tokens = list(Tokenizer(item))[:-1] # strip trailing EOF +# result.append(UnrecognizedTokensNode(index, index + len(tokens) - 1, tokens)) +# index += len(tokens) +# source += item +# +# return ReturnValueConcept( +# "who", +# False, +# ParserResultConcept(parser=multiple_concepts_parser, value=result, source=source)) +# +# +# class TestConceptsWithConceptsParser(TestUsingMemoryBasedSheerka): +# +# def init(self, concepts, inputs): +# context = self.get_context() +# for concept in concepts: +# context.sheerka.create_new_concept(context, concept) +# +# return context, ret_val(*inputs) +# +# def execute(self, concepts, inputs): +# context, input_return_values = self.init(concepts, inputs) +# +# parser = ConceptsWithConceptsParser() +# result = parser.parse(context, input_return_values.body) +# +# wrapper = result.body +# return_value = result.body.body +# +# return context, parser, result, wrapper, return_value +# +# @pytest.mark.parametrize("text, interested", [ +# ("not parser result", False), +# (ParserResultConcept(parser="not multiple_concepts_parser"), False), +# 
(ParserResultConcept(parser=multiple_concepts_parser, value=[UnrecognizedTokensNode(0, 0, [])]), True), +# ]) +# def test_not_interested(self, text, interested): +# context = self.get_context() +# +# res = ConceptsWithConceptsParser().parse(context, text) +# if interested: +# assert res is not None +# else: +# assert res is None +# +# def test_i_can_parse_composition_of_concepts(self): +# foo = Concept("foo") +# bar = Concept("bar") +# plus = Concept("a plus b").def_var("a").def_var("b") +# +# context, parser, result, wrapper, return_value = self.execute([foo, bar, plus], [foo, " plus ", bar]) +# +# assert result.status +# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) +# assert result.who == wrapper.parser.name +# assert wrapper.source == "foo plus bar" +# assert context.sheerka.isinstance(return_value, plus) +# +# assert return_value.compiled["a"] == foo +# assert return_value.compiled["b"] == bar +# +# # sanity check, I can evaluate the result +# evaluated = context.sheerka.evaluate_concept(self.get_context(context.sheerka, True), return_value) +# assert evaluated.key == return_value.key +# assert evaluated.get_prop("a") == foo.init_key() +# assert evaluated.get_prop("b") == bar.init_key() +# +# def test_i_can_parse_when_composition_of_source_code(self): +# plus = Concept("a plus b", body="a + b").def_var("a").def_var("b") +# left = PythonNode("1+1", ast.parse("1+1", mode="eval")) +# right = PythonNode("2+2", ast.parse("2+2", mode="eval")) +# context, parser, result, wrapper, return_value = self.execute([plus], [left, " plus ", right]) +# +# assert result.status +# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) +# assert result.who == wrapper.parser.name +# assert wrapper.source == "1+1 plus 2+2" +# assert context.sheerka.isinstance(return_value, plus) +# +# left_parser_result = ParserResultConcept(parser=parser, source="1+1", value=left) +# right_parser_result = ParserResultConcept(parser=parser, 
source="2+2", value=right) +# assert return_value.compiled["a"] == [ReturnValueConcept(parser.name, True, left_parser_result)] +# assert return_value.compiled["b"] == [ReturnValueConcept(parser.name, True, right_parser_result)] +# +# # sanity check, I can evaluate the result +# evaluated = context.sheerka.evaluate_concept(self.get_context(context.sheerka, True), return_value) +# assert evaluated.key == return_value.key +# assert evaluated.get_prop("a") == 2 +# assert evaluated.get_prop("b") == 4 +# assert evaluated.body == 6 +# +# def test_i_can_parse_when_mix_of_concept_and_code(self): +# plus = Concept("a plus b").def_var("a").def_var("b") +# code = PythonNode("1+1", ast.parse("1+1", mode="eval")) +# foo = Concept("foo") +# context, parser, result, wrapper, return_value = self.execute([plus, foo], [foo, " plus ", code]) +# +# assert result.status +# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) +# assert result.who == wrapper.parser.name +# assert wrapper.source == "foo plus 1+1" +# assert context.sheerka.isinstance(return_value, plus) +# +# code_parser_result = ParserResultConcept(parser=parser, source="1+1", value=code) +# assert return_value.compiled["a"] == foo +# assert return_value.compiled["b"] == [ReturnValueConcept(parser.name, True, code_parser_result)] +# +# # sanity check, I can evaluate the result +# evaluated = context.sheerka.evaluate_concept(self.get_context(context.sheerka, True), return_value) +# assert evaluated.key == return_value.key +# assert evaluated.get_prop("a") == foo.init_key() +# assert evaluated.get_prop("b") == 2 +# +# def test_i_can_parse_when_multiple_concepts_are_recognized(self): +# foo = Concept("foo") +# bar = Concept("bar") +# plus_1 = Concept("a plus b", body="body1").def_var("a").def_var("b") +# plus_2 = Concept("a plus b", body="body2").def_var("a").def_var("b") +# +# context, input_return_values = self.init([foo, bar, plus_1, plus_2], [foo, " plus ", bar]) +# parser = 
ConceptsWithConceptsParser() +# result = parser.parse(context, input_return_values.body) +# +# assert len(result) == 2 +# +# res = result[0] +# wrapper = res.value +# return_value = res.value.value +# assert res.status +# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) +# assert res.who == wrapper.parser.name +# assert wrapper.source == "foo plus bar" +# assert context.sheerka.isinstance(return_value, plus_1) +# assert return_value.compiled["a"] == foo +# assert return_value.compiled["b"] == bar +# +# res = result[1] +# wrapper = res.value +# return_value = res.value.value +# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) +# assert res.who == wrapper.parser.name +# assert wrapper.source == "foo plus bar" +# assert context.sheerka.isinstance(return_value, plus_2) +# assert return_value.compiled["a"] == foo +# assert return_value.compiled["b"] == bar +# +# def test_i_cannot_parse_when_unknown_concept(self): +# foo = Concept("foo") +# bar = Concept("bar") +# +# context, input_return_values = self.init([foo, bar], [foo, " plus ", bar]) +# parser = ConceptsWithConceptsParser() +# result = parser.parse(context, input_return_values.body) +# wrapper = result.body +# return_value = result.body.body +# +# assert not result.status +# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.NOT_FOR_ME) +# assert result.who == parser.name +# assert return_value == input_return_values.body.body diff --git a/tests/parsers/test_DefaultParser.py b/tests/parsers/test_DefaultParser.py index df0b136..dc2f5c5 100644 --- a/tests/parsers/test_DefaultParser.py +++ b/tests/parsers/test_DefaultParser.py @@ -1,14 +1,14 @@ -import pytest import ast +import pytest from core.builtin_concepts import ParserResultConcept, BuiltinConcepts, ReturnValueConcept -from core.concept import Concept, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF -from parsers.BnfNodeParser import OrderedChoice, StrMatch, ConceptExpression -from parsers.PythonParser import 
PythonParser, PythonNode +from core.concept import DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF from core.tokenizer import Keywords, Tokenizer, LexerError +from parsers.BnfNodeParser import OrderedChoice, ConceptExpression, StrMatch +from parsers.BnfParser import BnfParser from parsers.DefaultParser import DefaultParser, NameNode, SyntaxErrorNode, CannotHandleErrorNode, IsaConceptNode from parsers.DefaultParser import UnexpectedTokenErrorNode, DefConceptNode -from parsers.BnfParser import BnfParser +from parsers.PythonParser import PythonParser, PythonNode from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka @@ -63,6 +63,11 @@ def get_concept_part(part): class TestDefaultParser(TestUsingMemoryBasedSheerka): + def init_parser(self, *concepts): + sheerka, concept, *updated = self.init_concepts(*concepts, singleton=True) + parser = DefaultParser() + return sheerka, concept, parser, *updated + @pytest.mark.parametrize("text, expected", [ ("def concept hello", get_def_concept(name="hello")), ("def concept hello ", get_def_concept(name="hello")), @@ -76,8 +81,8 @@ class TestDefaultParser(TestUsingMemoryBasedSheerka): ("def concept hello as 1 + 1", get_def_concept(name="hello", body="1 + 1")), ]) def test_i_can_parse_def_concept(self, text, expected): - parser = DefaultParser() - res = parser.parse(self.get_context(), text) + sheerka, context, parser = self.init_parser() + res = parser.parse(context, text) node = res.value.value assert res.status @@ -93,8 +98,8 @@ pre isinstance(a, int) and isinstance(b, float) post isinstance(res, int) as res = a + b """ - parser = DefaultParser() - res = parser.parse(self.get_context(), text) + sheerka, context, parser = self.init_parser() + res = parser.parse(context, text) return_value = res.value expected_concept = get_def_concept( name="a plus b", @@ -123,8 +128,8 @@ func(a) ast.parse("def func(x):\n return x+1\nfunc(a)", mode="exec")) ) - parser = DefaultParser() - res = parser.parse(self.get_context(), text) + 
sheerka, context, parser = self.init_parser() + res = parser.parse(context, text) return_value = res.value assert res.status @@ -146,8 +151,8 @@ def concept add one to a as: ast.parse("def func(x):\n return x+1\nfunc(a)", mode="exec")) ) - parser = DefaultParser() - res = parser.parse(self.get_context(), text) + sheerka, context, parser = self.init_parser() + res = parser.parse(context, text) return_value = res.value assert res.status @@ -162,8 +167,8 @@ def func(x): func(a) """ - parser = DefaultParser() - res = parser.parse(self.get_context(), text) + sheerka, context, parser = self.init_parser() + res = parser.parse(context, text) return_value = res.value assert not res.status @@ -178,9 +183,7 @@ def concept add one to a as return x+1 func(a) """ - context = self.get_context() - - parser = DefaultParser() + sheerka, context, parser = self.init_parser() res = parser.parse(context, text) return_value = res.value @@ -190,8 +193,8 @@ def concept add one to a as def test_name_is_mandatory(self): text = "def concept as 'hello'" - parser = DefaultParser() - res = parser.parse(self.get_context(), text) + sheerka, context, parser = self.init_parser() + res = parser.parse(context, text) return_value = res.value assert not res.status @@ -203,8 +206,8 @@ def concept add one to a as text = "def hello as a where b pre c post d" expected_concept = get_def_concept(name="hello", body="a", where="b", pre="c", post="d") - parser = DefaultParser() - res = parser.parse(self.get_context(), text) + sheerka, context, parser = self.init_parser() + res = parser.parse(context, text) return_value = res.value assert not res.status @@ -221,10 +224,7 @@ def concept add one to a as "def concept hello as 1+" ]) def test_i_can_detect_error_in_declaration(self, text): - context = self.get_context() - sheerka = context.sheerka - - parser = DefaultParser() + sheerka, context, parser = self.init_parser() res = parser.parse(context, text) return_value = res.value @@ -234,21 +234,18 @@ def concept add 
one to a as def test_new_line_is_not_allowed_in_the_name(self): text = "def concept hello \n my friend as 'hello'" - parser = DefaultParser() - res = parser.parse(self.get_context(), text) + sheerka, context, parser = self.init_parser() + res = parser.parse(context, text) return_value = res.value assert not res.status assert return_value.value == [SyntaxErrorNode([], "Newline are not allowed in name.")] def test_i_can_parse_def_concept_from_bnf(self): - context = self.get_context() - a_concept = Concept("a_concept") - context.sheerka.add_in_cache(a_concept) - text = "def concept name from bnf a_concept | 'a_string' as __definition[0]" - parser = DefaultParser() + sheerka, context, parser, a_concept = self.init_parser("a_concept") res = parser.parse(context, text) + node = res.value.value definition = OrderedChoice(ConceptExpression(a_concept, rule_name="a_concept"), StrMatch("a_string")) parser_result = ParserResultConcept(BnfParser(), "a_concept | 'a_string'", None, definition, definition) @@ -261,12 +258,8 @@ def concept add one to a as assert node == expected def test_i_can_parse_def_concept_where_bnf_references_itself(self): - context = self.get_context() - a_concept = Concept("a_concept") - context.sheerka.add_in_cache(a_concept) - text = "def concept name from bnf 'a' + name?" 
- parser = DefaultParser() + sheerka, context, parser, a_concept = self.init_parser("a_concept") parser.parse(context, text) assert not parser.has_error @@ -277,9 +270,8 @@ def concept add one to a as "def concept name from as here is my body" ]) def test_i_can_detect_empty_bnf_declaration(self, text): - - parser = DefaultParser() - res = parser.parse(self.get_context(), text) + sheerka, context, parser = self.init_parser() + res = parser.parse(context, text) assert not res.status assert res.value.value[0] == SyntaxErrorNode([], "Empty declaration") @@ -288,8 +280,8 @@ def concept add one to a as "def concept addition from a plus b as a + b", "def concept addition from def a plus b as a + b"]) def test_i_can_def_concept_from_definition(self, text): - parser = DefaultParser() - res = parser.parse(self.get_context(), text) + sheerka, context, parser = self.init_parser() + res = parser.parse(context, text) expected = get_def_concept("addition", definition="a plus b", body="a + b") node = res.value.value @@ -301,8 +293,7 @@ def concept add one to a as def test_i_can_detect_not_for_me(self): text = "hello world" - context = self.get_context() - parser = DefaultParser() + sheerka, context, parser = self.init_parser() res = parser.parse(context, text) assert not res.status @@ -310,9 +301,9 @@ def concept add one to a as assert isinstance(res.value.body[0], CannotHandleErrorNode) def test_i_can_parse_is_a(self): - parser = DefaultParser() text = "the name of my 'concept' isa the name of the set" - res = parser.parse(self.get_context(), text) + sheerka, context, parser = self.init_parser() + res = parser.parse(context, text) expected = IsaConceptNode([], concept=NameNode(list(Tokenizer("the name of my 'concept'"))), set=NameNode(list(Tokenizer("the name of the set")))) @@ -331,8 +322,8 @@ def concept add one to a as "def concept_name" ]) def test_i_cannot_parse_invalid_entries(self, text): - parser = DefaultParser() - res = parser.parse(self.get_context(), text) + sheerka, 
context, parser = self.init_parser() + res = parser.parse(context, text) assert not res.status assert isinstance(res.body, ParserResultConcept) @@ -347,8 +338,8 @@ def concept add one to a as ("def concept c::", "Concept identifiers not found", ""), ]) def test_i_cannot_parse_when_tokenizer_fails(self, text, error_msg, error_text): - parser = DefaultParser() - res = parser.parse(self.get_context(), text) + sheerka, context, parser = self.init_parser() + res = parser.parse(context, text) assert not res.status assert isinstance(res.body, ParserResultConcept) @@ -357,10 +348,8 @@ def concept add one to a as assert res.body.body[0].text == error_text def test_i_cannot_parse_bnf_definition_referencing_unknown_concept(self): - context = self.get_context() text = "def concept name from bnf unknown" - - parser = DefaultParser() + sheerka, context, parser = self.init_parser() res = parser.parse(context, text) assert not res.status diff --git a/tests/parsers/test_ExactConceptParser.py b/tests/parsers/test_ExactConceptParser.py index c893720..83bb270 100644 --- a/tests/parsers/test_ExactConceptParser.py +++ b/tests/parsers/test_ExactConceptParser.py @@ -6,19 +6,19 @@ from parsers.ExactConceptParser import ExactConceptParser from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka -def metadata_prop(concept, prop_name): - for name, value in concept.metadata.props: +def variable_def(concept, prop_name): + for name, value in concept.metadata.variables: if name == prop_name: return value return None -def get_concept(name, variables): +def get_concept(name, variables=None): c = Concept(name=name) if variables: for v in variables: - c.def_prop(v) + c.def_var(v) c.init_key() return c @@ -50,9 +50,9 @@ class TestExactConceptParser(TestUsingMemoryBasedSheerka): # TODO: the last tuple is not possible, so the algo can be improved def test_i_can_recognize_a_simple_concept(self): - context = self.get_context() - concept = get_concept("hello world", []) - 
context.sheerka.add_in_cache(concept) + sheerka = self.get_sheerka(singleton=True) + context = self.get_context(sheerka) + concept = self.create_concept_lite(sheerka, "hello world") source = "hello world" results = ExactConceptParser().parse(context, source) @@ -64,9 +64,10 @@ class TestExactConceptParser(TestUsingMemoryBasedSheerka): assert not concept_found.metadata.need_validation def test_i_can_recognize_concepts_defined_several_times(self): - context = self.get_context() - context.sheerka.add_in_cache(get_concept("hello world", [])) - context.sheerka.add_in_cache(get_concept("hello a", ["a"])) + sheerka = self.get_sheerka(singleton=True) + context = self.get_context(sheerka) + self.create_concept_lite(sheerka, "hello world") + self.create_concept_lite(sheerka, "hello a", variables=["a"]) source = "hello world" results = ExactConceptParser().parse(context, source) @@ -76,7 +77,7 @@ class TestExactConceptParser(TestUsingMemoryBasedSheerka): assert results[0].status assert results[0].value.value.name == "hello a" - assert metadata_prop(results[0].value.value, "a") == "world" + assert variable_def(results[0].value.value, "a") == "world" assert results[0].value.value.metadata.need_validation assert results[1].status @@ -84,9 +85,10 @@ class TestExactConceptParser(TestUsingMemoryBasedSheerka): assert not results[1].value.value.metadata.need_validation def test_i_can_recognize_a_concept_with_variables(self): - context = self.get_context() - concept = get_concept("a + b", ["a", "b"]) - context.sheerka.add_in_cache(concept) + sheerka = self.get_sheerka(singleton=True) + context = self.get_context(sheerka) + concept = self.create_concept_lite(sheerka, "a + b", ["a", "b"]) + source = "10 + 5" results = ExactConceptParser().parse(context, source) @@ -95,46 +97,47 @@ class TestExactConceptParser(TestUsingMemoryBasedSheerka): concept_found = results[0].value.value assert concept_found.key == concept.key - assert metadata_prop(concept_found, "a") == "10" - assert 
metadata_prop(concept_found, "b") == "5" + assert variable_def(concept_found, "a") == "10" + assert variable_def(concept_found, "b") == "5" assert concept_found.metadata.need_validation def test_i_can_recognize_a_concept_with_duplicate_variables(self): - context = self.get_context() - concept = get_concept("a + b + a", ["a", "b"]) - context.sheerka.cache_by_key[concept.key] = concept + sheerka = self.get_sheerka(singleton=True) + context = self.get_context(sheerka) + concept = self.create_concept_lite(sheerka, "a + b + a", ["a", "b"]) + source = "10 + 5 + 10" - results = ExactConceptParser().parse(context, source) + results = ExactConceptParser(max_word_size=5).parse(context, source) assert len(results) == 1 assert results[0].status concept_found = results[0].value.value assert concept_found.key == concept.key - assert metadata_prop(concept_found, "a") == "10" - assert metadata_prop(concept_found, "b") == "5" + assert variable_def(concept_found, "a") == "10" + assert variable_def(concept_found, "b") == "5" assert concept_found.metadata.need_validation def test_i_can_manage_unknown_concept(self): - context = self.get_context() - source = "def concept hello world" # this is not a concept by itself + context = self.get_context(self.get_sheerka(singleton=True)) + source = "def concept hello" # this is not a concept by itself res = ExactConceptParser().parse(context, source) assert not res.status assert context.sheerka.isinstance(res.value, BuiltinConcepts.UNKNOWN_CONCEPT) - assert res.value.body == "def concept hello world" + assert res.value.body == "def concept hello" def test_i_can_detect_concepts_too_long(self): - context = self.get_context() - source = "a very very long concept that cannot be an unique one" + context = self.get_context(self.get_sheerka(singleton=True)) + source = "a very very long concept" res = ExactConceptParser().parse(context, source) assert not res.status assert context.sheerka.isinstance(res.value, BuiltinConcepts.CONCEPT_TOO_LONG) - assert 
res.value.body == "a very very long concept that cannot be an unique one" + assert res.value.body == "a very very long concept" def test_i_can_detect_concept_from_tokens(self): - context = self.get_context() + context = self.get_context(self.get_sheerka(singleton=True)) concept = get_concept("hello world", []) context.sheerka.add_in_cache(concept) diff --git a/tests/parsers/test_MultipleConceptsParser.py b/tests/parsers/test_MultipleConceptsParser.py index dd92a23..90e4dfa 100644 --- a/tests/parsers/test_MultipleConceptsParser.py +++ b/tests/parsers/test_MultipleConceptsParser.py @@ -1,216 +1,216 @@ -import pytest - -from core.builtin_concepts import ParserResultConcept, BuiltinConcepts -from core.concept import Concept -from core.tokenizer import Tokenizer, TokenKind, Token -from parsers.BaseNodeParser import cnode, scnode, utnode, SourceCodeNode -from parsers.BnfNodeParser import BnfNodeParser, ConceptNode, Sequence -from parsers.MultipleConceptsParser import MultipleConceptsParser -from parsers.PythonParser import PythonNode - -from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka - - -def get_return_value(context, grammar, expression): - parser = BnfNodeParser() - parser.initialize(context, grammar) - - ret_val = parser.parse(context, expression) - assert not ret_val.status - return ret_val - - -class TestMultipleConceptsParser(TestUsingMemoryBasedSheerka): - - def init(self, concepts, grammar, expression): - context = self.get_context() - for c in concepts: - context.sheerka.create_new_concept(context, c) - return_value = get_return_value(context, grammar, expression) - - return context, return_value - - def test_not_interested_if_not_parser_result(self): - context = self.get_context() - text = "not parser result" - - res = MultipleConceptsParser().parse(context, text) - assert res is None - - def test_not_interested_if_not_from_concept_lexer_parser(self): - context = self.get_context() - text = ParserResultConcept(parser="not concept 
lexer", value="some value") - - res = MultipleConceptsParser().parse(context, text) - assert res is None - - def test_i_can_parse_exact_concepts(self): - foo = Concept("foo", body="'foo'") - bar = Concept("bar", body="'bar'") - baz = Concept("baz", body="'baz'") - grammar = {} - context, return_value = self.init([foo, bar, baz], grammar, "bar foo baz") - - parser = MultipleConceptsParser() - ret_val = parser.parse(context, return_value.body) - - assert ret_val.status - assert ret_val.who == parser.name - assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT) - assert ret_val.value.value == [ - ConceptNode(bar, 0, 0, source="bar"), - ConceptNode(foo, 2, 2, source="foo"), - ConceptNode(baz, 4, 4, source="baz")] - assert ret_val.value.source == "bar foo baz" - - def test_i_can_parse_when_ending_with_bnf(self): - foo = Concept("foo", body="'foo'") - bar = Concept("bar", body="'bar'") - grammar = {foo: Sequence("foo1", "foo2", "foo3")} - context, return_value = self.init([foo, bar], grammar, "bar foo1 foo2 foo3") - - parser = MultipleConceptsParser() - ret_val = parser.parse(context, return_value.body) - - assert ret_val.status - assert ret_val.who == parser.name - assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT) - assert ret_val.value.value == [cnode("bar", 0, 0, "bar"), cnode("foo", 2, 6, "foo1 foo2 foo3")] - assert ret_val.value.source == "bar foo1 foo2 foo3" - - def test_i_can_parse_when_starting_with_bnf(self): - foo = Concept("foo", body="'foo'") - bar = Concept("bar", body="'bar'") - grammar = {foo: Sequence("foo1", "foo2", "foo3")} - context, return_value = self.init([foo, bar], grammar, "foo1 foo2 foo3 bar") - - parser = MultipleConceptsParser() - ret_val = parser.parse(context, return_value.body) - - assert ret_val.status - assert ret_val.who == parser.name - assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT) - assert ret_val.value.value == [cnode("foo", 0, 4, "foo1 foo2 
foo3"), cnode("bar", 6, 6, "bar")] - assert ret_val.value.source == "foo1 foo2 foo3 bar" - - def test_i_can_parse_when_concept_are_mixed(self): - foo = Concept("foo") - bar = Concept("bar") - baz = Concept("baz") - grammar = {foo: Sequence("foo1", "foo2", "foo3")} - context, return_value = self.init([foo, bar, baz], grammar, "baz foo1 foo2 foo3 bar") - - parser = MultipleConceptsParser() - ret_val = parser.parse(context, return_value.body) - - assert ret_val.status - assert ret_val.who == parser.name - assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT) - assert ret_val.value.value == [ - cnode("baz", 0, 0, "baz"), - cnode("foo", 2, 6, "foo1 foo2 foo3"), - cnode("bar", 8, 8, "bar")] - assert ret_val.value.source == "baz foo1 foo2 foo3 bar" - - def test_i_can_parse_when_multiple_concepts_are_matching(self): - foo = Concept("foo") - bar = Concept("bar", body="bar1") - baz = Concept("bar", body="bar2") - grammar = {foo: "foo"} - context, return_value = self.init([foo, bar, baz], grammar, "foo bar") - - parser = MultipleConceptsParser() - ret_val = parser.parse(context, return_value.body) - - assert len(ret_val) == 2 - assert ret_val[0].status - assert ret_val[0].value.value == [cnode("foo", 0, 0, "foo"), cnode("bar", 2, 2, "bar")] - assert ret_val[0].value.source == "foo bar" - assert ret_val[0].value.value[1].concept.metadata.body == "bar1" - - assert ret_val[1].status - assert ret_val[1].value.value == [cnode("foo", 0, 0, "foo"), cnode("bar", 2, 2, "bar")] - assert ret_val[1].value.source == "foo bar" - assert ret_val[1].value.value[1].concept.metadata.body == "bar2" - - def test_i_can_parse_when_source_code(self): - foo = Concept("foo") - grammar = {foo: "foo"} - context, return_value = self.init([foo], grammar, "1 foo") - - parser = MultipleConceptsParser() - ret_val = parser.parse(context, return_value.body) - wrapper = ret_val.value - value = ret_val.value.value - - assert ret_val.status - assert ret_val.who == parser.name - assert 
context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - assert wrapper.source == "1 foo" - assert value == [ - scnode(0, 1, "1 "), - cnode("foo", 2, 2, "foo")] - - def test_i_cannot_parse_when_unrecognized_token(self): - twenty_two = Concept("twenty two") - one = Concept("one") - grammar = {twenty_two: Sequence("twenty", "two")} - context, return_value = self.init([twenty_two, one], grammar, "twenty two + one") - - parser = MultipleConceptsParser() - ret_val = parser.parse(context, return_value.body) - - assert not ret_val.status - assert ret_val.who == parser.name - assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT) - assert ret_val.value.value == [ - cnode("twenty two", 0, 2, "twenty two"), - utnode(3, 5, " + "), - cnode("one", 6, 6, "one") - ] - assert ret_val.value.source == "twenty two + one" - - def test_i_cannot_parse_when_unknown_concepts(self): - twenty_two = Concept("twenty two") - one = Concept("one") - grammar = {twenty_two: Sequence("twenty", "two")} - context, return_value = self.init([twenty_two, one], grammar, "twenty two plus one") - - parser = MultipleConceptsParser() - ret_val = parser.parse(context, return_value.body) - - assert not ret_val.status - assert ret_val.who == parser.name - assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT) - assert ret_val.value.value == [ - cnode("twenty two", 0, 2, "twenty two"), - utnode(3, 5, " plus "), - cnode("one", 6, 6, "one") - ] - assert ret_val.value.source == "twenty two plus one" - - @pytest.mark.parametrize("text, expected_source, expected_end", [ - ("True", "True", 0), - ("1 == 1", "1 == 1", 4), - ("1!xdf", "1", 0), - ("1", "1", 0), - ]) - def test_i_can_get_source_code_node(self, text, expected_source, expected_end): - tokens = list(Tokenizer(text))[:-1] # strip trailing EOF - - start_index = 5 # a random number different of zero - res = MultipleConceptsParser().get_source_code_node(self.get_context(), start_index, tokens) - - 
assert isinstance(res, SourceCodeNode) - assert isinstance(res.node, PythonNode) - assert res.source == expected_source - assert res.start == start_index - assert res.end == start_index + expected_end - - def test_i_cannot_parse_null_text(self): - res = MultipleConceptsParser().get_source_code_node(self.get_context(), 0, []) - assert res is None - - eof = Token(TokenKind.EOF, "", 0, 0, 0) - res = MultipleConceptsParser().get_source_code_node(self.get_context(), 0, [eof]) - assert res is None +# import pytest +# +# from core.builtin_concepts import ParserResultConcept, BuiltinConcepts +# from core.concept import Concept +# from core.tokenizer import Tokenizer, TokenKind, Token +# from parsers.BaseNodeParser import cnode, scnode, utnode, SourceCodeNode, ConceptNode +# from parsers.BnfNodeParser import BnfNodeParser, Sequence +# from parsers.MultipleConceptsParser import MultipleConceptsParser +# from parsers.PythonParser import PythonNode +# +# from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka +# +# +# def get_return_value(context, grammar, expression): +# parser = BnfNodeParser() +# parser.initialize(context, grammar) +# +# ret_val = parser.parse(context, expression) +# assert not ret_val.status +# return ret_val +# +# +# class TestMultipleConceptsParser(TestUsingMemoryBasedSheerka): +# +# def init(self, concepts, grammar, expression): +# context = self.get_context() +# for c in concepts: +# context.sheerka.create_new_concept(context, c) +# return_value = get_return_value(context, grammar, expression) +# +# return context, return_value +# +# def test_not_interested_if_not_parser_result(self): +# context = self.get_context() +# text = "not parser result" +# +# res = MultipleConceptsParser().parse(context, text) +# assert res is None +# +# def test_not_interested_if_not_from_concept_lexer_parser(self): +# context = self.get_context() +# text = ParserResultConcept(parser="not concept lexer", value="some value") +# +# res = 
MultipleConceptsParser().parse(context, text) +# assert res is None +# +# def test_i_can_parse_exact_concepts(self): +# foo = Concept("foo", body="'foo'") +# bar = Concept("bar", body="'bar'") +# baz = Concept("baz", body="'baz'") +# grammar = {} +# context, return_value = self.init([foo, bar, baz], grammar, "bar foo baz") +# +# parser = MultipleConceptsParser() +# ret_val = parser.parse(context, return_value.body) +# +# assert ret_val.status +# assert ret_val.who == parser.name +# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT) +# assert ret_val.value.value == [ +# ConceptNode(bar, 0, 0, source="bar"), +# ConceptNode(foo, 2, 2, source="foo"), +# ConceptNode(baz, 4, 4, source="baz")] +# assert ret_val.value.source == "bar foo baz" +# +# def test_i_can_parse_when_ending_with_bnf(self): +# foo = Concept("foo", body="'foo'") +# bar = Concept("bar", body="'bar'") +# grammar = {foo: Sequence("foo1", "foo2", "foo3")} +# context, return_value = self.init([foo, bar], grammar, "bar foo1 foo2 foo3") +# +# parser = MultipleConceptsParser() +# ret_val = parser.parse(context, return_value.body) +# +# assert ret_val.status +# assert ret_val.who == parser.name +# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT) +# assert ret_val.value.value == [cnode("bar", 0, 0, "bar"), cnode("foo", 2, 6, "foo1 foo2 foo3")] +# assert ret_val.value.source == "bar foo1 foo2 foo3" +# +# def test_i_can_parse_when_starting_with_bnf(self): +# foo = Concept("foo", body="'foo'") +# bar = Concept("bar", body="'bar'") +# grammar = {foo: Sequence("foo1", "foo2", "foo3")} +# context, return_value = self.init([foo, bar], grammar, "foo1 foo2 foo3 bar") +# +# parser = MultipleConceptsParser() +# ret_val = parser.parse(context, return_value.body) +# +# assert ret_val.status +# assert ret_val.who == parser.name +# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT) +# assert ret_val.value.value == [cnode("foo", 0, 4, "foo1 
foo2 foo3"), cnode("bar", 6, 6, "bar")] +# assert ret_val.value.source == "foo1 foo2 foo3 bar" +# +# def test_i_can_parse_when_concept_are_mixed(self): +# foo = Concept("foo") +# bar = Concept("bar") +# baz = Concept("baz") +# grammar = {foo: Sequence("foo1", "foo2", "foo3")} +# context, return_value = self.init([foo, bar, baz], grammar, "baz foo1 foo2 foo3 bar") +# +# parser = MultipleConceptsParser() +# ret_val = parser.parse(context, return_value.body) +# +# assert ret_val.status +# assert ret_val.who == parser.name +# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT) +# assert ret_val.value.value == [ +# cnode("baz", 0, 0, "baz"), +# cnode("foo", 2, 6, "foo1 foo2 foo3"), +# cnode("bar", 8, 8, "bar")] +# assert ret_val.value.source == "baz foo1 foo2 foo3 bar" +# +# def test_i_can_parse_when_multiple_concepts_are_matching(self): +# foo = Concept("foo") +# bar = Concept("bar", body="bar1") +# baz = Concept("bar", body="bar2") +# grammar = {foo: "foo"} +# context, return_value = self.init([foo, bar, baz], grammar, "foo bar") +# +# parser = MultipleConceptsParser() +# ret_val = parser.parse(context, return_value.body) +# +# assert len(ret_val) == 2 +# assert ret_val[0].status +# assert ret_val[0].value.value == [cnode("foo", 0, 0, "foo"), cnode("bar", 2, 2, "bar")] +# assert ret_val[0].value.source == "foo bar" +# assert ret_val[0].value.value[1].concept.metadata.body == "bar1" +# +# assert ret_val[1].status +# assert ret_val[1].value.value == [cnode("foo", 0, 0, "foo"), cnode("bar", 2, 2, "bar")] +# assert ret_val[1].value.source == "foo bar" +# assert ret_val[1].value.value[1].concept.metadata.body == "bar2" +# +# def test_i_can_parse_when_source_code(self): +# foo = Concept("foo") +# grammar = {foo: "foo"} +# context, return_value = self.init([foo], grammar, "1 foo") +# +# parser = MultipleConceptsParser() +# ret_val = parser.parse(context, return_value.body) +# wrapper = ret_val.value +# value = ret_val.value.value +# +# assert 
ret_val.status +# assert ret_val.who == parser.name +# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) +# assert wrapper.source == "1 foo" +# assert value == [ +# scnode(0, 1, "1 "), +# cnode("foo", 2, 2, "foo")] +# +# def test_i_cannot_parse_when_unrecognized_token(self): +# twenty_two = Concept("twenty two") +# one = Concept("one") +# grammar = {twenty_two: Sequence("twenty", "two")} +# context, return_value = self.init([twenty_two, one], grammar, "twenty two + one") +# +# parser = MultipleConceptsParser() +# ret_val = parser.parse(context, return_value.body) +# +# assert not ret_val.status +# assert ret_val.who == parser.name +# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT) +# assert ret_val.value.value == [ +# cnode("twenty two", 0, 2, "twenty two"), +# utnode(3, 5, " + "), +# cnode("one", 6, 6, "one") +# ] +# assert ret_val.value.source == "twenty two + one" +# +# def test_i_cannot_parse_when_unknown_concepts(self): +# twenty_two = Concept("twenty two") +# one = Concept("one") +# grammar = {twenty_two: Sequence("twenty", "two")} +# context, return_value = self.init([twenty_two, one], grammar, "twenty two plus one") +# +# parser = MultipleConceptsParser() +# ret_val = parser.parse(context, return_value.body) +# +# assert not ret_val.status +# assert ret_val.who == parser.name +# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT) +# assert ret_val.value.value == [ +# cnode("twenty two", 0, 2, "twenty two"), +# utnode(3, 5, " plus "), +# cnode("one", 6, 6, "one") +# ] +# assert ret_val.value.source == "twenty two plus one" +# +# @pytest.mark.parametrize("text, expected_source, expected_end", [ +# ("True", "True", 0), +# ("1 == 1", "1 == 1", 4), +# ("1!xdf", "1", 0), +# ("1", "1", 0), +# ]) +# def test_i_can_get_source_code_node(self, text, expected_source, expected_end): +# tokens = list(Tokenizer(text))[:-1] # strip trailing EOF +# +# start_index = 5 # a random number 
different of zero +# res = MultipleConceptsParser().get_source_code_node(self.get_context(), start_index, tokens) +# +# assert isinstance(res, SourceCodeNode) +# assert isinstance(res.node, PythonNode) +# assert res.source == expected_source +# assert res.start == start_index +# assert res.end == start_index + expected_end +# +# def test_i_cannot_parse_null_text(self): +# res = MultipleConceptsParser().get_source_code_node(self.get_context(), 0, []) +# assert res is None +# +# eof = Token(TokenKind.EOF, "", 0, 0, 0) +# res = MultipleConceptsParser().get_source_code_node(self.get_context(), 0, [eof]) +# assert res is None diff --git a/tests/parsers/test_PythonParser.py b/tests/parsers/test_PythonParser.py index 70c4850..e205d0b 100644 --- a/tests/parsers/test_PythonParser.py +++ b/tests/parsers/test_PythonParser.py @@ -50,9 +50,9 @@ class TestPythonParser(TestUsingMemoryBasedSheerka): assert res.who == parser.name assert isinstance(res.value, NotForMeConcept) assert res.value.body == text - assert len(res.value.get_prop("reason")) == 1 - assert isinstance(res.value.get_prop("reason")[0], PythonErrorNode) - assert isinstance(res.value.get_prop("reason")[0].exception, SyntaxError) + assert len(res.value.get_value("reason")) == 1 + assert isinstance(res.value.get_value("reason")[0], PythonErrorNode) + assert isinstance(res.value.get_value("reason")[0].exception, SyntaxError) @pytest.mark.parametrize("text, error_msg, error_text", [ ("c::", "Concept identifiers not found", ""), @@ -66,10 +66,10 @@ class TestPythonParser(TestUsingMemoryBasedSheerka): assert isinstance(res.value, NotForMeConcept) assert res.value.body == text - assert len(res.value.get_prop("reason")) == 1 - assert isinstance(res.value.get_prop("reason")[0], LexerError) - assert res.value.get_prop("reason")[0].message == error_msg - assert res.value.get_prop("reason")[0].text == error_text + assert len(res.value.get_value("reason")) == 1 + assert isinstance(res.value.get_value("reason")[0], LexerError) + 
assert res.value.get_value("reason")[0].message == error_msg + assert res.value.get_value("reason")[0].text == error_text def test_i_can_parse_a_concept(self): text = "c:name|key: + 1" diff --git a/tests/parsers/test_PythonWithConceptsParser.py b/tests/parsers/test_PythonWithConceptsParser.py index 6da6cf3..dfcb899 100644 --- a/tests/parsers/test_PythonWithConceptsParser.py +++ b/tests/parsers/test_PythonWithConceptsParser.py @@ -4,7 +4,7 @@ import pytest from core.builtin_concepts import ParserResultConcept, BuiltinConcepts, ReturnValueConcept from core.concept import Concept from core.tokenizer import Token, TokenKind, Tokenizer -from parsers.BnfNodeParser import ConceptNode, UnrecognizedTokensNode +from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode from parsers.PythonParser import PythonNode from parsers.PythonWithConceptsParser import PythonWithConceptsParser from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser diff --git a/tests/parsers/test_SyaConceptLexerParser.py b/tests/parsers/test_SyaNodeParser.py similarity index 72% rename from tests/parsers/test_SyaConceptLexerParser.py rename to tests/parsers/test_SyaNodeParser.py index f660db6..fccce67 100644 --- a/tests/parsers/test_SyaConceptLexerParser.py +++ b/tests/parsers/test_SyaNodeParser.py @@ -3,7 +3,7 @@ from core.builtin_concepts import BuiltinConcepts from core.concept import Concept, CC from core.tokenizer import Tokenizer from parsers.BaseNodeParser import utnode, ConceptNode, cnode, short_cnode, UnrecognizedTokensNode, \ - SCWC, CNC, CN, UTN, scnode + SCWC, CNC, UTN from parsers.PythonParser import PythonNode from parsers.SyaNodeParser import SyaNodeParser, SyaConceptParserHelper, SyaAssociativity, \ NoneAssociativeSequenceErrorNode, TooManyParametersFound @@ -16,14 +16,69 @@ def compute_expected_array(concepts_map, expression, expected): return tests.parsers.parsers_utils.compute_expected_array(concepts_map, expression, expected, sya=True) -class 
TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): +cmap = { + "one": Concept("one"), + "two": Concept("two"), + "three": Concept("three"), + "four": Concept("four"), + "plus": Concept("a plus b").def_var("a").def_var("b"), + "minus": Concept("a minus b").def_var("a").def_var("b"), + "mult": Concept("a mult b").def_var("a").def_var("b"), + "prefixed": Concept("a prefixed").def_var("a"), + "suffixed": Concept("suffixed a").def_var("a"), + "infix": Concept("a infix b").def_var("a").def_var("b"), + "?": Concept("a ? b : c").def_var("a").def_var("b").def_var("c"), + "if": Concept("if a then b else c end").def_var("a").def_var("b").def_var("c"), + "square": Concept("square(a)").def_var("a"), + "foo bar": Concept("foo bar(a)").def_var("a"), + "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), +} - def init_parser(self, concepts_map, sya_def, **kwargs): - sheerka, context, *updated_concepts = self.init_concepts( - *concepts_map.values(), - singleton=True, + +class TestSyaNodeParser(TestUsingMemoryBasedSheerka): + sheerka = None + + @classmethod + def setup_class(cls): + t = TestSyaNodeParser() + TestSyaNodeParser.sheerka, context, _ = t.init_parser( + cmap, + singleton=False, create_new=True, - **kwargs) + init_from_sheerka=True) + + TestSyaNodeParser.sheerka.force_sya_def(context, [ + (cmap["plus"].id, 5, SyaAssociativity.Right), + (cmap["mult"].id, 10, SyaAssociativity.Right), + (cmap["minus"].id, 10, SyaAssociativity.Right), + (cmap["square"].id, None, SyaAssociativity.No)]) + + def init_parser(self, + my_concepts_map=None, + sya_def=None, + post_init_concepts=None, + **kwargs): + + if my_concepts_map is not None: + # a new concept map is given + # use it but + # do not instantiate a new sheerka + # do not update / init from sheerka + if 'singleton' not in kwargs: + kwargs["singleton"] = True + init_from_sheerka = kwargs.get("init_from_sheerka", False) + sheerka, context, *concepts = self.init_concepts(*my_concepts_map.values(), 
**kwargs) + else: + # No custom concept map is given -> Use the global cmap + # Sheerka is already initialized (the class instance) + # Use it to initialize the parser + init_from_sheerka = kwargs.get("init_from_sheerka", True) + sheerka = TestSyaNodeParser.sheerka + context = self.get_context(sheerka) + concepts = cmap.values() + + if post_init_concepts: + post_init_concepts(sheerka, context) if sya_def: sya_def_to_use = {} @@ -32,29 +87,15 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): else: sya_def_to_use = None - parser = SyaNodeParser() - parser.initialize(context, updated_concepts, sya_def_to_use) + if init_from_sheerka: + parser = SyaNodeParser(sheerka=sheerka) + else: + parser = SyaNodeParser() + if my_concepts_map: + parser.init_from_concepts(context, concepts, sya=sya_def_to_use) return sheerka, context, parser - def test_i_can_initialize(self): - sheerka, context, c1, c2, c3, c4, c5 = self.init_concepts( - "foo", - Concept("bar a").def_prop("a"), - Concept("a baz").def_prop("a"), - Concept("baz a qux b").def_prop("a").def_prop("b"), - Concept("foo a bar b").def_prop("a").def_prop("b"), - ) - - parser = SyaNodeParser() - parser.initialize(context, [c1, c2, c3, c4, c5]) - - assert parser.concepts_by_first_keyword == { - "foo": [c1.id, c5.id], - "bar": [c2.id], - "baz": [c3.id, c4.id], - } - @pytest.mark.parametrize("expression, expected_sequences", [ ("one plus two", [["one", "two", "plus"]]), ("1 + 1 plus two", [["1 + 1 ", "two", "plus"]]), @@ -143,21 +184,21 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): ("x$!# plus z$!#", [["x$!# ", " z$!#", "plus"]]), ]) def test_i_can_post_fix_simple_infix_concepts(self, expression, expected_sequences): - concepts_map = { - "plus": Concept("a plus b").def_prop("a").def_prop("b"), - "one": Concept("one"), - "two": Concept("two"), - "three": Concept("three"), - "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_prop("unit"), - } - sheerka, context, parser = 
self.init_parser(concepts_map, None) + # concepts_map = { + # "plus": Concept("a plus b").def_var("a").def_var("b"), + # "one": Concept("one"), + # "two": Concept("two"), + # "three": Concept("three"), + # "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), + # } + sheerka, context, parser = self.init_parser() res = parser.infix_to_postfix(context, expression) assert len(res) == len(expected_sequences) for res_i, expected in zip(res, expected_sequences): assert len(res_i.errors) == 0 - expected_array = compute_expected_array(concepts_map, expression, expected) + expected_array = compute_expected_array(cmap, expression, expected) assert res_i.out == expected_array @pytest.mark.parametrize("expression, expected_sequences", [ @@ -169,14 +210,14 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): ]) def test_i_can_post_fix_infix_concepts_with_long_name(self, expression, expected_sequences): concepts_map = { - "plus plus plus": Concept("a plus plus plus b").def_prop("a").def_prop("b"), - "another long name infix": Concept("a another long name infix b").def_prop("a").def_prop("b"), + "plus plus plus": Concept("a plus plus plus b").def_var("a").def_var("b"), + "another long name infix": Concept("a another long name infix b").def_var("a").def_var("b"), "one": Concept("one"), "two": Concept("two"), "three": Concept("three"), - "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_prop("unit"), + "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), } - sheerka, context, parser = self.init_parser(concepts_map, None) + sheerka, context, parser = self.init_parser(concepts_map, create_new=True, init_from_sheerka=True) res = parser.infix_to_postfix(context, expression) @@ -200,20 +241,20 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): ("x$!# prefixed", [["x$!# ", "prefixed"]]), ]) def test_i_can_post_fix_simple_prefixed_concepts(self, expression, 
expected_sequences): - concepts_map = { - "prefixed": Concept("a prefixed").def_prop("a"), - "one": Concept("one"), - "two": Concept("two"), - "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_prop("unit"), - } - sheerka, context, parser = self.init_parser(concepts_map, None) + # concepts_map = { + # "prefixed": Concept("a prefixed").def_var("a"), + # "one": Concept("one"), + # "two": Concept("two"), + # "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), + # } + sheerka, context, parser = self.init_parser() res = parser.infix_to_postfix(context, expression) assert len(res) == len(expected_sequences) for res_i, expected in zip(res, expected_sequences): assert len(res_i.errors) == 0 - expected_array = compute_expected_array(concepts_map, expression, expected) + expected_array = compute_expected_array(cmap, expression, expected) assert res_i.out == expected_array @pytest.mark.parametrize("expression, expected_sequences", [ @@ -243,13 +284,13 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): ]) def test_i_can_post_fix_prefixed_concepts_with_long_names(self, expression, expected_sequences): concepts_map = { - "prefixed prefixed": Concept("a prefixed prefixed").def_prop("a"), - "long name prefixed": Concept("a long name prefixed").def_prop("a"), + "prefixed prefixed": Concept("a prefixed prefixed").def_var("a"), + "long name prefixed": Concept("a long name prefixed").def_var("a"), "one": Concept("one"), "two": Concept("two"), - "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_prop("unit"), + "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), } - sheerka, context, parser = self.init_parser(concepts_map, None) + sheerka, context, parser = self.init_parser(concepts_map, create_new=True, init_from_sheerka=True) res = parser.infix_to_postfix(context, expression) @@ -273,20 +314,20 @@ class 
TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): ("suffixed x$!#", [[" x$!#", "suffixed"]]), ]) def test_i_can_post_fix_simple_suffixed_concepts(self, expression, expected_sequences): - concepts_map = { - "suffixed": Concept("suffixed a").def_prop("a"), - "one": Concept("one"), - "two": Concept("two"), - "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_prop("unit"), - } - sheerka, context, parser = self.init_parser(concepts_map, None) + # concepts_map = { + # "suffixed": Concept("suffixed a").def_var("a"), + # "one": Concept("one"), + # "two": Concept("two"), + # "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), + # } + sheerka, context, parser = self.init_parser() res = parser.infix_to_postfix(context, expression) assert len(res) == len(expected_sequences) for res_i, expected in zip(res, expected_sequences): assert len(res_i.errors) == 0 - expected_array = compute_expected_array(concepts_map, expression, expected) + expected_array = compute_expected_array(cmap, expression, expected) assert res_i.out == expected_array @pytest.mark.parametrize("expression, expected", [ @@ -295,11 +336,11 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): ]) def test_i_can_post_fix_suffixed_concepts_with_long_names(self, expression, expected): concepts_map = { - "suffixed suffixed": Concept("suffixed suffixed a").def_prop("a"), - "long name suffixed": Concept("long name suffixed a").def_prop("a"), + "suffixed suffixed": Concept("suffixed suffixed a").def_var("a"), + "long name suffixed": Concept("long name suffixed a").def_var("a"), "one": Concept("one"), "two": Concept("two"), - "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_prop("unit"), + "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), } sheerka, context, parser = self.init_parser(concepts_map, None) @@ -340,22 +381,22 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): 
:return: """ - concepts_map = { - "?": Concept("a ? b : c").def_prop("a").def_prop("b").def_prop("c"), - "if": Concept("if a then b else c end").def_prop("a").def_prop("b").def_prop("c"), - "one": Concept("one"), - "two": Concept("two"), - "three": Concept("three"), - "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_prop("unit"), - } - sheerka, context, parser = self.init_parser(concepts_map, None) + # concepts_map = { + # "?": Concept("a ? b : c").def_var("a").def_var("b").def_var("c"), + # "if": Concept("if a then b else c end").def_var("a").def_var("b").def_var("c"), + # "one": Concept("one"), + # "two": Concept("two"), + # "three": Concept("three"), + # "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), + # } + sheerka, context, parser = self.init_parser() res = parser.infix_to_postfix(context, expression) assert len(res) == len(expected_sequences) for res_i, expected in zip(res, expected_sequences): # assert len(res_i.errors) == 0 # Do not validate errors - expected_array = compute_expected_array(concepts_map, expression, expected) + expected_array = compute_expected_array(cmap, expression, expected) assert res_i.out == expected_array @pytest.mark.parametrize("expression, expected_sequences", [ @@ -373,14 +414,14 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): ]) def test_i_can_post_fix_ternary_concept_with_long_names(self, expression, expected_sequences): concepts_map = { - "? ?": Concept("a ? ? b : : c").def_prop("a").def_prop("b").def_prop("c"), - "if if": Concept("if if a then then b else else c end end").def_prop("a").def_prop("b").def_prop("c"), + "? ?": Concept("a ? ? 
b : : c").def_var("a").def_var("b").def_var("c"), + "if if": Concept("if if a then then b else else c end end").def_var("a").def_var("b").def_var("c"), "one": Concept("one"), "two": Concept("two"), "three": Concept("three"), - "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_prop("unit"), + "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), } - sheerka, context, parser = self.init_parser(concepts_map, None) + sheerka, context, parser = self.init_parser(concepts_map, create_new=True, init_from_sheerka=True) res = parser.infix_to_postfix(context, expression) @@ -397,8 +438,8 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): ]) def test_i_can_post_fix_suffixed_unary_composition(self, expression, expected): concepts_map = { - "foo": Concept("foo a").def_prop("a"), - "bar": Concept("bar a").def_prop("a"), + "foo": Concept("foo a").def_var("a"), + "bar": Concept("bar a").def_var("a"), "baz": Concept("baz"), } sheerka, context, parser = self.init_parser(concepts_map, None) @@ -416,8 +457,8 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): ]) def test_i_can_post_fix_prefixed_unary_composition(self, expression, expected): concepts_map = { - "foo": Concept("a foo").def_prop("a"), - "bar": Concept("a bar").def_prop("a"), + "foo": Concept("a foo").def_var("a"), + "bar": Concept("a bar").def_var("a"), "baz": Concept("baz"), } sya_def = { @@ -439,29 +480,29 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): ("one mult (two plus three)", ["one", "two", "three", "plus", "mult"]), ]) def test_i_can_post_fix_binary_with_precedence(self, expression, expected): - concepts_map = { - "plus": Concept("a plus b").def_prop("a").def_prop("b"), - "mult": Concept("a mult b").def_prop("a").def_prop("b"), - "one": Concept("one"), - "two": Concept("two"), - "three": Concept("three"), - } - sya_def = { - concepts_map["plus"]: (5, SyaAssociativity.Right), - concepts_map["mult"]: (10, 
SyaAssociativity.Right), # precedence greater than plus - } - sheerka, context, parser = self.init_parser(concepts_map, sya_def) + # concepts_map = { + # "plus": Concept("a plus b").def_var("a").def_var("b"), + # "mult": Concept("a mult b").def_var("a").def_var("b"), + # "one": Concept("one"), + # "two": Concept("two"), + # "three": Concept("three"), + # } + # sya_def = { + # concepts_map["plus"]: (5, SyaAssociativity.Right), + # concepts_map["mult"]: (10, SyaAssociativity.Right), # precedence greater than plus + # } + sheerka, context, parser = self.init_parser() res = parser.infix_to_postfix(context, expression) - expected_array = compute_expected_array(concepts_map, expression, expected) + expected_array = compute_expected_array(cmap, expression, expected) assert len(res) == 1 assert res[0].out == expected_array def test_i_can_post_fix_unary_with_precedence(self): concepts_map = { - "suffixed": Concept("suffixed a").def_prop("a"), - "prefixed": Concept("a prefixed").def_prop("a"), + "suffixed": Concept("suffixed a").def_var("a"), + "prefixed": Concept("a prefixed").def_var("a"), "a": Concept("a"), } @@ -496,7 +537,7 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): def test_i_can_post_fix_right_associated_binary(self): concepts_map = { - "equals": Concept("a equals b").def_prop("a").def_prop("b"), + "equals": Concept("a equals b").def_var("a").def_var("b"), "one": Concept("one"), "two": Concept("two"), "three": Concept("three"), @@ -518,7 +559,7 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): def test_i_can_post_fix_left_associated_binary(self): concepts_map = { - "plus": Concept("a plus b").def_prop("a").def_prop("b"), + "plus": Concept("a plus b").def_var("a").def_var("b"), "one": Concept("one"), "two": Concept("two"), "three": Concept("three"), @@ -550,7 +591,7 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): ]) def test_i_can_post_fix_right_associated_ternary(self, expression, expected): concepts_map = { - "?": 
Concept("a ? b : c").def_prop("a").def_prop("b").def_prop("c"), + "?": Concept("a ? b : c").def_var("a").def_var("b").def_var("c"), "one": Concept("one"), "two": Concept("two"), "three": Concept("three"), @@ -577,7 +618,7 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): ]) def test_i_can_post_fix_left_associated_ternary(self, expression, expected): concepts_map = { - "?": Concept("a ? b : c").def_prop("a").def_prop("b").def_prop("c"), + "?": Concept("a ? b : c").def_var("a").def_var("b").def_var("c"), "one": Concept("one"), "two": Concept("two"), "three": Concept("three"), @@ -594,8 +635,8 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): def test_i_can_post_fix_when_multiple_concepts_are_found(self): concepts_map = { - "foo": Concept("foo a").def_prop("a"), - "foo bar": Concept("foo bar a").def_prop("a"), + "foo": Concept("foo a").def_var("a"), + "foo bar": Concept("foo bar a").def_var("a"), "baz": Concept("baz"), } sheerka, context, parser = self.init_parser(concepts_map, None) @@ -603,7 +644,7 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): expression = "foo bar baz" res = parser.infix_to_postfix(context, expression) expected_sequences = [ - [" bar ", "foo", "baz"], + [UTN(" bar "), "foo", "baz"], ["baz", "foo bar"] ] @@ -646,30 +687,30 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): ("foo bar ( one )", ["one", "foo bar"]), ]) def test_i_can_pos_fix_when_parenthesis(self, expression, expected): - concepts_map = { - "prefixed": Concept("a prefixed").def_prop("a"), - "suffixed": Concept("suffixed a").def_prop("a"), - "square": Concept("square(a)").def_prop("a"), - "foo bar": Concept("foo bar(a)").def_prop("a"), - "plus": Concept("a plus b").def_prop("a").def_prop("b"), - "minus": Concept("a minus b").def_prop("a").def_prop("b"), - "if": Concept("if a then b else c end").def_prop("a").def_prop("b").def_prop("c"), - "?": Concept("a ? 
b : c").def_prop("a").def_prop("b").def_prop("c"), - "one": Concept("one"), - "two": Concept("two"), - "three": Concept("three"), - } + # concepts_map = { + # "prefixed": Concept("a prefixed").def_var("a"), + # "suffixed": Concept("suffixed a").def_var("a"), + # "square": Concept("square(a)").def_var("a"), + # "foo bar": Concept("foo bar(a)").def_var("a"), + # "plus": Concept("a plus b").def_var("a").def_var("b"), + # "minus": Concept("a minus b").def_var("a").def_var("b"), + # "if": Concept("if a then b else c end").def_var("a").def_var("b").def_var("c"), + # "?": Concept("a ? b : c").def_var("a").def_var("b").def_var("c"), + # "one": Concept("one"), + # "two": Concept("two"), + # "three": Concept("three"), + # } + # + # sya_def = { + # concepts_map["square"]: (None, SyaAssociativity.No), + # concepts_map["plus"]: (10, SyaAssociativity.Right), + # concepts_map["minus"]: (10, SyaAssociativity.Right), + # } - sya_def = { - concepts_map["square"]: (None, SyaAssociativity.No), - concepts_map["plus"]: (10, SyaAssociativity.Right), - concepts_map["minus"]: (10, SyaAssociativity.Right), - } - - sheerka, context, parser = self.init_parser(concepts_map, sya_def) + sheerka, context, parser = self.init_parser() res = parser.infix_to_postfix(context, expression) - expected_array = compute_expected_array(concepts_map, expression, expected) + expected_array = compute_expected_array(cmap, expression, expected) assert len(res) == 1 assert res[0].out == expected_array @@ -721,28 +762,28 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): ("suffixed one function(two)", [["one", "suffixed", SCWC(" function(", ")", "two")]]), ]) def test_i_can_post_fix_when_parenthesis_and_unknown(self, expression, expected_sequences): - concepts_map = { - "prefixed": Concept("a prefixed").def_prop("a"), - "suffixed": Concept("suffixed a").def_prop("a"), - "plus": Concept("a plus b").def_prop("a").def_prop("b"), - "mult": Concept("a mult b").def_prop("a").def_prop("b"), - "if": 
Concept("if a then b else c end").def_prop("a").def_prop("b").def_prop("c"), - "one": Concept("one"), - "two": Concept("two"), - "three": Concept("three"), - "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_prop("unit"), - } - sya_def = { - concepts_map["plus"]: (5, SyaAssociativity.Right), - concepts_map["mult"]: (10, SyaAssociativity.Right), # precedence greater than plus - } - sheerka, context, parser = self.init_parser(concepts_map, sya_def) + # concepts_map = { + # "prefixed": Concept("a prefixed").def_var("a"), + # "suffixed": Concept("suffixed a").def_var("a"), + # "plus": Concept("a plus b").def_var("a").def_var("b"), + # "mult": Concept("a mult b").def_var("a").def_var("b"), + # "if": Concept("if a then b else c end").def_var("a").def_var("b").def_var("c"), + # "one": Concept("one"), + # "two": Concept("two"), + # "three": Concept("three"), + # "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), + # } + # sya_def = { + # concepts_map["plus"]: (5, SyaAssociativity.Right), + # concepts_map["mult"]: (10, SyaAssociativity.Right), # precedence greater than plus + # } + sheerka, context, parser = self.init_parser() res = parser.infix_to_postfix(context, expression) assert len(res) == len(expected_sequences) for res_i, expected in zip(res, expected_sequences): - expected_array = compute_expected_array(concepts_map, expression, expected) + expected_array = compute_expected_array(cmap, expression, expected) assert res_i.out == expected_array @pytest.mark.parametrize("expression, expected", [ @@ -762,13 +803,13 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): ("(one plus ( 1 + )", ("(", 0)), ]) def test_i_can_detect_parenthesis_mismatch_error_when_post_fixing(self, expression, expected): - concepts_map = { - "one": Concept("one"), - "two": Concept("two"), - "plus": Concept("a plus b").def_prop("a").def_prop("b"), - "?": Concept("a ? 
b : c").def_prop("a").def_prop("b").def_prop("c"), - } - sheerka, context, parser = self.init_parser(concepts_map, None) + # concepts_map = { + # "one": Concept("one"), + # "two": Concept("two"), + # "plus": Concept("a plus b").def_var("a").def_var("b"), + # "?": Concept("a ? b : c").def_var("a").def_var("b").def_var("c"), + # } + sheerka, context, parser = self.init_parser() res = parser.infix_to_postfix(context, expression) @@ -779,13 +820,13 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): ("one ? one two : three", ("?", ":")), ]) def test_i_can_detected_when_too_many_parameters(self, expression, expected): - concepts_map = { - "one": Concept("one"), - "two": Concept("two"), - "plus": Concept("a plus b").def_prop("a").def_prop("b"), - "?": Concept("a ? b : c").def_prop("a").def_prop("b").def_prop("c"), - } - sheerka, context, parser = self.init_parser(concepts_map, None) + # concepts_map = { + # "one": Concept("one"), + # "two": Concept("two"), + # "plus": Concept("a plus b").def_var("a").def_var("b"), + # "?": Concept("a ? 
b : c").def_var("a").def_var("b").def_var("c"), + # } + sheerka, context, parser = self.init_parser(cmap, None) res = parser.infix_to_postfix(context, expression) @@ -793,7 +834,7 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): assert len(res[0].errors) == 1 error = res[0].errors[0] assert isinstance(error, TooManyParametersFound) - assert error.concept == concepts_map[expected[0]] + assert error.concept == cmap[expected[0]] assert error.token.value == expected[1] @pytest.mark.parametrize("expression, expected", [ @@ -819,30 +860,30 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): ("(one infix two) (three prefixed)", ["one", "two", "infix", "three", "prefixed"]), ]) def test_i_can_post_fix_sequences(self, expression, expected): - concepts_map = { - "prefixed": Concept("a prefixed").def_prop("a"), - "suffixed": Concept("suffixed a").def_prop("a"), - "infix": Concept("a infix b").def_prop("a").def_prop("b"), - "?": Concept("a ? b : c").def_prop("a").def_prop("b").def_prop("c"), - "one": Concept("one"), - "two": Concept("two"), - "three": Concept("three"), - "four": Concept("four"), - } + # concepts_map = { + # "prefixed": Concept("a prefixed").def_var("a"), + # "suffixed": Concept("suffixed a").def_var("a"), + # "infix": Concept("a infix b").def_var("a").def_var("b"), + # "?": Concept("a ? 
b : c").def_var("a").def_var("b").def_var("c"), + # "one": Concept("one"), + # "two": Concept("two"), + # "three": Concept("three"), + # "four": Concept("four"), + # } - sheerka, context, parser = self.init_parser(concepts_map, None) + sheerka, context, parser = self.init_parser(cmap, None) res = parser.infix_to_postfix(context, expression) - expected_array = compute_expected_array(concepts_map, expression, expected) + expected_array = compute_expected_array(cmap, expression, expected) assert len(res) == 1 assert res[0].out == expected_array def test_the_more_concepts_the_more_results(self): concepts_map = { - "plus": Concept("a plus b").def_prop("a").def_prop("b"), - "plus plus": Concept("a plus plus").def_prop("a"), - "plus equals": Concept("a plus equals b").def_prop("a").def_prop("b"), + "plus": Concept("a plus b").def_var("a").def_var("b"), + "plus plus": Concept("a plus plus").def_var("a"), + "plus equals": Concept("a plus equals b").def_var("a").def_var("b"), } sheerka, context, parser = self.init_parser(concepts_map, None) @@ -864,7 +905,7 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): def test_i_can_use_string_instead_of_identifier(self): concepts_map = { - "ternary": Concept("a ? ? b '::' c").def_prop("a").def_prop("b").def_prop("c"), + "ternary": Concept("a ? ? 
b '::' c").def_var("a").def_var("b").def_var("c"), "one": Concept("one"), "two": Concept("two"), "three": Concept("three"), @@ -883,7 +924,7 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): def test_i_cannot_chain_non_associative(self): concepts_map = { - "less than": Concept("a less than b").def_prop("a").def_prop("b"), + "less than": Concept("a less than b").def_var("a").def_var("b"), "one": Concept("one"), "two": Concept("two"), "three": Concept("three"), @@ -904,35 +945,35 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): Not quite sure why this test is here :return: """ - concepts_map = { - "foo": Concept("foo a").def_prop("a"), - "one": Concept("one"), - "two": Concept("two"), - "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_prop("unit"), - } - sheerka, context, parser = self.init_parser(concepts_map, None) + # concepts_map = { + # "foo": Concept("foo a").def_var("a"), + # "one": Concept("one"), + # "two": Concept("two"), + # "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), + # } + sheerka, context, parser = self.init_parser(cmap, None) - expression = "foo twenties" + expression = "suffixed twenties" res = parser.infix_to_postfix(context, expression) - expected = [cnode("twenties", 2, 2, "twenties"), "foo"] - expected_array = compute_expected_array(concepts_map, expression, expected) + expected = [cnode("twenties", 2, 2, "twenties"), "suffixed"] + expected_array = compute_expected_array(cmap, expression, expected) assert len(res) == 1 assert res[0].out == expected_array def test_i_can_parse_when_concept_atom_only(self): - concepts_map = { - "plus": Concept("a plus b").def_prop("a").def_prop("b"), - "mult": Concept("a mult b").def_prop("a").def_prop("b"), - "one": Concept("one"), - "two": Concept("two"), - "three": Concept("three"), - } - sya_def = { - concepts_map["plus"]: (5, SyaAssociativity.Right), - concepts_map["mult"]: (10, SyaAssociativity.Right), # 
precedence greater than plus - } - sheerka, context, parser = self.init_parser(concepts_map, sya_def) + # concepts_map = { + # "plus": Concept("a plus b").def_var("a").def_var("b"), + # "mult": Concept("a mult b").def_var("a").def_var("b"), + # "one": Concept("one"), + # "two": Concept("two"), + # "three": Concept("three"), + # } + # sya_def = { + # concepts_map["plus"]: (5, SyaAssociativity.Right), + # concepts_map["mult"]: (10, SyaAssociativity.Right), # precedence greater than plus + # } + sheerka, context, parser = self.init_parser() text = "one plus two mult three" res = parser.parse(context, text) @@ -941,29 +982,29 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): assert res.status assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - assert lexer_nodes == [ConceptNode(concepts_map["plus"], 0, 8, source=text)] + assert lexer_nodes == [ConceptNode(cmap["plus"], 0, 8, source=text)] # check the compiled expected_concept = lexer_nodes[0].concept - assert expected_concept.compiled["a"] == concepts_map["one"] - assert expected_concept.compiled["b"] == concepts_map["mult"] - assert expected_concept.compiled["b"].compiled["a"] == concepts_map["two"] - assert expected_concept.compiled["b"].compiled["b"] == concepts_map["three"] + assert expected_concept.compiled["a"] == cmap["one"] + assert expected_concept.compiled["b"] == cmap["mult"] + assert expected_concept.compiled["b"].compiled["a"] == cmap["two"] + assert expected_concept.compiled["b"].compiled["b"] == cmap["three"] def test_i_can_parse_when_python_code(self): - concepts_map = { - "foo": Concept("foo a").def_prop("a") - } - sheerka, context, parser = self.init_parser(concepts_map, None) + # concepts_map = { + # "foo": Concept("foo a").def_var("a") + # } + sheerka, context, parser = self.init_parser(cmap, None) - text = "foo 1 + 1" + text = "suffixed 1 + 1" res = parser.parse(context, text) wrapper = res.body lexer_nodes = res.body.body assert res.status assert 
context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - assert lexer_nodes == [ConceptNode(concepts_map["foo"], 0, 6, source=text)] + assert lexer_nodes == [ConceptNode(cmap["suffixed"], 0, 6, source=text)] # check the compiled expected_concept = lexer_nodes[0].concept @@ -977,15 +1018,15 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): assert isinstance(return_value_a.body.body, PythonNode) def test_i_can_parse_when_bnf_concept(self): - concepts_map = { - "foo": Concept("foo a").def_prop("a"), - "one": Concept("one"), - "two": Concept("two"), - "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_prop("unit"), - } - sheerka, context, parser = self.init_parser(concepts_map, None) + # concepts_map = { + # "foo": Concept("foo a").def_var("a"), + # "one": Concept("one"), + # "two": Concept("two"), + # "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), + # } + sheerka, context, parser = self.init_parser() - text = "foo twenty one" + text = "suffixed twenty one" res = parser.parse(context, text) assert len(res) == 2 assert res[1].status @@ -994,23 +1035,23 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): lexer_nodes = res[1].body.body assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - assert lexer_nodes == [ConceptNode(concepts_map["foo"], 0, 4, source=text)] + assert lexer_nodes == [ConceptNode(cmap["suffixed"], 0, 4, source=text)] # check the compiled expected_concept = lexer_nodes[0].concept assert sheerka.isinstance(expected_concept.compiled["a"], "twenties") - assert expected_concept.compiled["a"].compiled["unit"] == concepts_map["one"] + assert expected_concept.compiled["a"].compiled["unit"] == cmap["one"] def test_i_can_parse_sequences(self): - concepts_map = { - "plus": Concept("a plus b").def_prop("a").def_prop("b"), - "foo": Concept("foo a").def_prop("a"), - "one": Concept("one"), - "two": Concept("two"), - } - sheerka, context, parser 
= self.init_parser(concepts_map, None) + # concepts_map = { + # "plus": Concept("a plus b").def_var("a").def_var("b"), + # "foo": Concept("foo a").def_var("a"), + # "one": Concept("one"), + # "two": Concept("two"), + # } + sheerka, context, parser = self.init_parser(cmap, None) - text = "one plus 1 + 1 foo two" + text = "one plus 1 + 1 suffixed two" res = parser.parse(context, text) wrapper = res.body lexer_nodes = res.body.body @@ -1018,19 +1059,19 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): assert res.status assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) assert lexer_nodes == [ - ConceptNode(concepts_map["plus"], 0, 9, source="one plus 1 + 1 "), - ConceptNode(concepts_map["foo"], 10, 12, source="foo two")] + ConceptNode(cmap["plus"], 0, 9, source="one plus 1 + 1 "), + ConceptNode(cmap["suffixed"], 10, 12, source="suffixed two")] # check the compiled concept_plus_a = lexer_nodes[0].concept.compiled["a"] concept_plus_b = lexer_nodes[0].concept.compiled["b"] - concept_foo_a = lexer_nodes[1].concept.compiled["a"] + concept_suffixed_a = lexer_nodes[1].concept.compiled["a"] - assert concept_plus_a == concepts_map["one"] + assert concept_plus_a == cmap["one"] assert len(concept_plus_b) == 1 assert sheerka.isinstance(concept_plus_b[0], BuiltinConcepts.RETURN_VALUE) assert isinstance(concept_plus_b[0].body.body, PythonNode) - assert concept_foo_a == concepts_map["two"] + assert concept_suffixed_a == cmap["two"] @pytest.mark.parametrize("text, expected_status, expected_result", [ ("function(suffixed one)", True, [ @@ -1046,28 +1087,28 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): SCWC("function(", ")", CNC("suffixed", 2, 7, a=" x$!#"))]), ]) def test_i_can_parse_when_one_result(self, text, expected_status, expected_result): - concepts_map = { - "prefixed": Concept("a prefixed").def_prop("a"), - "suffixed": Concept("suffixed a").def_prop("a"), - "mult": Concept("a mult b").def_prop("a").def_prop("b"), - 
"plus": Concept("a plus b").def_prop("a").def_prop("b"), - "if": Concept("if a then b else c end").def_prop("a").def_prop("b").def_prop("c"), - "one": Concept("one"), - "two": Concept("two"), - "three": Concept("three"), - "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_prop("unit"), - } - sya_def = { - concepts_map["plus"]: (5, SyaAssociativity.Right), - concepts_map["mult"]: (10, SyaAssociativity.Right), # precedence greater than plus - } - sheerka, context, parser = self.init_parser(concepts_map, sya_def) + # concepts_map = { + # "prefixed": Concept("a prefixed").def_var("a"), + # "suffixed": Concept("suffixed a").def_var("a"), + # "mult": Concept("a mult b").def_var("a").def_var("b"), + # "plus": Concept("a plus b").def_var("a").def_var("b"), + # "if": Concept("if a then b else c end").def_var("a").def_var("b").def_var("c"), + # "one": Concept("one"), + # "two": Concept("two"), + # "three": Concept("three"), + # "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), + # } + # sya_def = { + # concepts_map["plus"]: (5, SyaAssociativity.Right), + # concepts_map["mult"]: (10, SyaAssociativity.Right), # precedence greater than plus + # } + sheerka, context, parser = self.init_parser() res = parser.parse(context, text) wrapper = res.body lexer_nodes = res.body.body - expected_array = compute_expected_array(concepts_map, text, expected_result) + expected_array = compute_expected_array(cmap, text, expected_result) assert res.status == expected_status assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) assert lexer_nodes == expected_array @@ -1080,15 +1121,15 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): # ]) # def test_i_can_parse_when_multiple_results(self, text, list_of_expected): # concepts_map = { - # "prefixed": Concept("a prefixed").def_prop("a"), - # "suffixed": Concept("suffixed a").def_prop("a"), - # "mult": Concept("a mult b").def_prop("a").def_prop("b"), - # 
"plus": Concept("a plus b").def_prop("a").def_prop("b"), - # "if": Concept("if a then b else c end").def_prop("a").def_prop("b").def_prop("c"), + # "prefixed": Concept("a prefixed").def_var("a"), + # "suffixed": Concept("suffixed a").def_var("a"), + # "mult": Concept("a mult b").def_var("a").def_var("b"), + # "plus": Concept("a plus b").def_var("a").def_var("b"), + # "if": Concept("if a then b else c end").def_var("a").def_var("b").def_var("c"), # "one": Concept("one"), # "two": Concept("two"), # "three": Concept("three"), - # "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_prop("unit"), + # "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), # } # sya_def = { # concepts_map["plus"]: (5, SyaAssociativity.Right), @@ -1116,13 +1157,13 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): ("x$!# infix z$!#", "infix", ["a", "b"]), ]) def test_i_cannot_parse_when_unrecognized(self, text, expected_concept, expected_unrecognized): - concepts_map = { - "suffixed": Concept("suffixed a").def_prop("a"), - "prefixed": Concept("a prefixed").def_prop("a"), - "infix": Concept("a infix b").def_prop("a").def_prop("b"), - "one": Concept("one") - } - sheerka, context, parser = self.init_parser(concepts_map, None) + # concepts_map = { + # "suffixed": Concept("suffixed a").def_var("a"), + # "prefixed": Concept("a prefixed").def_var("a"), + # "infix": Concept("a infix b").def_var("a").def_var("b"), + # "one": Concept("one") + # } + sheerka, context, parser = self.init_parser() res = parser.parse(context, text) wrapper = res.body @@ -1131,7 +1172,7 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): assert not res.status assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - assert lexer_nodes == [ConceptNode(concepts_map[expected_concept], 0, expected_end, source=text)] + assert lexer_nodes == [ConceptNode(cmap[expected_concept], 0, expected_end, source=text)] concept_found = 
lexer_nodes[0].concept for unrecognized in expected_unrecognized: @@ -1142,14 +1183,14 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): ("one prefixed x$!#", [cnode("__var__0 prefixed", 0, 2, "one prefixed"), utnode(3, 7, " x$!#")]), ]) def test_i_cannot_parse_when_part_of_the_sequence_is_not_recognized(self, text, expected): - concepts_map = { - "suffixed": Concept("suffixed a").def_prop("a"), - "prefixed": Concept("a prefixed").def_prop("a"), - "infix": Concept("a infix b").def_prop("a").def_prop("b"), - "one": Concept("one"), - "two": Concept("two"), - } - sheerka, context, parser = self.init_parser(concepts_map, None) + # concepts_map = { + # "suffixed": Concept("suffixed a").def_var("a"), + # "prefixed": Concept("a prefixed").def_var("a"), + # "infix": Concept("a infix b").def_var("a").def_var("b"), + # "one": Concept("one"), + # "two": Concept("two"), + # } + sheerka, context, parser = self.init_parser() res = parser.parse(context, text) wrapper = res.body @@ -1173,14 +1214,14 @@ class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): Atoms concepts, source code or BNF concepts alone are discarded by the lexer :return: """ - concepts_map = { - "plus": Concept("a plus b").def_prop("a").def_prop("b"), - "one": Concept("one"), - "two": Concept("two"), - "three": Concept("three"), - "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_prop("unit"), - } - sheerka, context, parser = self.init_parser(concepts_map, None) + # concepts_map = { + # "plus": Concept("a plus b").def_var("a").def_var("b"), + # "one": Concept("one"), + # "two": Concept("two"), + # "three": Concept("three"), + # "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), + # } + sheerka, context, parser = self.init_parser() res = parser.parse(context, text) diff --git a/tests/parsers/test_UnrecognizedNodeParser.py b/tests/parsers/test_UnrecognizedNodeParser.py index db3cd48..22fe8e3 100644 --- 
a/tests/parsers/test_UnrecognizedNodeParser.py +++ b/tests/parsers/test_UnrecognizedNodeParser.py @@ -42,17 +42,17 @@ def get_input_nodes_from(my_concepts_map, full_expr, *args): concepts_map = { - "5params": Concept("5params").def_prop("a").def_prop("b").def_prop("c").def_prop("d").def_prop("e"), - "plus": Concept("a plus b", body="a + b").def_prop("a").def_prop("b"), - "mult": Concept("a mult b", body="a * b").def_prop("a").def_prop("b"), + "5params": Concept("5params").def_var("a").def_var("b").def_var("c").def_var("d").def_var("e"), + "plus": Concept("a plus b", body="a + b").def_var("a").def_var("b"), + "mult": Concept("a mult b", body="a * b").def_var("a").def_var("b"), "one": Concept("one", body="1"), "two": Concept("two", body="2"), "three": Concept("three", body="3"), - "twenties": Concept("twenties", definition="'twenty' (one|two)=unit", body="20 + unit").def_prop("unit"), + "twenties": Concept("twenties", definition="'twenty' (one|two)=unit", body="20 + unit").def_var("unit"), "hello_atom": Concept("hello one"), - "hello_sya": Concept("hello a").def_prop("a"), - "greetings_a": Concept("greetings a").def_prop("a"), - "greetings_b": Concept("greetings b").def_prop("b"), + "hello_sya": Concept("hello a").def_var("a"), + "greetings_a": Concept("greetings a").def_var("a"), + "greetings_b": Concept("greetings b").def_var("b"), } @@ -64,7 +64,7 @@ class TestUnrecognizedNodeParser(TestUsingMemoryBasedSheerka): def setup_class(cls): t = TestUnrecognizedNodeParser() TestUnrecognizedNodeParser.sheerka, context, _ = t.init_parser(concepts_map, create_new=True) - TestUnrecognizedNodeParser.sheerka.set_sya_def(context, [ + TestUnrecognizedNodeParser.sheerka.force_sya_def(context, [ (concepts_map["mult"].id, 20, SyaAssociativity.Right), (concepts_map["plus"].id, 10, SyaAssociativity.Right), ]) @@ -144,13 +144,15 @@ class TestUnrecognizedNodeParser(TestUsingMemoryBasedSheerka): expected_nodes = compute_expected_array( concepts_map, " one plus two mult three ", - 
[CNC("plus", a="one", b=CC("mult", a="two", b="three"))]) + [CNC("plus", a="one", b=CC("mult", a="two", b="three"))], + exclude_body=True) + assert concept.compiled["e"][0].body.body == expected_nodes # # sanity check, I can evaluate the concept # evaluated = sheerka.evaluate_concept(self.get_context(sheerka, eval_body=True), concept) # assert evaluated.key == concept.key - # assert evaluated.get_prop("a") == + # assert evaluated.get_value("a") == def test_i_can_validate_with_recursion(self): sheerka, context, parser = self.init_parser() @@ -246,6 +248,22 @@ class TestUnrecognizedNodeParser(TestUsingMemoryBasedSheerka): assert len(actual_nodes) == 1 assert actual_nodes[0] == scnode(0, 4, expression) + def test_i_cannot_parse_unrecognized_python_that_looks_like_concept(self): + sheerka, context, parser = self.init_parser() + + expression = "fake_concept_name" # as it's not a concept, it will be recognized as python node + nodes = get_input_nodes_from(concepts_map, expression, UTN(expression)) + parser_input = ParserResultConcept("parsers.xxx", source=expression, value=nodes) + + res = parser.parse(context, parser_input) + parser_result = res.body + actual_nodes = res.body.body + + assert not res.status + assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) + assert len(actual_nodes) == 1 + assert actual_nodes[0] == nodes[0] + def test_i_can_parse_unrecognized_bnf_concept_node(self): sheerka, context, parser = self.init_parser() @@ -285,7 +303,8 @@ class TestUnrecognizedNodeParser(TestUsingMemoryBasedSheerka): concepts_map, expression, [CNC("plus", a="one", - b=CC("mult", source="two mult three", a="two", b="three"))]) + b=CC("mult", source="two mult three", a="two", b="three"))], + exclude_body=True) assert actual_nodes == expected_array def test_i_can_parse_sequences(self): @@ -314,7 +333,7 @@ class TestUnrecognizedNodeParser(TestUsingMemoryBasedSheerka): expression = "two hello one three" nodes = get_input_nodes_from(concepts_map, expression, 
"two", UTN("hello one"), "three") - parser_input = ParserResultConcept("parsers.xxx", source="one plus two hello one", value=nodes) + parser_input = ParserResultConcept("parsers.xxx", source=expression, value=nodes) res = parser.parse(context, parser_input) assert len(res) == 2 @@ -332,7 +351,8 @@ class TestUnrecognizedNodeParser(TestUsingMemoryBasedSheerka): expected_1 = compute_expected_array(concepts_map, expression, [ CN("two", 0, 0), CNC("hello_sya", source="hello one", start=2, end=4, a="one"), - CN("three", 6, 6)]) + CN("three", 6, 6)], + exclude_body=True) assert actual_nodes1 == expected_1 @@ -341,7 +361,7 @@ class TestUnrecognizedNodeParser(TestUsingMemoryBasedSheerka): expression = "greetings two" nodes = get_input_nodes_from(concepts_map, expression, UTN("greetings two")) - parser_input = ParserResultConcept("parsers.xxx", source="greetings two", value=nodes) + parser_input = ParserResultConcept("parsers.xxx", source=expression, value=nodes) res = parser.parse(context, parser_input) assert len(res) == 2 @@ -350,14 +370,25 @@ class TestUnrecognizedNodeParser(TestUsingMemoryBasedSheerka): actual_nodes0 = res[0].body.body expected_0 = compute_expected_array(concepts_map, expression, [ - CNC("greetings_a", source="greetings two", start=0, end=2, a="two")]) + CNC("greetings_a", source="greetings two", start=0, end=2, a="two")], exclude_body=True) assert actual_nodes0 == expected_0 actual_nodes1 = res[1].body.body expected_1 = compute_expected_array(concepts_map, expression, [ - CNC("greetings_b", source="greetings two", start=0, end=2, b="two")]) + CNC("greetings_b", source="greetings two", start=0, end=2, b="two")], exclude_body=True) assert actual_nodes1 == expected_1 + def test_i_cannot_parse_when_some_unrecognized_remain(self): + sheerka, context, parser = self.init_parser() + expression = "twenty one + one" + nodes = get_input_nodes_from(concepts_map, expression, UTN("twenty "), "one", " + ", ("one", 1)) + + parser_input = 
ParserResultConcept("parsers.xxx", source=expression, value=nodes) + res = parser.parse(context, parser_input) + + assert not res.status + assert res.body.body == nodes + def test_i_cannot_parse_when_i_cannot_validate(self): sheerka, context, parser = self.init_parser(concepts_map, create_new=True) expression = "one plus unknown tokens" diff --git a/tests/sdp/test_sheerkaDataProvider.py b/tests/sdp/test_sheerkaDataProvider.py index c1336bd..8e2da64 100644 --- a/tests/sdp/test_sheerkaDataProvider.py +++ b/tests/sdp/test_sheerkaDataProvider.py @@ -1,15 +1,12 @@ -import hashlib import json import os import shutil from datetime import date, datetime from os import path -import core.utils import pytest -from sdp.sheerkaDataProvider import SheerkaDataProvider, Event, SheerkaDataProviderError, \ - SheerkaDataProviderDuplicateKeyError, SheerkaDataProviderResult, SheerkaDataProviderRef -from sdp.sheerkaSerializer import JsonSerializer, Serializer, PickleSerializer +from sdp.sheerkaDataProvider import SheerkaDataProvider, Event +from sdp.sheerkaSerializer import PickleSerializer tests_root = path.abspath("../../build/tests") evt_digest = "3a571cb6034ef6fc8d7fe91948d0d29728eed74de02bac7968b0e9facca2c2d7" @@ -20,50 +17,6 @@ def read_json_file(sdp, file_name): return json.load(f) -class ObjWithKey: - """ - Object where the key can be resolved using get_key() - Not suitable for Json dump as there is no to_dict() method - """ - - def __init__(self, a, b): - self.a = a - self.b = b - - def __eq__(self, obj): - return isinstance(obj, ObjWithKey) and \ - self.a == obj.a and \ - self.b == obj.b - - def __repr__(self): - return f"ObjWithKey({self.a}, {self.b})" - - def get_key(self): - return self.a - - -class ObjSetKey: - """ - Object where the key can be be automatically set thanks to set_key() - Not suitable for Json dump as there is no to_dict() method - """ - - def __init__(self, value, key=None): - self.value = value - self.key = key - - def __eq__(self, obj): - return 
isinstance(obj, ObjSetKey) and \ - self.key == obj.key and \ - self.value == obj.value - - def __repr__(self): - return f"ObjSetKey({self.key}, {self.value})" - - def set_key(self, key): - self.key = key - - class ObjNoKey: """ Object with no key, they won't be ordered @@ -86,98 +39,6 @@ class ObjNoKey: return f"ObjNoKey({self.a}, {self.b})" -class ObjDumpJson: - """ - Object where the key can be resolved using get_key() - that can be used to dump as Json - """ - - def __init__(self, key=None, value=None): - self.key = key - self.value = value - - def __eq__(self, obj): - return isinstance(obj, ObjDumpJson) and \ - self.key == obj.key and \ - self.value == obj.value - - def __repr__(self): - return f"ObjDumpJson({self.key}, {self.value})" - - def get_key(self): - return self.key - - def get_digest(self): - """ - Returns the digest of the event - :return: hexa form of the sha256 - """ - return hashlib.sha256(f"Concept:{self.key}{self.value}".encode("utf-8")).hexdigest() - - def to_dict(self): - return self.__dict__ - - def from_dict(self, as_dict): - self.value = as_dict["value"] - self.key = as_dict["key"] - - -class ObjDumpJsonNoDigest: - """ - Object where the key can be resolved using get_key() - that can be used to dump as Json, - But with no builtin digest computation - """ - - def __init__(self, key=None, value=None): - self.key = key - self.value = value - - def __eq__(self, obj): - return isinstance(obj, ObjDumpJsonNoDigest) and \ - self.key == obj.key and \ - self.value == obj.value - - def __repr__(self): - return f"ObjDumpJsonNoDigest({self.key}, {self.value})" - - def get_key(self): - return self.key - - def to_dict(self): - return self.__dict__ - - def from_dict(self, as_dict): - self.value = as_dict["value"] - self.key = as_dict["key"] - - -class ObjWithDigestNoKey: - """ - Object that can compute its digest. 
- It can be used to test objects sharing the same entry (but that are different) - Not suitable for Json dump as there is no to_dict() method - """ - - def __init__(self, a, b): - self.a = a - self.b = b - - def __hash__(self): - return hash((self.a, self.b)) - - def __eq__(self, obj): - return isinstance(obj, ObjNoKey) and \ - self.a == obj.a and \ - self.b == obj.b - - def __repr__(self): - return f"ObjWithDigestNoKey({self.a}, {self.b})" - - def get_digest(self): - return str(self.a) + str(self.b) - - class ObjWithDigestWithKey: """ Object with a key that can compute its digest. @@ -239,14 +100,14 @@ def test_i_can_init_the_data_provider(root, expected): ]) def test_i_can_save_and_load_an_event(root): sdp = SheerkaDataProvider(root) - event = Event("hello world", date=date(year=2007, month=9, day=10), user="kodjo") + event = Event("hello world", date=date(year=2007, month=9, day=10), user_id="kodjo") evt_digest = sdp.save_event(event) evt = sdp.load_event(evt_digest) assert evt.version == 1 assert evt.date == datetime(year=2007, month=9, day=10) - assert evt.user == "kodjo" + assert evt.user_id == "kodjo" assert evt.message == "hello world" assert evt.parents is None assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.EventFolder, evt_digest[0:24], evt_digest)) @@ -262,8 +123,8 @@ def test_i_can_save_and_load_an_event(root): ]) def test_i_can_get_event_history(root): sdp = SheerkaDataProvider(root) - event = Event("hello world", date=date(year=2007, month=9, day=10), user="kodjo") - event2 = Event("hello world 2", date=date(year=2007, month=9, day=10), user="kodjo") + event = Event("hello world", date=date(year=2007, month=9, day=10), user_id="kodjo") + event2 = Event("hello world 2", date=date(year=2007, month=9, day=10), user_id="kodjo") evt_digest1 = sdp.save_event(event) evt_digest2 = sdp.save_event(event2) @@ -271,7 +132,7 @@ def test_i_can_get_event_history(root): evt = sdp.load_event(evt_digest2) assert evt.version == 1 assert evt.date == 
datetime(year=2007, month=9, day=10) - assert evt.user == "kodjo" + assert evt.user_id == "kodjo" assert evt.message == "hello world 2" assert evt.parents == [evt_digest1] @@ -326,20 +187,36 @@ def test_i_can_load_events_when_no_event(root): ".sheerka", "mem://" ]) -def test_i_can_add_an_string(root): +def test_i_can_add_and_reload_one_item(root): sdp = SheerkaDataProvider(root) - obj = "foo => bar" - result = sdp.add(evt_digest, "entry", obj) + event = Event("hello world", date=date(year=2007, month=9, day=10), user_id="kodjo") + with sdp.get_transaction(event) as transaction: + transaction.add("entry", "key", "foo => bar") + transaction.add("entry", "key2", ObjNoKey("a", "b")) + transaction.add("entry2", "key", "value2") + last_commit = sdp.get_snapshot(SheerkaDataProvider.HeadFile) state = sdp.load_state(last_commit) - loaded = sdp.get(result.entry, result.key) + loaded1 = sdp.get("entry", "key") + loaded2 = sdp.get("entry", "key2") + loaded3 = sdp.get("entry2", "key") - assert result.obj == obj - assert result.entry == "entry" - assert result.key is None - assert result.digest is None - assert loaded == obj + load_entry = sdp.get("entry") + + # check that the event is saved + evt_digest = event.get_digest() + assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.EventFolder, evt_digest[0:24], evt_digest)) + + # check the values + assert loaded1 == "foo => bar" + assert loaded2 == ObjNoKey("a", "b") + assert loaded3 == "value2" + + assert load_entry == { + "key": "foo => bar", + "key2": ObjNoKey("a", "b") + } assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.StateFolder, last_commit[0:24], last_commit)) assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.HeadFile)) @@ -347,7 +224,8 @@ def test_i_can_add_an_string(root): assert state.date is not None assert state.parents == [] assert state.events == [evt_digest] - assert state.data == {"entry": "foo => bar"} + assert state.data == {"entry": {'key': 'foo => bar', 'key2': 
ObjNoKey("a", "b")}, + 'entry2': {'key': 'value2'}} assert sdp.io.read_text(path.join(sdp.io.root, SheerkaDataProvider.HeadFile)) == last_commit @@ -356,782 +234,190 @@ def test_i_can_add_an_string(root): ".sheerka", "mem://" ]) -def test_i_can_add_several_strings_if_allow_multiple_is_true(root): +def test_i_can_load_an_entry(root): sdp = SheerkaDataProvider(root) - sdp.add(evt_digest, "entry", "foo") - sdp.add(evt_digest, "entry", "foo") - result = sdp.add(evt_digest, "entry", "bar") - loaded = sdp.get(result.entry, result.key) + with sdp.get_transaction(evt_digest) as transaction: + transaction.add("entry", "key1", "foo") + transaction.add("entry", "key2", "bar") + transaction.add("entry", "key3", "baz") - assert result.obj == "bar" - assert result.entry == "entry" - assert result.key is None - assert result.digest is None - assert loaded == ["foo", "foo", "bar"] + load_entry = sdp.get("entry") + + assert load_entry == { + "key1": "foo", + "key2": "bar", + "key3": "baz", + } + + # load entry was a copy + load_entry["key1"] = "another foo" + assert sdp.get("entry", "key1") == "foo" @pytest.mark.parametrize("root", [ ".sheerka", "mem://" ]) -def test_i_cannot_add_several_strings_if_allow_multiple_is_false(root): +def test_i_can_add_and_reload_a_list_of_items(root): sdp = SheerkaDataProvider(root) - with pytest.raises(IndexError) as index_error: - sdp.add(evt_digest, "entry", "foo", False) - sdp.add(evt_digest, "entry", "foo", False) - assert index_error.value.args[0] == "entry" + with sdp.get_transaction(evt_digest) as transaction: + transaction.add("entry", "key", ["foo => bar", ObjNoKey("a", "b")]) - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_add_an_object_with_no_key(root): - sdp = SheerkaDataProvider(root) - obj = ObjNoKey("a", "b") - - result = sdp.add(evt_digest, "entry", obj) last_commit = sdp.get_snapshot(SheerkaDataProvider.HeadFile) state = sdp.load_state(last_commit) - loaded = sdp.get(result.entry, result.key) + 
loaded = sdp.get("entry", "key") - assert result.obj == obj - assert result.entry == "entry" - assert result.key is None - assert result.digest is None - - assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.StateFolder, last_commit[0:24], last_commit)) - assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.HeadFile)) + # check the values + assert loaded == ["foo => bar", ObjNoKey("a", "b")] assert state.date is not None assert state.parents == [] assert state.events == [evt_digest] - assert state.data == {"entry": ObjNoKey("a", "b")} - - assert sdp.io.read_text(path.join(sdp.io.root, SheerkaDataProvider.HeadFile)) == last_commit + assert state.data == {"entry": {'key': ['foo => bar', ObjNoKey('a', 'b')]}} @pytest.mark.parametrize("root", [ ".sheerka", "mem://" ]) -def test_i_can_add_several_obj_no_key_if_allow_multiple_is_true(root): +def test_i_can_add_and_reload_a_set_of_items(root): sdp = SheerkaDataProvider(root) - sdp.add(evt_digest, "entry", ObjNoKey("a", "b")) - sdp.add(evt_digest, "entry", ObjNoKey("a", "b")) - result = sdp.add(evt_digest, "entry", ObjNoKey("c", "d")) - loaded = sdp.get(result.entry, result.key) + with sdp.get_transaction(evt_digest) as transaction: + transaction.add("entry", "key", {"foo => bar", ObjNoKey("a", "b")}) - assert result.obj == ObjNoKey("c", "d") - assert result.entry == "entry" - assert result.key is None - assert result.digest is None - assert loaded == [ObjNoKey("a", "b"), ObjNoKey("a", "b"), ObjNoKey("c", "d")] - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_cannot_add_several_obj_no_key_if_allow_multiple_is_false(root): - sdp = SheerkaDataProvider(root) - - with pytest.raises(IndexError) as index_error: - sdp.add(evt_digest, "entry", ObjNoKey("a", "b"), False) - sdp.add(evt_digest, "entry", ObjNoKey("c", "d"), False) - assert index_error.value.args[0] == "entry" - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_add_a_dict(root): - 
""" - Adding a dictionary. - Note that there is no key when adding a dictionary - - If you add {'my_key': 'my_value'} - 'my_key is not considered as the key of the entry' - - Because if you add {'my_key': 'my_value', 'my_key2': 'my_value2'} - There are now multiple keys. - - So for dictionary entries, the key is not managed - """ - sdp = SheerkaDataProvider(root) - obj = {"my_key": "my_value"} - - result = sdp.add(evt_digest, "entry", obj) last_commit = sdp.get_snapshot(SheerkaDataProvider.HeadFile) state = sdp.load_state(last_commit) - loaded = sdp.get(result.entry, result.key) + loaded = sdp.get("entry", "key") - loaded_value = sdp.get(result.entry, "my_key") # we can retrieve by key - - assert result.obj == obj - assert result.entry == "entry" - assert result.key is None # we return None as dict may contains several entries - assert result.digest is None - - assert loaded == obj - assert loaded_value == "my_value" - - assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.StateFolder, last_commit[0:24], last_commit)) - assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.HeadFile)) + # check the values + assert loaded == {"foo => bar", ObjNoKey("a", "b")} assert state.date is not None assert state.parents == [] assert state.events == [evt_digest] - assert state.data == {"entry": obj} - - assert sdp.io.read_text(path.join(sdp.io.root, SheerkaDataProvider.HeadFile)) == last_commit + assert state.data == {"entry": {'key': {'foo => bar', ObjNoKey('a', 'b')}}} @pytest.mark.parametrize("root", [ ".sheerka", "mem://" ]) -def test_i_can_add_multiple_entries_at_once_with_dict(root): +def test_i_can_add_and_reload_an_entry(root): sdp = SheerkaDataProvider(root) - obj = {"my_key1": "value1", "my_key2": "value2"} - result = sdp.add(evt_digest, "entry", obj) - loaded = sdp.get(result.entry, result.key) - loaded_value1 = sdp.get(result.entry, "my_key1") - loaded_value2 = sdp.get(result.entry, "my_key2") + with sdp.get_transaction(evt_digest) as 
transaction: + transaction.add("entry1", None, "foo") + transaction.add("entry2", None, {"key": "foo", "key1": "bar"}) + transaction.add("entry3", None, {"foo", "bar"}) + transaction.add("entry4", None, ["foo", "bar"]) - assert result.obj == obj - assert result.entry == "entry" - assert result.key is None # we return None as dict may contains several entries - assert result.digest is None + loaded_entry1 = sdp.get("entry1") + loaded_entry2 = sdp.get("entry2") + loaded_entry3 = sdp.get("entry3") + loaded_entry4 = sdp.get("entry4") - assert loaded == {"my_key1": "value1", "my_key2": "value2"} - assert loaded_value1 == "value1" - assert loaded_value2 == "value2" + assert loaded_entry1 == "foo" + assert loaded_entry2 == {"key": "foo", "key1": "bar"} + assert loaded_entry3 == {"foo", "bar"} + assert loaded_entry4 == ["foo", "bar"] + + # loaded values are copies + loaded_entry2["key"] = "foo2" + assert sdp.get("entry2", "key") == "foo" + + loaded_entry3.remove("foo") + assert sdp.get("entry3") == {"foo", "bar"} + + loaded_entry4[0] = "foo2" + assert sdp.get("entry4")[0] == "foo" @pytest.mark.parametrize("root", [ ".sheerka", "mem://" ]) -def test_i_can_add_same_key_with_dict_if_allow_multiple_is_true(root): +def test_i_can_override_values(root): sdp = SheerkaDataProvider(root) - sdp.add(evt_digest, "entry", {"my_key": "my_value"}) - result = sdp.add(evt_digest, "entry", {"my_key": "my_value"}) - loaded1 = sdp.get(result.entry, result.key) + with sdp.get_transaction(evt_digest) as transaction: + transaction.add("entry", "key", {"foo => bar", ObjNoKey("a", "b")}) - result = sdp.add(evt_digest, "entry", {"my_key": "my_value2"}) - loaded2 = sdp.get(result.entry, result.key) + with sdp.get_transaction(evt_digest) as transaction: + transaction.add("entry", "key", "new_value") - assert result.entry == "entry" - assert result.key is None - assert loaded1 == {"my_key": ["my_value", "my_value"]} - assert loaded2 == {"my_key": ["my_value", "my_value", "my_value2"]} + loaded = 
sdp.get("entry", "key") + assert loaded == "new_value" @pytest.mark.parametrize("root", [ ".sheerka", "mem://" ]) -def test_i_cannot_add_same_key_with_dict_if_allow_multiple_is_false(root): +def test_i_can_add_an_object_and_save_it_as_a_reference(root): sdp = SheerkaDataProvider(root) + sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjNoKey))) - with pytest.raises(IndexError) as index_error: - sdp.add(evt_digest, "entry", {"my_key": "my_value"}, False) - sdp.add(evt_digest, "entry", {"my_key": "my_value2"}, False) - assert index_error.value.args[0] == "entry.my_key" + with sdp.get_transaction(evt_digest) as transaction: + transaction.add("entry", "key1", ObjNoKey("a", "b"), use_ref=True) + transaction.add("entry", "key2", [ObjNoKey("a", "b"), ObjNoKey("c", "d")], use_ref=True) + transaction.add("entry", "key3", {ObjNoKey("a", "b"), ObjNoKey("c", "d")}, use_ref=True) - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_add_object_with_different_key_if_allow_multiple_is_false(root): - sdp = SheerkaDataProvider(root) - - sdp.add(evt_digest, "entry", {"my_key": "a"}, False) - sdp.add(evt_digest, "entry", {"my_key2": "b"}, False) - - assert sdp.get("entry", "my_key") == "a" - assert sdp.get("entry", "my_key2") == "b" - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_add_obj_with_key(root): - sdp = SheerkaDataProvider(root) - obj1 = ObjWithKey("key1", "b") - obj2 = ObjSetKey("c", key="key2") - - result1 = sdp.add(evt_digest, "entry", obj1) # test when key is taken from obj.get_key() - result2 = sdp.add(evt_digest, "entry2", obj2) # test when key is taken from obj.key - last_commit = sdp.get_snapshot(SheerkaDataProvider.HeadFile) - state = sdp.load_state(last_commit) - - loaded1 = sdp.get(result1.entry, result1.key) - loaded2 = sdp.get(result2.entry, result2.key) - - assert result1.obj == obj1 - assert result1.entry == "entry" - assert result1.key == "key1" - assert result1.digest is None 
- - assert result2.obj == obj2 - assert result2.entry == "entry2" - assert result2.key == "key2" - assert result2.digest is None - - assert loaded1 == ObjWithKey("key1", "b") - assert loaded2 == ObjSetKey("c", key="key2") - - assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.StateFolder, last_commit[0:24], last_commit)) - assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.HeadFile)) - - assert state.date is not None - assert len(state.parents) == 1 - assert state.events == [evt_digest] - assert state.data == {"entry": {"key1": obj1}, "entry2": {"key2": obj2}} - - assert sdp.io.read_text(path.join(sdp.io.root, SheerkaDataProvider.HeadFile)) == last_commit - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_add_objects_with_same_key_if_allow_multiple_is_true(root): - sdp = SheerkaDataProvider(root) - - sdp.add(evt_digest, "entry", ObjWithKey("my_key", "b")) - result = sdp.add(evt_digest, "entry", ObjSetKey("c", key="my_key")) - loaded1 = sdp.get(result.entry, result.key) - - result = sdp.add(evt_digest, "entry", ObjSetKey("c", key="my_key")) - sdp.add(evt_digest, "entry", ObjSetKey("c", key="my_key2")) # to prove that it does not melt everything - loaded2 = sdp.get(result.entry, result.key) - - assert loaded1 == [ObjWithKey("my_key", "b"), ObjSetKey("c", key="my_key")] - assert loaded2 == [ObjWithKey("my_key", "b"), ObjSetKey("c", key="my_key"), ObjSetKey("c", key="my_key")] - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_cannot_add_object_with_same_key_if_allow_multiple_is_false(root): - sdp = SheerkaDataProvider(root) - - with pytest.raises(IndexError) as index_error: - sdp.add(evt_digest, "entry", ObjWithKey("my_key", "b"), False) - sdp.add(evt_digest, "entry", ObjSetKey("c", key="my_key"), False) - assert index_error.value.args[0] == "entry.my_key" - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_add_obj_with_key_to_a_list(root): - sdp 
= SheerkaDataProvider(root) - - sdp.add(evt_digest, "entry", "foo") - sdp.add(evt_digest, "entry", "bar") # entry is now a list - sdp.add(evt_digest, "entry", ObjWithKey("a", "b")) # this entry must no be taken as a object with a key - - loaded = sdp.get("entry") - assert loaded == ["foo", "bar", ObjWithKey("a", "b")] - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_add_a_reference(root): - sdp = SheerkaDataProvider(root) - sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithDigestWithKey))) - obj1 = ObjWithDigestWithKey(1, "foo") - result1 = sdp.add(evt_digest, "entry", obj1, use_ref=True) - result3 = sdp.add(evt_digest, "entry_by_ref", SheerkaDataProviderRef(obj1.b, obj1.get_digest())) - - # another object - obj2 = ObjWithDigestWithKey(2, "bar") - sdp.add(evt_digest, "entry", obj2, use_ref=True) - sdp.add(evt_digest, "entry_by_ref", SheerkaDataProviderRef(obj2.b, obj2.get_digest())) - - assert result1.obj == obj1 - assert result1.entry == "entry" - assert result1.key == str(obj1.get_key()) - assert result1.digest == obj1.get_digest() - - assert result3.obj == SheerkaDataProviderRef(obj1.b, obj1.get_digest()) - assert result3.entry == "entry_by_ref" - assert result3.key == "foo" - assert result3.digest is None + assert sdp.get("entry", "key1") == ObjNoKey("a", "b") + assert sdp.get("entry", "key2") == [ObjNoKey("a", "b"), ObjNoKey("c", "d")] + assert sdp.get("entry", "key3") == {ObjNoKey("a", "b"), ObjNoKey("c", "d")} state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) assert state.data == { - "entry": { - "1": '##REF##:' + obj1.get_digest(), - "2": '##REF##:' + obj2.get_digest(), - }, - "entry_by_ref": { - "foo": SheerkaDataProviderRef(obj1.b, obj1.get_digest()), - "bar": SheerkaDataProviderRef(obj2.b, obj2.get_digest()) - }, + "entry": {'key1': '##REF##:8fac7e801d08361c3449c594b4261ab9c45ef47f1a08df68eb717db2b6919774', + 'key2': 
['##REF##:8fac7e801d08361c3449c594b4261ab9c45ef47f1a08df68eb717db2b6919774', + '##REF##:2a07d90eefd71a1fc5fae4d4745ab969b2d9a3e7dd159da6d47ec69630b2acf2'], + 'key3': {'##REF##:2a07d90eefd71a1fc5fae4d4745ab969b2d9a3e7dd159da6d47ec69630b2acf2', + '##REF##:8fac7e801d08361c3449c594b4261ab9c45ef47f1a08df68eb717db2b6919774'}} } - # make sure that I can load back - loaded1 = sdp.get("entry_by_ref", "foo") - assert loaded1 == ObjWithDigestWithKey(1, "foo") - assert getattr(loaded1, Serializer.ORIGIN) == obj1.get_digest() - - loaded2 = sdp.get("entry_by_ref", "bar") - assert loaded2 == ObjWithDigestWithKey(2, "bar") - assert getattr(loaded2, Serializer.ORIGIN) == obj2.get_digest() - @pytest.mark.parametrize("root", [ ".sheerka", "mem://" ]) -def test_i_can_have_multiple_is_ref_to_the_same_key(root): +def test_i_can_add_an_object_as_a_reference_using_its_own_digest(root): sdp = SheerkaDataProvider(root) sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithDigestWithKey))) - ref_result1 = sdp.add(evt_digest, "entry", ObjWithDigestWithKey(1, "foo"), use_ref=True) - ref_result2 = sdp.add(evt_digest, "entry", ObjWithDigestWithKey(2, "bar"), use_ref=True) - sdp.add(evt_digest, "entry_ref", SheerkaDataProviderRef("1", ref_result1.digest)) - sdp.add(evt_digest, "entry_ref", SheerkaDataProviderRef("1", ref_result2.digest)) + with sdp.get_transaction(evt_digest) as transaction: + transaction.add("entry", "key1", ObjWithDigestWithKey("a", "b"), use_ref=True) - state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) - assert state.data == {'entry': {'1': '##REF##:1foo', '2': '##REF##:2bar'}, - 'entry_ref': {'1': [SheerkaDataProviderRef("1", ref_result1.digest), - SheerkaDataProviderRef("1", ref_result2.digest)]}, - } - - loaded = sdp.get("entry_ref", "1") - assert len(loaded) == 2 - assert loaded[0] == ObjWithDigestWithKey(1, "foo") - assert loaded[1] == ObjWithDigestWithKey(2, "bar") - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) 
-def test_i_cannot_add_obj_with_no_key_when_then_entry_has_keys(root): - sdp = SheerkaDataProvider(root) - - with pytest.raises(SheerkaDataProviderError) as error: - sdp.add(evt_digest, "entry", ObjWithKey("a", "b")) - sdp.add(evt_digest, "entry", "foo") - - assert error.value.obj == "foo" - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_add_string_using_auto_generated_key(root): - sdp = SheerkaDataProvider(root) - key_file = path.join(sdp.io.root, SheerkaDataProvider.KeysFile) - - result1 = sdp.add_with_auto_key(evt_digest, "entry1", "foo") - result2 = sdp.add_with_auto_key(evt_digest, "entry1", "bar") - result3 = sdp.add_with_auto_key(evt_digest, "entry2", "baz") - - state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) - - assert sdp.io.exists(key_file) - assert read_json_file(sdp, key_file) == {"entry1": 2, "entry2": 1} - assert state.data == {"entry1": {"1": "foo", "2": "bar"}, "entry2": {"1": "baz"}} - assert result1.obj == "foo" - assert result2.obj == "bar" - assert result3.obj == "baz" - assert result1.entry == "entry1" - assert result2.entry == "entry1" - assert result3.entry == "entry2" - assert result1.digest is None - assert result2.digest is None - assert result3.digest is None - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_cannot_add_the_same_digest_twice_in_the_same_entry(root): - """ - If get_digest() is implemented, checks for duplicates - :return: - """ - sdp = SheerkaDataProvider(root) - - with pytest.raises(SheerkaDataProviderDuplicateKeyError) as error: - sdp.add(evt_digest, "entry", ObjWithDigestNoKey("a", "b")) - sdp.add(evt_digest, "entry", ObjWithDigestNoKey("a", "b")) - - assert error.value.obj.get_digest() == ObjWithDigestNoKey("a", "b").get_digest() - assert error.value.key == "entry" - assert error.value.args[0] == "Duplicate object." 
- - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_cannot_add_the_same_digest_twice_in_the_same_entry2(root): - """ - If get_digest() is implemented, checks for duplicates in list when no key - :return: - """ - sdp = SheerkaDataProvider(root) - - with pytest.raises(SheerkaDataProviderDuplicateKeyError) as error: - sdp.add(evt_digest, "entry", ObjWithDigestNoKey("a", "b")) - sdp.add(evt_digest, "entry", ObjWithDigestNoKey("a", "c")) - sdp.add(evt_digest, "entry", ObjWithDigestNoKey("a", "b")) - - assert error.value.obj.get_digest() == ObjWithDigestNoKey("a", "b").get_digest() - assert error.value.key == "entry" - assert error.value.args[0] == "Duplicate object." - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_cannot_add_the_same_digest_twice_in_the_same_entry3(root): - """ - If get_digest() is implemented, checks for duplicates when the key is provided - :return: - """ - sdp = SheerkaDataProvider(root) - - with pytest.raises(SheerkaDataProviderDuplicateKeyError) as error: - sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "b")) - sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "b")) - - assert error.value.obj.get_digest() == ObjWithDigestWithKey("a", "b").get_digest() - assert error.value.key == "entry.a" - assert error.value.args[0] == "Duplicate object." 
- - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_cannot_add_the_same_digest_twice_in_the_same_entry4(root): - """ - If get_digest() is implemented, checks for duplicates in list when the key is provided - :return: - """ - sdp = SheerkaDataProvider(root) - - with pytest.raises(SheerkaDataProviderDuplicateKeyError) as error: - sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "b")) - sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "c")) - sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "b")) - - assert error.value.obj.get_digest() == ObjWithDigestWithKey("a", "b").get_digest() - assert error.value.key == "entry.a" - assert error.value.args[0] == "Duplicate object." - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_get_and_set_key(root): - sdp = SheerkaDataProvider(root) - key_file = path.join(sdp.io.root, SheerkaDataProvider.KeysFile) - sdp.set_key("entry1", 1000) - - sdp.get_next_key("entry1") - sdp.get_next_key("entry1") - sdp.get_next_key("entry1") - sdp.get_next_key("entry2") - sdp.get_next_key("entry2") - - assert sdp.io.exists(key_file) - assert read_json_file(sdp, key_file) == {"entry1": 1003, "entry2": 2} - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_add_object_using_auto_generated_key(root): - sdp = SheerkaDataProvider(root) - key_file = path.join(sdp.io.root, SheerkaDataProvider.KeysFile) - - result1 = sdp.add_with_auto_key(evt_digest, "entry1", ObjNoKey("a", "b")) - result2 = sdp.add_with_auto_key(evt_digest, "entry1", ObjNoKey("a", "b")) - - state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) - - assert sdp.io.exists(key_file) - assert read_json_file(sdp, key_file) == {"entry1": 2} - assert state.data == {"entry1": {"1": ObjNoKey("a", "b"), "2": ObjNoKey("a", "b")}} - - assert result1.obj == ObjNoKey("a", "b") - assert result2.obj == ObjNoKey("a", "b") - assert result1.entry == "entry1" - assert result2.entry == 
"entry1" - assert result1.key == "1" - assert result2.key == "2" - assert result1.digest is None - assert result2.digest is None - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_object_key_is_updated_when_possible_using_auto_generated_key(root): - sdp = SheerkaDataProvider(root) - key_file = path.join(sdp.io.root, SheerkaDataProvider.KeysFile) - - result1 = sdp.add_with_auto_key(evt_digest, "entry1", ObjSetKey("foo")) - result2 = sdp.add_with_auto_key(evt_digest, "entry1", ObjSetKey("foo")) - - state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) - - assert sdp.io.exists(key_file) - assert read_json_file(sdp, key_file) == {"entry1": 2} - assert state.data == {"entry1": {"1": ObjSetKey("foo", "1"), "2": ObjSetKey("foo", "2")}} - - assert result1.obj == ObjSetKey("foo", "1") - assert result2.obj == ObjSetKey("foo", "2") - assert result1.entry == "entry1" - assert result2.entry == "entry1" - assert result1.key == "1" - assert result2.key == "2" - assert result1.digest is None - assert result2.digest is None - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_set_objects_with_key(root): - sdp = SheerkaDataProvider(root) - sdp.add(evt_digest, "entry", ObjWithKey(1, "foo")) - result = sdp.set(evt_digest, "entry", ObjWithKey(2, "foo")) - - state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) - assert state.data == {"entry": {"2": ObjWithKey(2, "foo")}} - assert result.entry == "entry" - assert result.key == "2" - assert result.digest is None - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_set_objects_with_no_key(root): - sdp = SheerkaDataProvider(root) - sdp.add(evt_digest, "entry", ObjNoKey(1, "foo")) - result = sdp.set(evt_digest, "entry", ObjNoKey(2, "foo")) - - state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) - assert state.data == {"entry": ObjNoKey(2, "foo")} - assert result.entry == "entry" - assert result.key 
is None - assert result.digest is None - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_set_from_list_to_dict(root): - sdp = SheerkaDataProvider(root) - sdp.set(evt_digest, "entry", [ObjNoKey(1, "foo"), ObjNoKey(2, "foo")]) - result = sdp.set(evt_digest, "entry", {"1": ObjNoKey(1, "foo"), "2": ObjNoKey(2, "foo")}) - - state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) - assert state.data == {"entry": {"1": ObjNoKey(1, "foo"), "2": ObjNoKey(2, "foo")}} - assert result.entry == "entry" - assert result.key is None - assert result.digest is None - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_set_using_reference(root): - sdp = SheerkaDataProvider(root) - sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithKey))) - sdp.add(evt_digest, "entry", ObjWithKey(1, "foo")) - result = sdp.set(evt_digest, "entry", ObjWithKey(2, "foo"), use_ref=True) - - state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) - assert state.data == {"entry": {"2": '##REF##:43f07065c7bad051cdd726bdfa4de7f8d754c31486c65ddb31d6b6548dec3db9'}} - - assert result.obj == ObjWithKey(2, "foo") - assert result.entry == "entry" - assert result.key == "2" - assert result.digest == "43f07065c7bad051cdd726bdfa4de7f8d754c31486c65ddb31d6b6548dec3db9" - - assert sdp.io.exists(sdp.io.get_obj_path(SheerkaDataProvider.ObjectsFolder, - "43f07065c7bad051cdd726bdfa4de7f8d754c31486c65ddb31d6b6548dec3db9")) - - # sanity check, make sure that I can load back - loaded = sdp.get(result.entry, result.key) - assert loaded == ObjWithKey(2, "foo") - assert getattr(loaded, Serializer.ORIGIN) == "43f07065c7bad051cdd726bdfa4de7f8d754c31486c65ddb31d6b6548dec3db9" - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_set_a_reference(root): - sdp = SheerkaDataProvider(root) - sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithDigestWithKey))) - obj = 
ObjWithDigestWithKey(1, "foo") - sdp.add(evt_digest, "entry", obj, use_ref=True) - sdp.set(evt_digest, "entry_by_value", {obj.b: obj.get_digest()}, is_ref=True) + assert sdp.get("entry", "key1") == ObjWithDigestWithKey("a", "b") state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) assert state.data == { - "entry": {"1": '##REF##:' + obj.get_digest()}, - "entry_by_value": {"foo": '##REF##:' + obj.get_digest()}, + "entry": {'key1': '##REF##:ab'} } - # sanity check, make sure that I can load back - loaded = sdp.get("entry_by_value", "foo") - assert loaded == ObjWithDigestWithKey(1, "foo") - assert getattr(loaded, Serializer.ORIGIN) == obj.get_digest() - - -def test_i_cannot_set_using_use_ref_and_is_ref(): - sdp = SheerkaDataProvider("mem://") - - with pytest.raises(SheerkaDataProviderError): - sdp.set(evt_digest, "entry", ObjWithDigestWithKey("a", "b"), use_ref=True, is_ref=True) - - -def test_i_cannot_set_using_is_ref_if_obj_is_not_a_dictionary(): - sdp = SheerkaDataProvider("mem://") - - with pytest.raises(SheerkaDataProviderError): - sdp.set(evt_digest, "entry", ObjWithDigestWithKey("a", "b"), is_ref=True) - @pytest.mark.parametrize("root", [ ".sheerka", "mem://" ]) -def test_i_can_add_an_object_with_a_key_as_a_reference(root): +def test_i_can_remove_elements(root): sdp = SheerkaDataProvider(root) - obj = ObjDumpJson("my_key", "value1") - obj_serializer = JsonSerializer(core.utils.get_full_qualified_name(obj)) - sdp.serializer.register(obj_serializer) - result = sdp.add(evt_digest, "entry", obj, use_ref=True) - state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) - digest = state.data["entry"]["my_key"][len(SheerkaDataProvider.REF_PREFIX):] + with sdp.get_transaction(evt_digest) as transaction: + transaction.add("entry", "key", "value") + transaction.add("entry", "key2", "value2") - assert result.obj == obj - assert result.entry == "entry" - assert result.key == obj.key - assert result.digest == obj.get_digest() - assert digest 
== result.digest - assert state.data == {'entry': {'my_key': f"{SheerkaDataProvider.REF_PREFIX}{digest}"}} + with sdp.get_transaction(evt_digest) as transaction: + transaction.remove("entry", "key") - loaded = sdp.load_obj(digest) - assert loaded == obj - assert getattr(loaded, Serializer.ORIGIN) == digest - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_add_a_dictionary_as_a_reference(root): - sdp = SheerkaDataProvider(root) - obj = {"my_key": "value1"} - - # No need to register a serializer for dictionaries - - result = sdp.add(evt_digest, "entry", obj, use_ref=True) - state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) - digest = state.data["entry"][len(SheerkaDataProvider.REF_PREFIX):] - - assert result.obj == obj - assert result.entry == "entry" - assert result.key is None # we return None as dict may contains several entries - assert result.digest == "1790cae3f354ecb6b419faaa2ee2c374ff33efb8cddafda9960924036ac04c1f" # a digest is created - assert digest == result.digest - - assert state.data == {'entry': f"{SheerkaDataProvider.REF_PREFIX}{digest}"} - - loaded = sdp.load_obj(digest) - assert loaded["my_key"] == obj["my_key"] - assert loaded[Serializer.ORIGIN] == digest - assert len(loaded) == 2 - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_add_an_object_with_no_builtin_digest_as_a_reference(root): - sdp = SheerkaDataProvider(root) - obj = ObjDumpJsonNoDigest("a", "b") - - obj_serializer = JsonSerializer(core.utils.get_full_qualified_name(obj)) - sdp.serializer.register(obj_serializer) - - result = sdp.add(evt_digest, "entry", obj, use_ref=True) - state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) - digest = state.data["entry"][obj.get_key()][len(SheerkaDataProvider.REF_PREFIX):] - - assert result.obj == obj - assert result.entry == "entry" - assert result.key == obj.get_key() - assert result.digest is not None - assert digest == result.digest - - 
assert state.data == {'entry': {obj.key: f"{SheerkaDataProvider.REF_PREFIX}{result.digest}"}} - - loaded = sdp.load_obj(digest) - assert getattr(loaded, Serializer.ORIGIN) == digest - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_add_unique(root): - sdp = SheerkaDataProvider(root) - result = sdp.add_unique(evt_digest, "entry", ObjNoKey(1, "foo")) - assert result == SheerkaDataProviderResult(ObjNoKey(1, "foo"), "entry", None, None, False) - - result = sdp.add_unique(evt_digest, "entry", ObjNoKey(1, "foo")) - assert result == SheerkaDataProviderResult(ObjNoKey(1, "foo"), "entry", None, None, True) - - result = sdp.add_unique(evt_digest, "entry", ObjNoKey(2, "bar")) - assert result == SheerkaDataProviderResult(ObjNoKey(2, "bar"), "entry", None, None, False) - - result = sdp.add_unique(evt_digest, "entry", ObjNoKey(2, "bar")) - assert result == SheerkaDataProviderResult(ObjNoKey(2, "bar"), "entry", None, None, True) + assert sdp.get("entry", "key") is None state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) - assert state.data == {"entry": {ObjNoKey(1, "foo"), ObjNoKey(2, "bar")}} + assert state.data == { + "entry": {'key2': 'value2'} + } @pytest.mark.parametrize("root", [ @@ -1141,1174 +427,47 @@ def test_i_can_add_unique(root): def test_i_can_keep_state_history(root): sdp = SheerkaDataProvider(root) - event1 = Event("cmd add 'foo => bar'") - event_digest1 = sdp.save_event(event1) - obj1 = "foo => bar" - sdp.add(event_digest1, "entry1", obj1) - state_digest1 = sdp.get_snapshot(SheerkaDataProvider.HeadFile) + with sdp.get_transaction(Event("first event")) as transaction: + transaction.add("entry", "key", "value") + state_digest1 = transaction.snapshot - event2 = Event("cmd add 'foo => baz'") - event_digest2 = sdp.save_event(event2) - obj2 = "foo => baz" - sdp.add(event_digest2, "entry2", obj2) - state_digest2 = sdp.get_snapshot(SheerkaDataProvider.HeadFile) + with sdp.get_transaction(Event("second event")) as 
transaction: + transaction.add("entry", "key2", "value2") + state_digest2 = transaction.snapshot - state2 = sdp.load_state(state_digest2) + with sdp.get_transaction(Event("third event")) as transaction: + transaction.add("entry", "key2", "value2") + state_digest3 = transaction.snapshot - assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.EventFolder, event_digest1[0:24], event_digest1)) - assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.StateFolder, state_digest1[0:24], state_digest1)) - assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.EventFolder, event_digest2[0:24], event_digest2)) - assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.StateFolder, state_digest2[0:24], state_digest2)) - assert state2.date is not None - assert state2.parents == [state_digest1] - assert state2.events == [event_digest2] - assert state2.data == {"entry1": "foo => bar", "entry2": "foo => baz"} + state = sdp.load_state(state_digest3) + assert state.parents == [state_digest2] + state = sdp.load_state(state_digest2) + assert state.parents == [state_digest1] -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_list_elements_when_there_is_nothing_to_list(root): - sdp = SheerkaDataProvider(root) + state = sdp.load_state(state_digest1) + assert state.parents == [] - result = sdp.list("entry") - assert list(result) == [] - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_list_when_no_key(root): - sdp = SheerkaDataProvider(root) - sdp.serializer.register(PickleSerializer(lambda obj: isinstance(obj, str))) - - sdp.add(evt_digest, "entry1", "foo") - sdp.add(evt_digest, "entry1", "bar") - sdp.add(evt_digest, "entry1", "baz", use_ref=True) - sdp.add(evt_digest, "entry2", "xyz") - - result = sdp.list("entry1") - - assert list(result) == ["foo", "bar", "baz"] - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_list_when_key(root): - sdp = 
SheerkaDataProvider(root) - sdp.serializer.register(PickleSerializer(lambda obj: isinstance(obj, ObjWithKey))) - - sdp.add(evt_digest, "entry1", {"1": "foo"}) - sdp.add(evt_digest, "entry1", {"2": "bar"}) - sdp.add(evt_digest, "entry1", ObjWithKey("3", "value"), use_ref=True) - sdp.add(evt_digest, "entry2", {"4": "xxx"}) - - result = sdp.list("entry1") - - assert list(result) == ["foo", "bar", ObjWithKey("3", "value")] - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_list_when_one_element(root): - sdp = SheerkaDataProvider(root) - sdp.add(evt_digest, "entry1", "foo") - sdp.add(evt_digest, "entry2", "baz") - - result = sdp.list("entry1") - - assert list(result) == ["foo"] - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_list_when_multiple_entries_under_the_same_key(root): - sdp = SheerkaDataProvider(root) - sdp.add(evt_digest, "entry", ObjWithKey("a", "b")) - sdp.add(evt_digest, "entry", ObjWithKey("a", "c")) - - result = sdp.list("entry") - assert list(result) == [[ObjWithKey("a", "b"), ObjWithKey("a", "c")]] - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_list_when_multiple_entries_under_the_same_key_when_reference(root): - sdp = SheerkaDataProvider(root) - sdp.serializer.register(PickleSerializer(lambda obj: isinstance(obj, ObjWithKey))) - - sdp.add(evt_digest, "entry", ObjWithKey("a", "b"), use_ref=True) - sdp.add(evt_digest, "entry", ObjWithKey("a", "c"), use_ref=True) - - result = sdp.list("entry") - assert list(result) == [[ObjWithKey("a", "b"), ObjWithKey("a", "c")]] - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_list_when_multiple_entries_under_the_same_entry(root): - sdp = SheerkaDataProvider(root) - sdp.add(evt_digest, "entry", ObjNoKey("a", "b")) - sdp.add(evt_digest, "entry", ObjNoKey("a", "c")) - - result = sdp.list("entry") - assert list(result) == [ObjNoKey("a", "b"), ObjNoKey("a", "c")] - - 
-@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_list_when_multiple_entries_under_the_same_entry_when_reference(root): - sdp = SheerkaDataProvider(root) - sdp.serializer.register(PickleSerializer(lambda obj: isinstance(obj, ObjNoKey))) - - sdp.add(evt_digest, "entry", ObjNoKey("a", "b"), use_ref=True) - sdp.add(evt_digest, "entry", ObjNoKey("a", "c"), use_ref=True) - - result = sdp.list("entry") - assert list(result) == [ObjNoKey("a", "b"), ObjNoKey("a", "c")] - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_filter_on_key_for_dict(root): - sdp = SheerkaDataProvider(root) - sdp.add(evt_digest, "entry1", {"1": "foo"}) - sdp.add(evt_digest, "entry1", {"2": "bar"}) - - result = sdp.list("entry1", lambda k, o: k == "1") - - assert list(result) == ["foo"] - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_filter_on_key_for_objects(root): - sdp = SheerkaDataProvider(root) - sdp.add(evt_digest, "entry1", ObjWithKey("a1", "b1")) - sdp.add(evt_digest, "entry1", ObjWithKey("a2", "b2")) - - result = sdp.list("entry1", lambda k, o: k == "a1") - - assert list(result) == [ObjWithKey("a1", "b1")] - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_filter_on_attribute_for_dict(root): - sdp = SheerkaDataProvider(root) - sdp.add(evt_digest, "entry1", {"1": {"a": "a1", "b": "b1"}}) - sdp.add(evt_digest, "entry1", {"2": {"a": "a2", "b": "b2"}}) - - result = sdp.list("entry1", lambda k, o: o["a"] == "a2") - - assert list(result) == [{"a": "a2", "b": "b2"}] - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_filter_on_attribute_for_object(root): - sdp = SheerkaDataProvider(root) - sdp.add(evt_digest, "entry1", ObjWithKey("a1", "b1")) - sdp.add(evt_digest, "entry1", ObjWithKey("a2", "b2")) - - result = sdp.list("entry1", lambda k, o: o.b == "b2") - - assert list(result) == [ObjWithKey("a2", "b2")] - - 
-@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_filter_a_list(root): - sdp = SheerkaDataProvider(root) - sdp.add(evt_digest, "entry1", "foo") - sdp.add(evt_digest, "entry1", "bar") - - result = sdp.list("entry1", lambda o: o == "bar") - - assert list(result) == ["bar"] - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_filter_a_list_of_object(root): - sdp = SheerkaDataProvider(root) - sdp.add(evt_digest, "entry1", ObjNoKey("a1", "b1")) - sdp.add(evt_digest, "entry1", ObjNoKey("a2", "b2")) - - result = sdp.list("entry1", lambda o: o.b == "b1") - - assert list(result) == [ObjNoKey("a1", "b1")] - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_remove_all_elements(root): - sdp = SheerkaDataProvider(root) - sdp.add(evt_digest, "entry1", "foo") - sdp.add(evt_digest, "entry1", "bar") - - state_digest = sdp.remove(evt_digest, "entry1") - result = sdp.list("entry1") - - assert sdp.io.read_text(path.join(sdp.io.root, SheerkaDataProvider.HeadFile)) == state_digest - assert list(result) == [] - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_remove_a_element(root): - sdp = SheerkaDataProvider(root) - sdp.add(evt_digest, "entry1", "foo") - sdp.add(evt_digest, "entry1", "bar") - - sdp.remove(evt_digest, "entry1", lambda o: o == "foo") - result = sdp.list("entry1") - - assert list(result) == ["bar"] - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_remove_dict_by_key(root): - sdp = SheerkaDataProvider(root) - sdp.add(evt_digest, "entry1", {"1": ObjNoKey("a1", "b1")}) - sdp.add(evt_digest, "entry1", {"2": ObjNoKey("a2", "b2")}) - - sdp.remove(evt_digest, "entry1", lambda k, o: k == "2") - result = sdp.list("entry1") - - assert list(result) == [ObjNoKey("a1", "b1")] - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_remove_when_only_one_element(root): - sdp = 
SheerkaDataProvider(root) - sdp.add(evt_digest, "entry1", "foo") - - sdp.remove(evt_digest, "entry1", lambda o: o == "foo") - result = sdp.list("entry1") - - assert list(result) == [] - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_cannot_remove_if_entry_does_not_exist(root): - sdp = SheerkaDataProvider(root) - with pytest.raises(IndexError) as e: - sdp.remove(evt_digest, "entry", silent_remove=False) - assert str(e.value) == "entry" - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_no_exception_is_raise_when_remove_in_silent_mode(root): - sdp = SheerkaDataProvider(root) - sdp.remove(evt_digest, "entry", silent_remove=True) # default - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_cannot_modify_an_entry_without_a_key(root): - sdp = SheerkaDataProvider(root) - - with pytest.raises(SheerkaDataProviderError) as error: - sdp.modify(evt_digest, "entry", None, "baz") - - assert error.value.args[0] == "Key is mandatory." 
- - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_modify_dict_with_a_key(root): - sdp = SheerkaDataProvider(root) - sdp.add(evt_digest, "entry", {"key1": "foo"}) - sdp.add(evt_digest, "entry", {"key2": "bar"}) - - result = sdp.modify(evt_digest, "entry", "key1", "baz") - - state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) - assert state.data == {"entry": {"key1": "baz", "key2": "bar"}} - assert result.obj == "baz" - assert result.entry == "entry" - assert result.key == "key1" - assert result.digest is None - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_modify_an_object_with_a_key(root): - sdp = SheerkaDataProvider(root) - sdp.add(evt_digest, "entry", ObjWithKey("key1", "foo")) - sdp.add(evt_digest, "entry", ObjWithKey("key2", "bar")) - - result = sdp.modify(evt_digest, "entry", "key1", ObjWithKey("key1", "baz")) - - state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) - - assert state.data == {"entry": {"key1": ObjWithKey("key1", "baz"), "key2": ObjWithKey("key2", "bar")}} - assert result.obj == ObjWithKey("key1", "baz") - assert result.entry == "entry" - assert result.key == "key1" - assert result.digest is None - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_modify_an_object_while_changing_the_key(root): - sdp = SheerkaDataProvider(root) - sdp.add(evt_digest, "entry", ObjWithKey("key1", "foo")) - sdp.add(evt_digest, "entry", ObjWithKey("key2", "bar")) - - result = sdp.modify(evt_digest, "entry", "key1", ObjWithKey("key3", "baz")) - - state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) - assert state.data == {"entry": {"key2": ObjWithKey("key2", "bar"), "key3": ObjWithKey("key3", "baz")}} - assert result.obj == ObjWithKey("key3", "baz") - assert result.entry == "entry" - assert result.key == "key3" - assert result.digest is None - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) 
-def test_i_can_modify_an_object_while_changing_the_key_to_an_existing_key(root): - sdp = SheerkaDataProvider(root) - sdp.add(evt_digest, "entry", ObjWithKey("key1", "foo")) - sdp.add(evt_digest, "entry", ObjWithKey("key2", "bar")) - - result = sdp.modify(evt_digest, "entry", "key2", ObjWithKey("key1", "bar")) - - state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) - assert state.data == {"entry": {"key1": [ObjWithKey("key1", "foo"), ObjWithKey("key1", "bar")]}} - assert result.obj == ObjWithKey("key1", "bar") - assert result.entry == "entry" - assert result.key == "key1" - assert result.digest is None - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_modify_an_object_while_changing_the_key_to_an_existing_when_list(root): - """ - In this example, the item to modify is within a list, and its key has changed - and in the new key, there is already a list - :return: - """ - sdp = SheerkaDataProvider(root) - - sdp.add(evt_digest, "entry", ObjDumpJson("key1", "value11")) - sdp.add(evt_digest, "entry", ObjDumpJson("key1", "value12")) - sdp.add(evt_digest, "entry", ObjDumpJson("key2", "value21")) - sdp.add(evt_digest, "entry", ObjDumpJson("key2", "value22")) - - new_value = ObjDumpJson("key1", "value13") - setattr(new_value, Serializer.ORIGIN, ObjDumpJson("key2", "value21").get_digest()) - result = sdp.modify(evt_digest, "entry", "key2", new_value) - - state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) - assert state.data == {"entry": { - "key1": [ObjDumpJson("key1", "value11"), ObjDumpJson("key1", "value12"), ObjDumpJson("key1", "value13")], - "key2": [ObjDumpJson("key2", "value22")] - }} - assert result.obj == new_value - assert result.entry == "entry" - assert result.key == "key1" - assert result.digest is None - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_modify_an_object_while_changing_the_key_to_an_existing_when_nothing(root): - """ - In this example, the 
item to modify is within a list, and its key has changed - and in the new key, there is nothing (the new key does not exist) - :return: - """ - sdp = SheerkaDataProvider(root) - sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJson))) - - sdp.add(evt_digest, "entry", ObjDumpJson("key2", "value21")) - sdp.add(evt_digest, "entry", ObjDumpJson("key2", "value22")) - - new_value = ObjDumpJson("key1", "value13") - setattr(new_value, Serializer.ORIGIN, ObjDumpJson("key2", "value21").get_digest()) - result = sdp.modify(evt_digest, "entry", "key2", new_value) - - state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) - assert state.data == {"entry": { - "key1": ObjDumpJson("key1", "value13"), - "key2": [ObjDumpJson("key2", "value22")] - }} - assert result.obj == new_value - assert result.entry == "entry" - assert result.key == "key1" - assert result.digest is None - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_modify_an_object_while_changing_the_key_to_an_existing_when_one_item(root): - """ - In this example, the item to modify is within a list, and its key has changed - and in the new key, there is only one element - :return: - """ - sdp = SheerkaDataProvider(root) - sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJson))) - - sdp.add(evt_digest, "entry", ObjDumpJson("key1", "value11")) - sdp.add(evt_digest, "entry", ObjDumpJson("key2", "value21")) - sdp.add(evt_digest, "entry", ObjDumpJson("key2", "value22")) - - new_value = ObjDumpJson("key1", "value13") - setattr(new_value, Serializer.ORIGIN, ObjDumpJson("key2", "value21").get_digest()) - result = sdp.modify(evt_digest, "entry", "key2", new_value) - - state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) - assert state.data == {"entry": { - "key1": [ObjDumpJson("key1", "value11"), ObjDumpJson("key1", "value13")], - "key2": [ObjDumpJson("key2", "value22")] - }} - assert result.obj == new_value - assert 
result.entry == "entry" - assert result.key == "key1" - assert result.digest is None - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_modify_a_object_saved_by_ref(root): - sdp = SheerkaDataProvider(root) - sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithKey))) - sdp.add(evt_digest, "entry", ObjWithKey("key1", "foo")) - sdp.add(evt_digest, "entry", ObjWithKey("key2", "bar"), use_ref=True) - - result = sdp.modify(evt_digest, "entry", "key2", ObjWithKey("key2", "baz")) - state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) - assert state.data == {"entry": { - "key1": ObjWithKey("key1", "foo"), - "key2": "##REF##:041d3cca905b51bc2c66251e73e56b836aae7b9435ee3d7eb05d44bb67ff575e"}} - assert result.obj == ObjWithKey("key2", "baz") - assert result.entry == "entry" - assert result.key == "key2" - assert result.digest == "041d3cca905b51bc2c66251e73e56b836aae7b9435ee3d7eb05d44bb67ff575e" - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_modify_an_object_saved_by_ref_in_a_list(root): - sdp = SheerkaDataProvider(root) - sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJsonNoDigest))) - - sdp.add(evt_digest, "entry", ObjDumpJsonNoDigest("key1", "value11"), use_ref=True) - sdp.add(evt_digest, "entry", ObjDumpJsonNoDigest("key1", "value12"), use_ref=True) - result = sdp.add(evt_digest, "entry", ObjDumpJsonNoDigest("key2", "value21"), use_ref=True) - sdp.add(evt_digest, "entry", ObjDumpJsonNoDigest("key2", "value22"), use_ref=True) - - new_value = ObjDumpJsonNoDigest("key1", "value13") - setattr(new_value, Serializer.ORIGIN, result.digest) - result = sdp.modify(evt_digest, "entry", "key2", new_value) - - state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) - assert state.data == {"entry": { - 'key1': ['##REF##:f80a0c0aceb1a7a3d238c0cff2d86d6bd3a62e0c1a65c5b505f43b10c4604bd8', - 
'##REF##:239a8238d188c37afa10b1bcc312ca8a0e78f6e75d688ca65d08e16717ff68b0', - '##REF##:9d0a2bf9d4081de0b14837ea46bc7a1cfb6b7562f7ae86255ea9bd0ac53a6437'], - 'key2': ['##REF##:df8a38b07f469f2ff8001ea6a70f77f4f9ce85d69c530091fcaf4b380f1500d3'] - }} - assert result.obj == new_value - assert result.entry == "entry" - assert result.key == "key1" - assert result.digest is not None - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_modify_a_data_provider_ref(root): - # first, create a valid entry - sdp = SheerkaDataProvider(root) - sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithDigestWithKey))) - obj = ObjWithDigestWithKey("1", "foo") - sdp.add(evt_digest, "entry", obj, use_ref=True) - sdp.add(evt_digest, "entry_ref", SheerkaDataProviderRef(obj.b, obj.get_digest())) - - state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) - assert state.data == { - "entry": {"1": "##REF##:1foo"}, - "entry_ref": {"foo": SheerkaDataProviderRef(obj.b, obj.get_digest())}} - - # modify this entry - obj_new = ObjWithDigestWithKey("1", "bar") - sdp.modify(evt_digest, "entry", obj_new.a, obj_new) - result = sdp.modify(evt_digest, "entry_ref", "foo", SheerkaDataProviderRef(obj.b, obj_new.get_digest())) - - state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) - assert state.data == { - "entry": {"1": "##REF##:1bar"}, - "entry_ref": {"foo": SheerkaDataProviderRef(obj.b, obj_new.get_digest())}} - - assert result.obj == SheerkaDataProviderRef(obj.b, obj_new.get_digest()) - assert result.entry == "entry_ref" - assert result.key == "foo" - assert result.digest is None # digest is not set as what is saved (the digest) is not saved by ref - - # sanity check, I can load the modified entry - loaded = sdp.get("entry_ref", "foo") - assert loaded == ObjWithDigestWithKey("1", "bar") - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_modify_is_ref_when_in_list(root): - sdp = 
SheerkaDataProvider(root) - sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithDigestWithKey))) - ref_result1 = sdp.add(evt_digest, "entry", ObjWithDigestWithKey(1, "foo"), use_ref=True) - ref_result2 = sdp.add(evt_digest, "entry", ObjWithDigestWithKey(2, "bar"), use_ref=True) - - sdp.add(evt_digest, "entry_ref", SheerkaDataProviderRef("1", ref_result1.digest)) - sdp.add(evt_digest, "entry_ref", SheerkaDataProviderRef("1", ref_result2.digest)) - - ref_result3 = sdp.add(evt_digest, "entry", ObjWithDigestWithKey(3, "baz"), use_ref=True) - - result = sdp.modify( - evt_digest, - "entry_ref", - "1", - SheerkaDataProviderRef("1", ref_result3.digest, ref_result2.digest)) - - state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) - assert state.data == {'entry': {'1': '##REF##:1foo', '2': '##REF##:2bar', '3': '##REF##:3baz'}, - 'entry_ref': {'1': [ - SheerkaDataProviderRef("1", ref_result1.digest), - SheerkaDataProviderRef("1", ref_result3.digest, ref_result2.digest)]}} - - loaded = sdp.get("entry_ref", "1") - assert len(loaded) == 2 - assert loaded[0] == ObjWithDigestWithKey(1, "foo") - assert loaded[1] == ObjWithDigestWithKey(3, "baz") - - assert result.obj == SheerkaDataProviderRef("1", ref_result3.digest, ref_result2.digest) - assert result.entry == "entry_ref" - assert result.key == "1" - assert result.digest is None # digest is not set as what is saved (the digest) is not saved by ref - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_cannot_modify_an_entry_that_does_not_exist(root): - sdp = SheerkaDataProvider(root) - - with pytest.raises(IndexError) as e: - sdp.modify(evt_digest, "entry", "key", "foo") - - assert str(e.value) == "entry" - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_cannot_modify_a_key_that_does_not_exist(root): - sdp = SheerkaDataProvider(root) - sdp.add(evt_digest, "entry1", {"1": "foo"}) - - with pytest.raises(IndexError) as e: - 
sdp.modify(evt_digest, "entry1", "2", "bar") - assert str(e.value) == "entry1.2" - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_cannot_modify_a_list_when_origin_is_unknown(root): - sdp = SheerkaDataProvider(root) - - sdp.add(evt_digest, "entry", ObjWithKey("key", "value1")) - sdp.add(evt_digest, "entry", ObjWithKey("key", "value2")) # same they - - state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) - - with pytest.raises(SheerkaDataProviderError) as error: - sdp.modify(evt_digest, "entry", "key", ObjWithKey("key", "value2")) - - assert error.value.obj == ObjWithKey("key", "value2") - assert error.value.args[0] == "Multiple entries under 'entry.key'" - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_modify_a_list_when_the_origin_is_known(root): - sdp = SheerkaDataProvider(root) - - sdp.add(evt_digest, "entry", ObjDumpJson("key", "value1")) - sdp.add(evt_digest, "entry", ObjDumpJson("key", "value2")) # same they - - new_value = ObjDumpJson("key", "value3") - setattr(new_value, Serializer.ORIGIN, ObjDumpJson("key", "value1").get_digest()) - - sdp.modify(evt_digest, "entry", "key", new_value) - - state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) - assert state.data == {"entry": {"key": [ObjDumpJson("key", "value3"), ObjDumpJson("key", "value2")]}} - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_modify_a_list_when_the_origin_is_known_2(root): - """ - This time, we check that the origin is automatically set when the object was saved as a reference - We also check that all objects are still persisted as reference - :return: - """ - sdp = SheerkaDataProvider(root) - sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJson))) - - sdp.add(evt_digest, "entry", ObjDumpJson("key", "value1"), use_ref=True) - sdp.add(evt_digest, "entry", ObjDumpJson("key", "value2"), use_ref=True) # same they - - objs = 
sdp.get("entry", "key") # origin is automatically set to the loaded objects - objs[0].value = "value3" - - sdp.modify(evt_digest, "entry", "key", objs[0]) - - state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) - assert state.data == {"entry": {"key": [ - "##REF##:621771a3af6a331e9abb3a63fb25e0cac4b13df0b292dfa30db6bd89031bfad0", - "##REF##:5fe085e8366d35c5f04a18b2d3dada376128b246e07c66de5872830b00f5f517"]}} - - # checks that all objects are (still) persisted - sdp.io.exists( - sdp.io.get_obj_path(sdp.ObjectsFolder, "621771a3af6a331e9abb3a63fb25e0cac4b13df0b292dfa30db6bd89031bfad0")) - sdp.io.exists( - sdp.io.get_obj_path(sdp.ObjectsFolder, "5fe085e8366d35c5f04a18b2d3dada376128b246e07c66de5872830b00f5f517")) - sdp.io.exists( - sdp.io.get_obj_path(sdp.ObjectsFolder, "1aac9e0d5c74c3bb989fd0f9def792bba36c5595d32f61be7cbb1a38dcf75327")) - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_get_the_entire_entry(root): - sdp = SheerkaDataProvider(root) - sdp.add(evt_digest, "entry1", "foo") - sdp.add(evt_digest, "entry1", "bar") - - result = sdp.get("entry1") - result_safe = sdp.get_safe("entry1") - - assert result == ["foo", "bar"] - assert result_safe == ["foo", "bar"] - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_get_an_entry_with_on_object(root): - sdp = SheerkaDataProvider(root) - sdp.add(evt_digest, "entry1", "foo") - - result = sdp.get("entry1") - result_safe = sdp.get_safe("entry1") - - assert result == "foo" - assert result_safe == "foo" - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_get_an_entry_by_key(root): - sdp = SheerkaDataProvider(root) - sdp.add(evt_digest, "entry1", {"1": "foo"}) - sdp.add(evt_digest, "entry1", {"2": "bar"}) - - result = sdp.get("entry1", "2") - result_safe = sdp.get_safe("entry1", "2") - - assert result == "bar" - assert result_safe == "bar" - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) 
-def test_i_can_get_object_saved_by_reference(root): - sdp = SheerkaDataProvider(root) - obj = ObjDumpJson("my_key", "value1") - sdp.serializer.register(JsonSerializer(core.utils.get_full_qualified_name(obj))) - - result = sdp.add(evt_digest, "entry", obj, use_ref=True) - loaded = sdp.get(result.entry, result.key) - - assert loaded == obj - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_get_objects_from_list_when_saved_by_reference(root): - sdp = SheerkaDataProvider(root) - sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJson))) - - sdp.add(evt_digest, "entry", ObjDumpJson("key", "value1"), use_ref=True) - sdp.add(evt_digest, "entry", ObjDumpJson("key", "value2"), use_ref=True) # same they - - objs = sdp.get("entry", "key") - - assert objs[0] == ObjDumpJson("key", "value1") - assert objs[1] == ObjDumpJson("key", "value2") - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_cannot_get_an_entry_that_does_not_exist(root): - sdp = SheerkaDataProvider(root) - - assert sdp.get_safe("entry") is None - with pytest.raises(IndexError) as e: - sdp.get("entry") - assert str(e.value) == "entry" - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_cannot_get_a_key_that_does_not_exist(root): - sdp = SheerkaDataProvider(root) - sdp.add(evt_digest, "entry1", {"1": "foo"}) - - assert sdp.get_safe("entry1", "2") is None - with pytest.raises(IndexError) as e: - sdp.get("entry1", "2") - assert str(e.value) == "entry1.2" - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_save_and_retrieve_cache(root): - sdp = SheerkaDataProvider(root) - txt = "foo bar baz foo bar baz foo bar baz" - key = "key_to_use" - category = "cache_category" - - assert not sdp.in_cache(category, key) - digest = sdp.add_to_cache(category, key, txt) - assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.CacheFolder, digest[0:24], digest)) - assert 
sdp.in_cache(category, key) - - from_cache = sdp.load_from_cache(category, key) - assert from_cache == txt - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_cache_is_not_updated_by_default(root): - sdp = SheerkaDataProvider(root) - txt = "foo bar baz foo bar baz foo bar baz" - txt2 = "foo foo foo foo foo foo foo foo foo" - key = "key_to_use" - category = "cache_category" - - sdp.add_to_cache(category, key, txt) - sdp.add_to_cache(category, key, txt2) - - from_cache = sdp.load_from_cache(category, key) - assert from_cache == txt - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_update_cache(root): - sdp = SheerkaDataProvider(root) - txt = "foo bar baz foo bar baz foo bar baz" - txt2 = "foo foo foo foo foo foo foo foo foo" - key = "key_to_use" - category = "cache_category" - - sdp.add_to_cache(category, key, txt) - sdp.add_to_cache(category, key, txt2, update=True) - - from_cache = sdp.load_from_cache(category, key) - assert from_cache == txt2 - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_remove_from_cache(root): - sdp = SheerkaDataProvider(root) - txt = "foo bar baz foo bar baz foo bar baz" - key = "key_to_use" - category = "cache_category" - - sdp.add_to_cache(category, key, txt) - digest = sdp.remove_from_cache(category, key) - assert not sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.CacheFolder, digest[0:24], digest)) - assert not sdp.in_cache(category, key) - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_test_than_an_entry_exists(root): - sdp = SheerkaDataProvider(root) - - assert not sdp.exists("entry") - sdp.add(evt_digest, "entry", "value") - assert sdp.exists("entry") - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_test_if_a_key_exists(root): - sdp = SheerkaDataProvider(root) - obj = ObjWithDigestWithKey("key", "value") - - assert not sdp.exists("entry") - assert not 
sdp.exists("entry", obj.get_key()) - - sdp.add(evt_digest, "entry", obj) - assert not sdp.exists("entry", "wrong_key") - assert sdp.exists("entry", obj.get_key()) - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_test_that_the_object_exists(root): - sdp = SheerkaDataProvider(root) - obj = ObjWithDigestWithKey("key", "value") - - assert not sdp.exists("entry") - assert not sdp.exists("entry", obj.get_key()) - assert not sdp.exists("entry", obj.get_key(), obj.get_digest()) - - # test for a single item under the key - sdp.add(evt_digest, "entry", obj) - assert not sdp.exists("entry", obj.get_key(), "wrong_digest") - assert sdp.exists("entry", obj.get_key(), obj.get_digest()) - - # test for a list item under the key - sdp.add(evt_digest, "entry2", ObjWithDigestWithKey("key", "value2")) - assert not sdp.exists("entry2", obj.get_key(), obj.get_digest()) - - sdp.add(evt_digest, "entry2", ObjWithDigestWithKey("key", "value3")) - assert not sdp.exists("entry2", obj.get_key(), obj.get_digest()) - - sdp.add(evt_digest, "entry2", obj) - assert sdp.exists("entry2", obj.get_key(), obj.get_digest()) - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_test_than_the_object_exists_when_using_references(root): - sdp = SheerkaDataProvider(root) - sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithDigestWithKey))) - obj = ObjWithDigestWithKey("key", "value") - - assert not sdp.exists("entry") - assert not sdp.exists("entry", obj.get_key()) - assert not sdp.exists("entry", obj.get_key(), obj.get_digest()) - - # test for a single item under the key - sdp.add(evt_digest, "entry", obj, use_ref=True) - assert not sdp.exists("entry", obj.get_key(), "wrong_digest") - assert sdp.exists("entry", obj.get_key(), obj.get_digest()) - - # test for a list item under the key - sdp.add(evt_digest, "entry2", ObjWithDigestWithKey("key", "value2"), use_ref=True) - assert not sdp.exists("entry2", obj.get_key(), 
obj.get_digest()) - - sdp.add(evt_digest, "entry2", ObjWithDigestWithKey("key", "value3"), use_ref=True) - assert not sdp.exists("entry2", obj.get_key(), obj.get_digest()) - - sdp.add(evt_digest, "entry2", obj, use_ref=True) - assert sdp.exists("entry2", obj.get_key(), obj.get_digest()) - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_save_and_load_object_ref_with_history(root): - sdp = SheerkaDataProvider(root) - obj = ObjDumpJson("my_key", "value1") - sdp.serializer.register(JsonSerializer(core.utils.get_full_qualified_name(obj))) - - result = sdp.add(evt_digest, "entry", obj, use_ref=True) - loaded = sdp.get(result.entry, result.key) - history = getattr(loaded, Serializer.HISTORY) - - assert result.obj == obj - assert result.entry == "entry" - assert result.key == obj.key - assert result.digest == obj.get_digest() - - assert loaded.key == obj.key - assert loaded.value == obj.value - - assert history[Serializer.USERNAME] == "kodjo" - assert history[Serializer.MODIFICATION_DATE] != "" - assert history[Serializer.PARENTS] == [] - - assert sdp.io.exists(sdp.io.get_obj_path(sdp.ObjectsFolder, obj.get_digest())) - - # save a second type with no modification - previous_modification_time = history[Serializer.MODIFICATION_DATE] - previous_parents = history[Serializer.PARENTS] - - sdp.modify(evt_digest, "entry", result.key, loaded) - loaded = sdp.get(result.entry, result.key) - history = getattr(loaded, Serializer.HISTORY) - - assert history[Serializer.MODIFICATION_DATE] == previous_modification_time - assert history[Serializer.PARENTS] == previous_parents - - # save again, but with a modification - previous_digest = loaded.get_digest() - loaded.value = "value2" - - sdp.modify(evt_digest, "entry", result.key, loaded) - loaded2 = sdp.get(result.entry, result.key) - history2 = getattr(loaded2, Serializer.HISTORY) - - assert loaded2.key == loaded.key - assert loaded2.value == loaded.value - - assert history2[Serializer.USERNAME] == "kodjo" 
- assert history2[Serializer.MODIFICATION_DATE] != "" - assert history2[Serializer.PARENTS] == [previous_digest] - - state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) - assert state.data == {"entry": { - "my_key": '##REF##:e6bf5b56428cfce0f08c94f2c3625dc3b3a8180d7229eaa9f8aa967fb16e5256'}} - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_can_add_obj_with_same_key_and_get_them_back(root): - sdp = SheerkaDataProvider(root) - obj1 = ObjDumpJson("key", "value1") - obj2 = ObjDumpJson("key", "value2") - sdp.serializer.register(JsonSerializer(core.utils.get_full_qualified_name(obj1))) - - result = sdp.add(evt_digest, "entry", obj1, use_ref=True) - sdp.add(evt_digest, "entry", obj2, use_ref=True) - - loaded = sdp.get(result.entry, result.key) - - assert len(loaded) == 2 - assert loaded[0] == obj1 - assert loaded[1] == obj2 - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_get_safe_dictionary_without_origin(root): - sdp = SheerkaDataProvider(root) - obj = {"my_key": "value1"} - - obj_serializer = JsonSerializer(core.utils.get_full_qualified_name(obj)) - sdp.serializer.register(obj_serializer) - - result = sdp.add(evt_digest, "entry", obj, use_ref=True) - from_db = sdp.get(result.entry, result.key) - - assert len(from_db) == 2 - assert from_db["my_key"] == obj["my_key"] - assert Serializer.ORIGIN in from_db - - from_db_no_origin = sdp.get_safe(result.entry, result.key, load_origin=False) - assert len(from_db_no_origin) == 1 - assert from_db_no_origin["my_key"] == obj["my_key"] - assert Serializer.ORIGIN not in from_db_no_origin - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_get_dictionary_without_origin(root): - sdp = SheerkaDataProvider(root) - obj = {"my_key": "value1"} - - obj_serializer = JsonSerializer(core.utils.get_full_qualified_name(obj)) - sdp.serializer.register(obj_serializer) - - result = sdp.add(evt_digest, "entry", obj, use_ref=True) - from_db 
= sdp.get(result.entry, result.key) - - assert len(from_db) == 2 - assert from_db["my_key"] == obj["my_key"] - assert Serializer.ORIGIN in from_db - - from_db_no_origin = sdp.get(result.entry, result.key, load_origin=False) - assert len(from_db_no_origin) == 1 - assert from_db_no_origin["my_key"] == obj["my_key"] - assert Serializer.ORIGIN not in from_db_no_origin - - -@pytest.mark.parametrize("root", [ - ".sheerka", - "mem://" -]) -def test_i_get_safe_object_without_origin(root): - sdp = SheerkaDataProvider(root) - obj = ObjDumpJson("my_key", "value1") - - obj_serializer = JsonSerializer(core.utils.get_full_qualified_name(obj)) - sdp.serializer.register(obj_serializer) - - result = sdp.add(evt_digest, "entry", obj, use_ref=True) - from_db = sdp.get(result.entry, result.key) - - assert from_db == obj - assert hasattr(from_db, Serializer.ORIGIN) - - from_db_no_origin = sdp.get_safe(result.entry, result.key, load_origin=False) - assert from_db_no_origin == obj - assert not hasattr(from_db_no_origin, Serializer.ORIGIN) - - -def test_i_can_get_ref(): +def test_i_can_remove_even_if_not_exist(): sdp = SheerkaDataProvider("mem://") - obj = ObjDumpJson("my_key", "value1") - - obj_serializer = JsonSerializer(core.utils.get_full_qualified_name(obj)) - sdp.serializer.register(obj_serializer) - - result = sdp.add(evt_digest, "entry", obj, use_ref=True) - - ref = sdp.get_ref(result.entry, result.key) - assert ref == "076f0df0f110c304982242a88088efacce71f361e49f065db75919a7f72c2821" + with sdp.get_transaction(evt_digest) as transaction: + transaction.remove("entry", None) + transaction.remove(None, "key") + transaction.remove("entry", "key") -def test_i_can_get_ref_when_list(): +def test_i_get_default_value_if_entry_is_missing(): + sdp = SheerkaDataProvider("mem://") + assert sdp.get("fake_entry", "fake_key", "default_value") == "default_value" + + +def test_exists(): sdp = SheerkaDataProvider("mem://") - obj_serializer = 
JsonSerializer(core.utils.get_full_qualified_name(ObjDumpJson)) - sdp.serializer.register(obj_serializer) + with sdp.get_transaction(evt_digest) as transaction: + transaction.add("entry", "key", "value") - sdp.add(evt_digest, "entry", ObjDumpJson("my_key", "value1"), use_ref=True) - result = sdp.add(evt_digest, "entry", ObjDumpJson("my_key", "value2"), use_ref=True) - - ref = sdp.get_ref(result.entry, result.key) - assert ref == [ - "076f0df0f110c304982242a88088efacce71f361e49f065db75919a7f72c2821", - "e6bf5b56428cfce0f08c94f2c3625dc3b3a8180d7229eaa9f8aa967fb16e5256" - ] - - -def test_i_cannot_get_ref_if_the_saved_item_is_not_a_ref(): - sdp = SheerkaDataProvider("mem://") - obj = ObjDumpJson("my_key", "value1") - result = sdp.add(evt_digest, "entry", obj, use_ref=False) - - with pytest.raises(SheerkaDataProviderError) as e: - sdp.get_ref(result.entry, result.key) - - assert e.value.args[0] == "Not a reference" - assert e.value.obj == f"{result.entry}.{result.key}" - - -def test_i_cannot_get_ref_if_the_item_does_not_exist(): - sdp = SheerkaDataProvider("mem://") - with pytest.raises(IndexError): - sdp.get_ref("fake", "fake") + assert not sdp.exists("entry2") + assert not sdp.exists("entry", "key2") + assert sdp.exists("entry", "key") diff --git a/tests/sdp/test_sheerkaDataProvider_Old.py b/tests/sdp/test_sheerkaDataProvider_Old.py new file mode 100644 index 0000000..9e50bb1 --- /dev/null +++ b/tests/sdp/test_sheerkaDataProvider_Old.py @@ -0,0 +1,2314 @@ +# import hashlib +# import json +# import os +# import shutil +# from datetime import date, datetime +# from os import path +# +# import core.utils +# import pytest +# from sdp.sheerkaDataProvider import SheerkaDataProvider, Event, SheerkaDataProviderError, \ +# SheerkaDataProviderDuplicateKeyError, SheerkaDataProviderResult, SheerkaDataProviderRef +# from sdp.sheerkaSerializer import JsonSerializer, Serializer, PickleSerializer +# +# tests_root = path.abspath("../../build/tests") +# evt_digest = 
"3a571cb6034ef6fc8d7fe91948d0d29728eed74de02bac7968b0e9facca2c2d7" +# +# +# def read_json_file(sdp, file_name): +# with sdp.io.open(file_name, "r") as f: +# return json.load(f) +# +# +# class ObjWithKey: +# """ +# Object where the key can be resolved using get_key() +# Not suitable for Json dump as there is no to_dict() method +# """ +# +# def __init__(self, a, b): +# self.a = a +# self.b = b +# +# def __eq__(self, obj): +# return isinstance(obj, ObjWithKey) and \ +# self.a == obj.a and \ +# self.b == obj.b +# +# def __repr__(self): +# return f"ObjWithKey({self.a}, {self.b})" +# +# def get_key(self): +# return self.a +# +# +# class ObjSetKey: +# """ +# Object where the key can be be automatically set thanks to set_key() +# Not suitable for Json dump as there is no to_dict() method +# """ +# +# def __init__(self, value, key=None): +# self.value = value +# self.key = key +# +# def __eq__(self, obj): +# return isinstance(obj, ObjSetKey) and \ +# self.key == obj.key and \ +# self.value == obj.value +# +# def __repr__(self): +# return f"ObjSetKey({self.key}, {self.value})" +# +# def set_key(self, key): +# self.key = key +# +# +# class ObjNoKey: +# """ +# Object with no key, they won't be ordered +# Not suitable for Json dump as there is no to_dict() method +# """ +# +# def __init__(self, a, b): +# self.a = a +# self.b = b +# +# def __hash__(self): +# return hash((self.a, self.b)) +# +# def __eq__(self, obj): +# return isinstance(obj, ObjNoKey) and \ +# self.a == obj.a and \ +# self.b == obj.b +# +# def __repr__(self): +# return f"ObjNoKey({self.a}, {self.b})" +# +# +# class ObjDumpJson: +# """ +# Object where the key can be resolved using get_key() +# that can be used to dump as Json +# """ +# +# def __init__(self, key=None, value=None): +# self.key = key +# self.value = value +# +# def __eq__(self, obj): +# return isinstance(obj, ObjDumpJson) and \ +# self.key == obj.key and \ +# self.value == obj.value +# +# def __repr__(self): +# return f"ObjDumpJson({self.key}, 
{self.value})" +# +# def get_key(self): +# return self.key +# +# def get_digest(self): +# """ +# Returns the digest of the event +# :return: hexa form of the sha256 +# """ +# return hashlib.sha256(f"Concept:{self.key}{self.value}".encode("utf-8")).hexdigest() +# +# def to_dict(self): +# return self.__dict__ +# +# def from_dict(self, as_dict): +# self.value = as_dict["value"] +# self.key = as_dict["key"] +# +# +# class ObjDumpJsonNoDigest: +# """ +# Object where the key can be resolved using get_key() +# that can be used to dump as Json, +# But with no builtin digest computation +# """ +# +# def __init__(self, key=None, value=None): +# self.key = key +# self.value = value +# +# def __eq__(self, obj): +# return isinstance(obj, ObjDumpJsonNoDigest) and \ +# self.key == obj.key and \ +# self.value == obj.value +# +# def __repr__(self): +# return f"ObjDumpJsonNoDigest({self.key}, {self.value})" +# +# def get_key(self): +# return self.key +# +# def to_dict(self): +# return self.__dict__ +# +# def from_dict(self, as_dict): +# self.value = as_dict["value"] +# self.key = as_dict["key"] +# +# +# class ObjWithDigestNoKey: +# """ +# Object that can compute its digest. +# It can be used to test objects sharing the same entry (but that are different) +# Not suitable for Json dump as there is no to_dict() method +# """ +# +# def __init__(self, a, b): +# self.a = a +# self.b = b +# +# def __hash__(self): +# return hash((self.a, self.b)) +# +# def __eq__(self, obj): +# return isinstance(obj, ObjNoKey) and \ +# self.a == obj.a and \ +# self.b == obj.b +# +# def __repr__(self): +# return f"ObjWithDigestNoKey({self.a}, {self.b})" +# +# def get_digest(self): +# return str(self.a) + str(self.b) +# +# +# class ObjWithDigestWithKey: +# """ +# Object with a key that can compute its digest. 
+# It can be used to test objects sharing the same key (but that are different) +# Not suitable for Json dump as there is no to_dict() method +# """ +# +# def __init__(self, a, b): +# self.a = a +# self.b = b +# +# def __hash__(self): +# return hash((self.a, self.b)) +# +# def __eq__(self, obj): +# return isinstance(obj, ObjWithDigestWithKey) and \ +# self.a == obj.a and \ +# self.b == obj.b +# +# def __repr__(self): +# return f"ObjWithDigestWithKey({self.a}, {self.b})" +# +# def get_key(self): +# return self.a +# +# def get_digest(self): +# return str(self.a) + str(self.b) +# +# +# @pytest.fixture(autouse=True) +# def init_test(): +# if path.exists(tests_root): +# shutil.rmtree(tests_root) +# +# if not path.exists(tests_root): +# os.makedirs(tests_root) +# current_pwd = os.getcwd() +# os.chdir(tests_root) +# +# yield None +# +# os.chdir(current_pwd) +# +# +# @pytest.mark.parametrize("root, expected", [ +# (".sheerka", path.abspath(path.join(tests_root, ".sheerka"))), +# ("mem://", "") +# ]) +# def test_i_can_init_the_data_provider(root, expected): +# sdp = SheerkaDataProvider(root) +# +# assert sdp.io.root == expected +# assert sdp.io.exists(sdp.io.root) +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_save_and_load_an_event(root): +# sdp = SheerkaDataProvider(root) +# event = Event("hello world", date=date(year=2007, month=9, day=10), user="kodjo") +# +# evt_digest = sdp.save_event(event) +# evt = sdp.load_event(evt_digest) +# +# assert evt.version == 1 +# assert evt.date == datetime(year=2007, month=9, day=10) +# assert evt.user_id == "kodjo" +# assert evt.message == "hello world" +# assert evt.parents is None +# assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.EventFolder, evt_digest[0:24], evt_digest)) +# +# # I can get the last event +# evt = sdp.load_event() +# assert evt.message == "hello world" +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def 
test_i_can_get_event_history(root): +# sdp = SheerkaDataProvider(root) +# event = Event("hello world", date=date(year=2007, month=9, day=10), user="kodjo") +# event2 = Event("hello world 2", date=date(year=2007, month=9, day=10), user="kodjo") +# +# evt_digest1 = sdp.save_event(event) +# evt_digest2 = sdp.save_event(event2) +# +# evt = sdp.load_event(evt_digest2) +# assert evt.version == 1 +# assert evt.date == datetime(year=2007, month=9, day=10) +# assert evt.user_id == "kodjo" +# assert evt.message == "hello world 2" +# assert evt.parents == [evt_digest1] +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_load_events(root): +# sdp = SheerkaDataProvider(root) +# +# for i in range(15): +# sdp.save_event(Event(f"Hello {i}")) +# +# events = list(sdp.load_events(10)) # first ten +# assert len(events) == 10 +# assert events[0].message == "Hello 14" +# assert events[9].message == "Hello 5" +# +# events = list(sdp.load_events(10, 5)) # skip first 5, then take 10 +# assert len(events) == 10 +# assert events[0].message == "Hello 9" +# assert events[9].message == "Hello 0" +# +# events = list(sdp.load_events(20, 10)) # skip first 10, take 20,(but only 5 remaining) +# assert len(events) == 5 +# assert events[0].message == "Hello 4" +# assert events[4].message == "Hello 0" +# +# events = list(sdp.load_events(1, 20)) # skip first 20, take one +# assert len(events) == 0 +# +# events = list(sdp.load_events(0)) # all +# assert len(events) == 15 +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_load_events_when_no_event(root): +# sdp = SheerkaDataProvider(root) +# +# events = list(sdp.load_events(1)) +# assert len(events) == 0 +# +# events = list(sdp.load_events(1, 5)) +# assert len(events) == 0 +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_add_an_string(root): +# sdp = SheerkaDataProvider(root) +# obj = "foo => bar" +# +# result = 
sdp.add(evt_digest, "entry", obj) +# last_commit = sdp.get_snapshot(SheerkaDataProvider.HeadFile) +# state = sdp.load_state(last_commit) +# loaded = sdp.get(result.entry, result.key) +# +# assert result.obj == obj +# assert result.entry == "entry" +# assert result.key is None +# assert result.digest is None +# assert loaded == obj +# +# assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.StateFolder, last_commit[0:24], last_commit)) +# assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.HeadFile)) +# +# assert state.date is not None +# assert state.parents == [] +# assert state.events == [evt_digest] +# assert state.data == {"entry": "foo => bar"} +# +# assert sdp.io.read_text(path.join(sdp.io.root, SheerkaDataProvider.HeadFile)) == last_commit +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_add_several_strings_if_allow_multiple_is_true(root): +# sdp = SheerkaDataProvider(root) +# +# sdp.add(evt_digest, "entry", "foo") +# sdp.add(evt_digest, "entry", "foo") +# result = sdp.add(evt_digest, "entry", "bar") +# loaded = sdp.get(result.entry, result.key) +# +# assert result.obj == "bar" +# assert result.entry == "entry" +# assert result.key is None +# assert result.digest is None +# assert loaded == ["foo", "foo", "bar"] +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_cannot_add_several_strings_if_allow_multiple_is_false(root): +# sdp = SheerkaDataProvider(root) +# +# with pytest.raises(IndexError) as index_error: +# sdp.add(evt_digest, "entry", "foo", False) +# sdp.add(evt_digest, "entry", "foo", False) +# assert index_error.value.args[0] == "entry" +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_add_an_object_with_no_key(root): +# sdp = SheerkaDataProvider(root) +# obj = ObjNoKey("a", "b") +# +# result = sdp.add(evt_digest, "entry", obj) +# last_commit = sdp.get_snapshot(SheerkaDataProvider.HeadFile) +# state 
= sdp.load_state(last_commit) +# loaded = sdp.get(result.entry, result.key) +# +# assert result.obj == obj +# assert result.entry == "entry" +# assert result.key is None +# assert result.digest is None +# +# assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.StateFolder, last_commit[0:24], last_commit)) +# assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.HeadFile)) +# +# assert state.date is not None +# assert state.parents == [] +# assert state.events == [evt_digest] +# assert state.data == {"entry": ObjNoKey("a", "b")} +# +# assert sdp.io.read_text(path.join(sdp.io.root, SheerkaDataProvider.HeadFile)) == last_commit +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_add_several_obj_no_key_if_allow_multiple_is_true(root): +# sdp = SheerkaDataProvider(root) +# +# sdp.add(evt_digest, "entry", ObjNoKey("a", "b")) +# sdp.add(evt_digest, "entry", ObjNoKey("a", "b")) +# result = sdp.add(evt_digest, "entry", ObjNoKey("c", "d")) +# loaded = sdp.get(result.entry, result.key) +# +# assert result.obj == ObjNoKey("c", "d") +# assert result.entry == "entry" +# assert result.key is None +# assert result.digest is None +# assert loaded == [ObjNoKey("a", "b"), ObjNoKey("a", "b"), ObjNoKey("c", "d")] +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_cannot_add_several_obj_no_key_if_allow_multiple_is_false(root): +# sdp = SheerkaDataProvider(root) +# +# with pytest.raises(IndexError) as index_error: +# sdp.add(evt_digest, "entry", ObjNoKey("a", "b"), False) +# sdp.add(evt_digest, "entry", ObjNoKey("c", "d"), False) +# assert index_error.value.args[0] == "entry" +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_add_a_dict(root): +# """ +# Adding a dictionary. 
+# Note that there is no key when adding a dictionary +# +# If you add {'my_key': 'my_value'} +# 'my_key is not considered as the key of the entry' +# +# Because if you add {'my_key': 'my_value', 'my_key2': 'my_value2'} +# There are now multiple keys. +# +# So for dictionary entries, the key is not managed +# """ +# sdp = SheerkaDataProvider(root) +# obj = {"my_key": "my_value"} +# +# result = sdp.add(evt_digest, "entry", obj) +# last_commit = sdp.get_snapshot(SheerkaDataProvider.HeadFile) +# state = sdp.load_state(last_commit) +# loaded = sdp.get(result.entry, result.key) +# +# loaded_value = sdp.get(result.entry, "my_key") # we can retrieve by key +# +# assert result.obj == obj +# assert result.entry == "entry" +# assert result.key is None # we return None as dict may contains several entries +# assert result.digest is None +# +# assert loaded == obj +# assert loaded_value == "my_value" +# +# assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.StateFolder, last_commit[0:24], last_commit)) +# assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.HeadFile)) +# +# assert state.date is not None +# assert state.parents == [] +# assert state.events == [evt_digest] +# assert state.data == {"entry": obj} +# +# assert sdp.io.read_text(path.join(sdp.io.root, SheerkaDataProvider.HeadFile)) == last_commit +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_add_multiple_entries_at_once_with_dict(root): +# sdp = SheerkaDataProvider(root) +# obj = {"my_key1": "value1", "my_key2": "value2"} +# +# result = sdp.add(evt_digest, "entry", obj) +# loaded = sdp.get(result.entry, result.key) +# loaded_value1 = sdp.get(result.entry, "my_key1") +# loaded_value2 = sdp.get(result.entry, "my_key2") +# +# assert result.obj == obj +# assert result.entry == "entry" +# assert result.key is None # we return None as dict may contains several entries +# assert result.digest is None +# +# assert loaded == {"my_key1": "value1", 
"my_key2": "value2"} +# assert loaded_value1 == "value1" +# assert loaded_value2 == "value2" +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_add_same_key_with_dict_if_allow_multiple_is_true(root): +# sdp = SheerkaDataProvider(root) +# +# sdp.add(evt_digest, "entry", {"my_key": "my_value"}) +# result = sdp.add(evt_digest, "entry", {"my_key": "my_value"}) +# loaded1 = sdp.get(result.entry, result.key) +# +# result = sdp.add(evt_digest, "entry", {"my_key": "my_value2"}) +# loaded2 = sdp.get(result.entry, result.key) +# +# assert result.entry == "entry" +# assert result.key is None +# assert loaded1 == {"my_key": ["my_value", "my_value"]} +# assert loaded2 == {"my_key": ["my_value", "my_value", "my_value2"]} +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_cannot_add_same_key_with_dict_if_allow_multiple_is_false(root): +# sdp = SheerkaDataProvider(root) +# +# with pytest.raises(IndexError) as index_error: +# sdp.add(evt_digest, "entry", {"my_key": "my_value"}, False) +# sdp.add(evt_digest, "entry", {"my_key": "my_value2"}, False) +# assert index_error.value.args[0] == "entry.my_key" +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_add_object_with_different_key_if_allow_multiple_is_false(root): +# sdp = SheerkaDataProvider(root) +# +# sdp.add(evt_digest, "entry", {"my_key": "a"}, False) +# sdp.add(evt_digest, "entry", {"my_key2": "b"}, False) +# +# assert sdp.get("entry", "my_key") == "a" +# assert sdp.get("entry", "my_key2") == "b" +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_add_obj_with_key(root): +# sdp = SheerkaDataProvider(root) +# obj1 = ObjWithKey("key1", "b") +# obj2 = ObjSetKey("c", key="key2") +# +# result1 = sdp.add(evt_digest, "entry", obj1) # test when key is taken from obj.get_key() +# result2 = sdp.add(evt_digest, "entry2", obj2) # test when key is taken from obj.key +# 
last_commit = sdp.get_snapshot(SheerkaDataProvider.HeadFile) +# state = sdp.load_state(last_commit) +# +# loaded1 = sdp.get(result1.entry, result1.key) +# loaded2 = sdp.get(result2.entry, result2.key) +# +# assert result1.obj == obj1 +# assert result1.entry == "entry" +# assert result1.key == "key1" +# assert result1.digest is None +# +# assert result2.obj == obj2 +# assert result2.entry == "entry2" +# assert result2.key == "key2" +# assert result2.digest is None +# +# assert loaded1 == ObjWithKey("key1", "b") +# assert loaded2 == ObjSetKey("c", key="key2") +# +# assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.StateFolder, last_commit[0:24], last_commit)) +# assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.HeadFile)) +# +# assert state.date is not None +# assert len(state.parents) == 1 +# assert state.events == [evt_digest] +# assert state.data == {"entry": {"key1": obj1}, "entry2": {"key2": obj2}} +# +# assert sdp.io.read_text(path.join(sdp.io.root, SheerkaDataProvider.HeadFile)) == last_commit +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_add_objects_with_same_key_if_allow_multiple_is_true(root): +# sdp = SheerkaDataProvider(root) +# +# sdp.add(evt_digest, "entry", ObjWithKey("my_key", "b")) +# result = sdp.add(evt_digest, "entry", ObjSetKey("c", key="my_key")) +# loaded1 = sdp.get(result.entry, result.key) +# +# result = sdp.add(evt_digest, "entry", ObjSetKey("c", key="my_key")) +# sdp.add(evt_digest, "entry", ObjSetKey("c", key="my_key2")) # to prove that it does not melt everything +# loaded2 = sdp.get(result.entry, result.key) +# +# assert loaded1 == [ObjWithKey("my_key", "b"), ObjSetKey("c", key="my_key")] +# assert loaded2 == [ObjWithKey("my_key", "b"), ObjSetKey("c", key="my_key"), ObjSetKey("c", key="my_key")] +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_cannot_add_object_with_same_key_if_allow_multiple_is_false(root): +# sdp = 
SheerkaDataProvider(root) +# +# with pytest.raises(IndexError) as index_error: +# sdp.add(evt_digest, "entry", ObjWithKey("my_key", "b"), False) +# sdp.add(evt_digest, "entry", ObjSetKey("c", key="my_key"), False) +# assert index_error.value.args[0] == "entry.my_key" +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_add_obj_with_key_to_a_list(root): +# sdp = SheerkaDataProvider(root) +# +# sdp.add(evt_digest, "entry", "foo") +# sdp.add(evt_digest, "entry", "bar") # entry is now a list +# sdp.add(evt_digest, "entry", ObjWithKey("a", "b")) # this entry must no be taken as a object with a key +# +# loaded = sdp.get("entry") +# assert loaded == ["foo", "bar", ObjWithKey("a", "b")] +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_add_a_reference(root): +# sdp = SheerkaDataProvider(root) +# sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithDigestWithKey))) +# obj1 = ObjWithDigestWithKey(1, "foo") +# result1 = sdp.add(evt_digest, "entry", obj1, use_ref=True) +# result3 = sdp.add(evt_digest, "entry_by_ref", SheerkaDataProviderRef(obj1.b, obj1.get_digest())) +# +# # another object +# obj2 = ObjWithDigestWithKey(2, "bar") +# sdp.add(evt_digest, "entry", obj2, use_ref=True) +# sdp.add(evt_digest, "entry_by_ref", SheerkaDataProviderRef(obj2.b, obj2.get_digest())) +# +# assert result1.obj == obj1 +# assert result1.entry == "entry" +# assert result1.key == str(obj1.get_key()) +# assert result1.digest == obj1.get_digest() +# +# assert result3.obj == SheerkaDataProviderRef(obj1.b, obj1.get_digest()) +# assert result3.entry == "entry_by_ref" +# assert result3.key == "foo" +# assert result3.digest is None +# +# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) +# assert state.data == { +# "entry": { +# "1": '##REF##:' + obj1.get_digest(), +# "2": '##REF##:' + obj2.get_digest(), +# }, +# "entry_by_ref": { +# "foo": SheerkaDataProviderRef(obj1.b, 
obj1.get_digest()), +# "bar": SheerkaDataProviderRef(obj2.b, obj2.get_digest()) +# }, +# } +# +# # make sure that I can load back +# loaded1 = sdp.get("entry_by_ref", "foo") +# assert loaded1 == ObjWithDigestWithKey(1, "foo") +# assert getattr(loaded1, Serializer.ORIGIN) == obj1.get_digest() +# +# loaded2 = sdp.get("entry_by_ref", "bar") +# assert loaded2 == ObjWithDigestWithKey(2, "bar") +# assert getattr(loaded2, Serializer.ORIGIN) == obj2.get_digest() +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_have_multiple_is_ref_to_the_same_key(root): +# sdp = SheerkaDataProvider(root) +# sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithDigestWithKey))) +# ref_result1 = sdp.add(evt_digest, "entry", ObjWithDigestWithKey(1, "foo"), use_ref=True) +# ref_result2 = sdp.add(evt_digest, "entry", ObjWithDigestWithKey(2, "bar"), use_ref=True) +# +# sdp.add(evt_digest, "entry_ref", SheerkaDataProviderRef("1", ref_result1.digest)) +# sdp.add(evt_digest, "entry_ref", SheerkaDataProviderRef("1", ref_result2.digest)) +# +# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) +# assert state.data == {'entry': {'1': '##REF##:1foo', '2': '##REF##:2bar'}, +# 'entry_ref': {'1': [SheerkaDataProviderRef("1", ref_result1.digest), +# SheerkaDataProviderRef("1", ref_result2.digest)]}, +# } +# +# loaded = sdp.get("entry_ref", "1") +# assert len(loaded) == 2 +# assert loaded[0] == ObjWithDigestWithKey(1, "foo") +# assert loaded[1] == ObjWithDigestWithKey(2, "bar") +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_cannot_add_obj_with_no_key_when_then_entry_has_keys(root): +# sdp = SheerkaDataProvider(root) +# +# with pytest.raises(SheerkaDataProviderError) as error: +# sdp.add(evt_digest, "entry", ObjWithKey("a", "b")) +# sdp.add(evt_digest, "entry", "foo") +# +# assert error.value.obj == "foo" +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) 
+# def test_i_can_add_string_using_auto_generated_key(root): +# sdp = SheerkaDataProvider(root) +# key_file = path.join(sdp.io.root, SheerkaDataProvider.KeysFile) +# +# result1 = sdp.add_with_auto_key(evt_digest, "entry1", "foo") +# result2 = sdp.add_with_auto_key(evt_digest, "entry1", "bar") +# result3 = sdp.add_with_auto_key(evt_digest, "entry2", "baz") +# +# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) +# +# assert sdp.io.exists(key_file) +# assert read_json_file(sdp, key_file) == {"entry1": 2, "entry2": 1} +# assert state.data == {"entry1": {"1": "foo", "2": "bar"}, "entry2": {"1": "baz"}} +# assert result1.obj == "foo" +# assert result2.obj == "bar" +# assert result3.obj == "baz" +# assert result1.entry == "entry1" +# assert result2.entry == "entry1" +# assert result3.entry == "entry2" +# assert result1.digest is None +# assert result2.digest is None +# assert result3.digest is None +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_cannot_add_the_same_digest_twice_in_the_same_entry(root): +# """ +# If get_digest() is implemented, checks for duplicates +# :return: +# """ +# sdp = SheerkaDataProvider(root) +# +# with pytest.raises(SheerkaDataProviderDuplicateKeyError) as error: +# sdp.add(evt_digest, "entry", ObjWithDigestNoKey("a", "b")) +# sdp.add(evt_digest, "entry", ObjWithDigestNoKey("a", "b")) +# +# assert error.value.obj.get_digest() == ObjWithDigestNoKey("a", "b").get_digest() +# assert error.value.key == "entry" +# assert error.value.args[0] == "Duplicate object." 
+# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_cannot_add_the_same_digest_twice_in_the_same_entry2(root): +# """ +# If get_digest() is implemented, checks for duplicates in list when no key +# :return: +# """ +# sdp = SheerkaDataProvider(root) +# +# with pytest.raises(SheerkaDataProviderDuplicateKeyError) as error: +# sdp.add(evt_digest, "entry", ObjWithDigestNoKey("a", "b")) +# sdp.add(evt_digest, "entry", ObjWithDigestNoKey("a", "c")) +# sdp.add(evt_digest, "entry", ObjWithDigestNoKey("a", "b")) +# +# assert error.value.obj.get_digest() == ObjWithDigestNoKey("a", "b").get_digest() +# assert error.value.key == "entry" +# assert error.value.args[0] == "Duplicate object." +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_cannot_add_the_same_digest_twice_in_the_same_entry3(root): +# """ +# If get_digest() is implemented, checks for duplicates when the key is provided +# :return: +# """ +# sdp = SheerkaDataProvider(root) +# +# with pytest.raises(SheerkaDataProviderDuplicateKeyError) as error: +# sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "b")) +# sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "b")) +# +# assert error.value.obj.get_digest() == ObjWithDigestWithKey("a", "b").get_digest() +# assert error.value.key == "entry.a" +# assert error.value.args[0] == "Duplicate object." 
+# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_cannot_add_the_same_digest_twice_in_the_same_entry4(root): +# """ +# If get_digest() is implemented, checks for duplicates in list when the key is provided +# :return: +# """ +# sdp = SheerkaDataProvider(root) +# +# with pytest.raises(SheerkaDataProviderDuplicateKeyError) as error: +# sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "b")) +# sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "c")) +# sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "b")) +# +# assert error.value.obj.get_digest() == ObjWithDigestWithKey("a", "b").get_digest() +# assert error.value.key == "entry.a" +# assert error.value.args[0] == "Duplicate object." +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_get_and_set_key(root): +# sdp = SheerkaDataProvider(root) +# key_file = path.join(sdp.io.root, SheerkaDataProvider.KeysFile) +# sdp.set_key("entry1", 1000) +# +# sdp.get_next_key("entry1") +# sdp.get_next_key("entry1") +# sdp.get_next_key("entry1") +# sdp.get_next_key("entry2") +# sdp.get_next_key("entry2") +# +# assert sdp.io.exists(key_file) +# assert read_json_file(sdp, key_file) == {"entry1": 1003, "entry2": 2} +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_add_object_using_auto_generated_key(root): +# sdp = SheerkaDataProvider(root) +# key_file = path.join(sdp.io.root, SheerkaDataProvider.KeysFile) +# +# result1 = sdp.add_with_auto_key(evt_digest, "entry1", ObjNoKey("a", "b")) +# result2 = sdp.add_with_auto_key(evt_digest, "entry1", ObjNoKey("a", "b")) +# +# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) +# +# assert sdp.io.exists(key_file) +# assert read_json_file(sdp, key_file) == {"entry1": 2} +# assert state.data == {"entry1": {"1": ObjNoKey("a", "b"), "2": ObjNoKey("a", "b")}} +# +# assert result1.obj == ObjNoKey("a", "b") +# assert result2.obj == 
ObjNoKey("a", "b") +# assert result1.entry == "entry1" +# assert result2.entry == "entry1" +# assert result1.key == "1" +# assert result2.key == "2" +# assert result1.digest is None +# assert result2.digest is None +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_object_key_is_updated_when_possible_using_auto_generated_key(root): +# sdp = SheerkaDataProvider(root) +# key_file = path.join(sdp.io.root, SheerkaDataProvider.KeysFile) +# +# result1 = sdp.add_with_auto_key(evt_digest, "entry1", ObjSetKey("foo")) +# result2 = sdp.add_with_auto_key(evt_digest, "entry1", ObjSetKey("foo")) +# +# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) +# +# assert sdp.io.exists(key_file) +# assert read_json_file(sdp, key_file) == {"entry1": 2} +# assert state.data == {"entry1": {"1": ObjSetKey("foo", "1"), "2": ObjSetKey("foo", "2")}} +# +# assert result1.obj == ObjSetKey("foo", "1") +# assert result2.obj == ObjSetKey("foo", "2") +# assert result1.entry == "entry1" +# assert result2.entry == "entry1" +# assert result1.key == "1" +# assert result2.key == "2" +# assert result1.digest is None +# assert result2.digest is None +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_set_objects_with_key(root): +# sdp = SheerkaDataProvider(root) +# sdp.add(evt_digest, "entry", ObjWithKey(1, "foo")) +# result = sdp.set(evt_digest, "entry", ObjWithKey(2, "foo")) +# +# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) +# assert state.data == {"entry": {"2": ObjWithKey(2, "foo")}} +# assert result.entry == "entry" +# assert result.key == "2" +# assert result.digest is None +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_set_objects_with_no_key(root): +# sdp = SheerkaDataProvider(root) +# sdp.add(evt_digest, "entry", ObjNoKey(1, "foo")) +# result = sdp.set(evt_digest, "entry", ObjNoKey(2, "foo")) +# +# state = 
sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) +# assert state.data == {"entry": ObjNoKey(2, "foo")} +# assert result.entry == "entry" +# assert result.key is None +# assert result.digest is None +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_set_from_list_to_dict(root): +# sdp = SheerkaDataProvider(root) +# sdp.set(evt_digest, "entry", [ObjNoKey(1, "foo"), ObjNoKey(2, "foo")]) +# result = sdp.set(evt_digest, "entry", {"1": ObjNoKey(1, "foo"), "2": ObjNoKey(2, "foo")}) +# +# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) +# assert state.data == {"entry": {"1": ObjNoKey(1, "foo"), "2": ObjNoKey(2, "foo")}} +# assert result.entry == "entry" +# assert result.key is None +# assert result.digest is None +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_set_using_reference(root): +# sdp = SheerkaDataProvider(root) +# sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithKey))) +# sdp.add(evt_digest, "entry", ObjWithKey(1, "foo")) +# result = sdp.set(evt_digest, "entry", ObjWithKey(2, "foo"), use_ref=True) +# +# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) +# assert state.data == {"entry": {"2": '##REF##:43f07065c7bad051cdd726bdfa4de7f8d754c31486c65ddb31d6b6548dec3db9'}} +# +# assert result.obj == ObjWithKey(2, "foo") +# assert result.entry == "entry" +# assert result.key == "2" +# assert result.digest == "43f07065c7bad051cdd726bdfa4de7f8d754c31486c65ddb31d6b6548dec3db9" +# +# assert sdp.io.exists(sdp.io.get_obj_path(SheerkaDataProvider.ObjectsFolder, +# "43f07065c7bad051cdd726bdfa4de7f8d754c31486c65ddb31d6b6548dec3db9")) +# +# # sanity check, make sure that I can load back +# loaded = sdp.get(result.entry, result.key) +# assert loaded == ObjWithKey(2, "foo") +# assert getattr(loaded, Serializer.ORIGIN) == "43f07065c7bad051cdd726bdfa4de7f8d754c31486c65ddb31d6b6548dec3db9" +# +# +# 
@pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_set_a_reference(root): +# sdp = SheerkaDataProvider(root) +# sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithDigestWithKey))) +# obj = ObjWithDigestWithKey(1, "foo") +# sdp.add(evt_digest, "entry", obj, use_ref=True) +# sdp.set(evt_digest, "entry_by_value", {obj.b: obj.get_digest()}, is_ref=True) +# +# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) +# assert state.data == { +# "entry": {"1": '##REF##:' + obj.get_digest()}, +# "entry_by_value": {"foo": '##REF##:' + obj.get_digest()}, +# } +# +# # sanity check, make sure that I can load back +# loaded = sdp.get("entry_by_value", "foo") +# assert loaded == ObjWithDigestWithKey(1, "foo") +# assert getattr(loaded, Serializer.ORIGIN) == obj.get_digest() +# +# +# def test_i_cannot_set_using_use_ref_and_is_ref(): +# sdp = SheerkaDataProvider("mem://") +# +# with pytest.raises(SheerkaDataProviderError): +# sdp.set(evt_digest, "entry", ObjWithDigestWithKey("a", "b"), use_ref=True, is_ref=True) +# +# +# def test_i_cannot_set_using_is_ref_if_obj_is_not_a_dictionary(): +# sdp = SheerkaDataProvider("mem://") +# +# with pytest.raises(SheerkaDataProviderError): +# sdp.set(evt_digest, "entry", ObjWithDigestWithKey("a", "b"), is_ref=True) +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_add_an_object_with_a_key_as_a_reference(root): +# sdp = SheerkaDataProvider(root) +# obj = ObjDumpJson("my_key", "value1") +# obj_serializer = JsonSerializer(core.utils.get_full_qualified_name(obj)) +# sdp.serializer.register(obj_serializer) +# +# result = sdp.add(evt_digest, "entry", obj, use_ref=True) +# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) +# digest = state.data["entry"]["my_key"][len(SheerkaDataProvider.REF_PREFIX):] +# +# assert result.obj == obj +# assert result.entry == "entry" +# assert result.key == obj.key +# assert result.digest 
== obj.get_digest() +# assert digest == result.digest +# assert state.data == {'entry': {'my_key': f"{SheerkaDataProvider.REF_PREFIX}{digest}"}} +# +# loaded = sdp.load_obj(digest) +# assert loaded == obj +# assert getattr(loaded, Serializer.ORIGIN) == digest +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_add_a_dictionary_as_a_reference(root): +# sdp = SheerkaDataProvider(root) +# obj = {"my_key": "value1"} +# +# # No need to register a serializer for dictionaries +# +# result = sdp.add(evt_digest, "entry", obj, use_ref=True) +# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) +# digest = state.data["entry"][len(SheerkaDataProvider.REF_PREFIX):] +# +# assert result.obj == obj +# assert result.entry == "entry" +# assert result.key is None # we return None as dict may contains several entries +# assert result.digest == "1790cae3f354ecb6b419faaa2ee2c374ff33efb8cddafda9960924036ac04c1f" # a digest is created +# assert digest == result.digest +# +# assert state.data == {'entry': f"{SheerkaDataProvider.REF_PREFIX}{digest}"} +# +# loaded = sdp.load_obj(digest) +# assert loaded["my_key"] == obj["my_key"] +# assert loaded[Serializer.ORIGIN] == digest +# assert len(loaded) == 2 +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_add_an_object_with_no_builtin_digest_as_a_reference(root): +# sdp = SheerkaDataProvider(root) +# obj = ObjDumpJsonNoDigest("a", "b") +# +# obj_serializer = JsonSerializer(core.utils.get_full_qualified_name(obj)) +# sdp.serializer.register(obj_serializer) +# +# result = sdp.add(evt_digest, "entry", obj, use_ref=True) +# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) +# digest = state.data["entry"][obj.get_key()][len(SheerkaDataProvider.REF_PREFIX):] +# +# assert result.obj == obj +# assert result.entry == "entry" +# assert result.key == obj.get_key() +# assert result.digest is not None +# assert digest == 
result.digest +# +# assert state.data == {'entry': {obj.key: f"{SheerkaDataProvider.REF_PREFIX}{result.digest}"}} +# +# loaded = sdp.load_obj(digest) +# assert getattr(loaded, Serializer.ORIGIN) == digest +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_add_unique(root): +# sdp = SheerkaDataProvider(root) +# result = sdp.add_unique(evt_digest, "entry", ObjNoKey(1, "foo")) +# assert result == SheerkaDataProviderResult(ObjNoKey(1, "foo"), "entry", None, None, False) +# +# result = sdp.add_unique(evt_digest, "entry", ObjNoKey(1, "foo")) +# assert result == SheerkaDataProviderResult(ObjNoKey(1, "foo"), "entry", None, None, True) +# +# result = sdp.add_unique(evt_digest, "entry", ObjNoKey(2, "bar")) +# assert result == SheerkaDataProviderResult(ObjNoKey(2, "bar"), "entry", None, None, False) +# +# result = sdp.add_unique(evt_digest, "entry", ObjNoKey(2, "bar")) +# assert result == SheerkaDataProviderResult(ObjNoKey(2, "bar"), "entry", None, None, True) +# +# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) +# assert state.data == {"entry": {ObjNoKey(1, "foo"), ObjNoKey(2, "bar")}} +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_keep_state_history(root): +# sdp = SheerkaDataProvider(root) +# +# event1 = Event("cmd add 'foo => bar'") +# event_digest1 = sdp.save_event(event1) +# obj1 = "foo => bar" +# sdp.add(event_digest1, "entry1", obj1) +# state_digest1 = sdp.get_snapshot(SheerkaDataProvider.HeadFile) +# +# event2 = Event("cmd add 'foo => baz'") +# event_digest2 = sdp.save_event(event2) +# obj2 = "foo => baz" +# sdp.add(event_digest2, "entry2", obj2) +# state_digest2 = sdp.get_snapshot(SheerkaDataProvider.HeadFile) +# +# state2 = sdp.load_state(state_digest2) +# +# assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.EventFolder, event_digest1[0:24], event_digest1)) +# assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.StateFolder, 
state_digest1[0:24], state_digest1)) +# assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.EventFolder, event_digest2[0:24], event_digest2)) +# assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.StateFolder, state_digest2[0:24], state_digest2)) +# assert state2.date is not None +# assert state2.parents == [state_digest1] +# assert state2.events == [event_digest2] +# assert state2.data == {"entry1": "foo => bar", "entry2": "foo => baz"} +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_list_elements_when_there_is_nothing_to_list(root): +# sdp = SheerkaDataProvider(root) +# +# result = sdp.list("entry") +# +# assert list(result) == [] +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_list_when_no_key(root): +# sdp = SheerkaDataProvider(root) +# sdp.serializer.register(PickleSerializer(lambda obj: isinstance(obj, str))) +# +# sdp.add(evt_digest, "entry1", "foo") +# sdp.add(evt_digest, "entry1", "bar") +# sdp.add(evt_digest, "entry1", "baz", use_ref=True) +# sdp.add(evt_digest, "entry2", "xyz") +# +# result = sdp.list("entry1") +# +# assert list(result) == ["foo", "bar", "baz"] +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_list_when_key(root): +# sdp = SheerkaDataProvider(root) +# sdp.serializer.register(PickleSerializer(lambda obj: isinstance(obj, ObjWithKey))) +# +# sdp.add(evt_digest, "entry1", {"1": "foo"}) +# sdp.add(evt_digest, "entry1", {"2": "bar"}) +# sdp.add(evt_digest, "entry1", ObjWithKey("3", "value"), use_ref=True) +# sdp.add(evt_digest, "entry2", {"4": "xxx"}) +# +# result = sdp.list("entry1") +# +# assert list(result) == ["foo", "bar", ObjWithKey("3", "value")] +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_list_when_one_element(root): +# sdp = SheerkaDataProvider(root) +# sdp.add(evt_digest, "entry1", "foo") +# sdp.add(evt_digest, "entry2", 
"baz") +# +# result = sdp.list("entry1") +# +# assert list(result) == ["foo"] +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_list_when_multiple_entries_under_the_same_key(root): +# sdp = SheerkaDataProvider(root) +# sdp.add(evt_digest, "entry", ObjWithKey("a", "b")) +# sdp.add(evt_digest, "entry", ObjWithKey("a", "c")) +# +# result = sdp.list("entry") +# assert list(result) == [[ObjWithKey("a", "b"), ObjWithKey("a", "c")]] +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_list_when_multiple_entries_under_the_same_key_when_reference(root): +# sdp = SheerkaDataProvider(root) +# sdp.serializer.register(PickleSerializer(lambda obj: isinstance(obj, ObjWithKey))) +# +# sdp.add(evt_digest, "entry", ObjWithKey("a", "b"), use_ref=True) +# sdp.add(evt_digest, "entry", ObjWithKey("a", "c"), use_ref=True) +# +# result = sdp.list("entry") +# assert list(result) == [[ObjWithKey("a", "b"), ObjWithKey("a", "c")]] +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_list_when_multiple_entries_under_the_same_entry(root): +# sdp = SheerkaDataProvider(root) +# sdp.add(evt_digest, "entry", ObjNoKey("a", "b")) +# sdp.add(evt_digest, "entry", ObjNoKey("a", "c")) +# +# result = sdp.list("entry") +# assert list(result) == [ObjNoKey("a", "b"), ObjNoKey("a", "c")] +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_list_when_multiple_entries_under_the_same_entry_when_reference(root): +# sdp = SheerkaDataProvider(root) +# sdp.serializer.register(PickleSerializer(lambda obj: isinstance(obj, ObjNoKey))) +# +# sdp.add(evt_digest, "entry", ObjNoKey("a", "b"), use_ref=True) +# sdp.add(evt_digest, "entry", ObjNoKey("a", "c"), use_ref=True) +# +# result = sdp.list("entry") +# assert list(result) == [ObjNoKey("a", "b"), ObjNoKey("a", "c")] +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def 
test_i_can_filter_on_key_for_dict(root): +# sdp = SheerkaDataProvider(root) +# sdp.add(evt_digest, "entry1", {"1": "foo"}) +# sdp.add(evt_digest, "entry1", {"2": "bar"}) +# +# result = sdp.list("entry1", lambda k, o: k == "1") +# +# assert list(result) == ["foo"] +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_filter_on_key_for_objects(root): +# sdp = SheerkaDataProvider(root) +# sdp.add(evt_digest, "entry1", ObjWithKey("a1", "b1")) +# sdp.add(evt_digest, "entry1", ObjWithKey("a2", "b2")) +# +# result = sdp.list("entry1", lambda k, o: k == "a1") +# +# assert list(result) == [ObjWithKey("a1", "b1")] +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_filter_on_attribute_for_dict(root): +# sdp = SheerkaDataProvider(root) +# sdp.add(evt_digest, "entry1", {"1": {"a": "a1", "b": "b1"}}) +# sdp.add(evt_digest, "entry1", {"2": {"a": "a2", "b": "b2"}}) +# +# result = sdp.list("entry1", lambda k, o: o["a"] == "a2") +# +# assert list(result) == [{"a": "a2", "b": "b2"}] +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_filter_on_attribute_for_object(root): +# sdp = SheerkaDataProvider(root) +# sdp.add(evt_digest, "entry1", ObjWithKey("a1", "b1")) +# sdp.add(evt_digest, "entry1", ObjWithKey("a2", "b2")) +# +# result = sdp.list("entry1", lambda k, o: o.b == "b2") +# +# assert list(result) == [ObjWithKey("a2", "b2")] +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_filter_a_list(root): +# sdp = SheerkaDataProvider(root) +# sdp.add(evt_digest, "entry1", "foo") +# sdp.add(evt_digest, "entry1", "bar") +# +# result = sdp.list("entry1", lambda o: o == "bar") +# +# assert list(result) == ["bar"] +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_filter_a_list_of_object(root): +# sdp = SheerkaDataProvider(root) +# sdp.add(evt_digest, "entry1", ObjNoKey("a1", "b1")) 
+# sdp.add(evt_digest, "entry1", ObjNoKey("a2", "b2")) +# +# result = sdp.list("entry1", lambda o: o.b == "b1") +# +# assert list(result) == [ObjNoKey("a1", "b1")] +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_remove_all_elements(root): +# sdp = SheerkaDataProvider(root) +# sdp.add(evt_digest, "entry1", "foo") +# sdp.add(evt_digest, "entry1", "bar") +# +# state_digest = sdp.remove(evt_digest, "entry1") +# result = sdp.list("entry1") +# +# assert sdp.io.read_text(path.join(sdp.io.root, SheerkaDataProvider.HeadFile)) == state_digest +# assert list(result) == [] +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_remove_a_element(root): +# sdp = SheerkaDataProvider(root) +# sdp.add(evt_digest, "entry1", "foo") +# sdp.add(evt_digest, "entry1", "bar") +# +# sdp.remove(evt_digest, "entry1", lambda o: o == "foo") +# result = sdp.list("entry1") +# +# assert list(result) == ["bar"] +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_remove_dict_by_key(root): +# sdp = SheerkaDataProvider(root) +# sdp.add(evt_digest, "entry1", {"1": ObjNoKey("a1", "b1")}) +# sdp.add(evt_digest, "entry1", {"2": ObjNoKey("a2", "b2")}) +# +# sdp.remove(evt_digest, "entry1", lambda k, o: k == "2") +# result = sdp.list("entry1") +# +# assert list(result) == [ObjNoKey("a1", "b1")] +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_remove_when_only_one_element(root): +# sdp = SheerkaDataProvider(root) +# sdp.add(evt_digest, "entry1", "foo") +# +# sdp.remove(evt_digest, "entry1", lambda o: o == "foo") +# result = sdp.list("entry1") +# +# assert list(result) == [] +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_cannot_remove_if_entry_does_not_exist(root): +# sdp = SheerkaDataProvider(root) +# with pytest.raises(IndexError) as e: +# sdp.remove(evt_digest, "entry", silent_remove=False) 
+# assert str(e.value) == "entry" +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_no_exception_is_raise_when_remove_in_silent_mode(root): +# sdp = SheerkaDataProvider(root) +# sdp.remove(evt_digest, "entry", silent_remove=True) # default +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_cannot_modify_an_entry_without_a_key(root): +# sdp = SheerkaDataProvider(root) +# +# with pytest.raises(SheerkaDataProviderError) as error: +# sdp.modify(evt_digest, "entry", None, "baz") +# +# assert error.value.args[0] == "Key is mandatory." +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_modify_dict_with_a_key(root): +# sdp = SheerkaDataProvider(root) +# sdp.add(evt_digest, "entry", {"key1": "foo"}) +# sdp.add(evt_digest, "entry", {"key2": "bar"}) +# +# result = sdp.modify(evt_digest, "entry", "key1", "baz") +# +# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) +# assert state.data == {"entry": {"key1": "baz", "key2": "bar"}} +# assert result.obj == "baz" +# assert result.entry == "entry" +# assert result.key == "key1" +# assert result.digest is None +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_modify_an_object_with_a_key(root): +# sdp = SheerkaDataProvider(root) +# sdp.add(evt_digest, "entry", ObjWithKey("key1", "foo")) +# sdp.add(evt_digest, "entry", ObjWithKey("key2", "bar")) +# +# result = sdp.modify(evt_digest, "entry", "key1", ObjWithKey("key1", "baz")) +# +# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) +# +# assert state.data == {"entry": {"key1": ObjWithKey("key1", "baz"), "key2": ObjWithKey("key2", "bar")}} +# assert result.obj == ObjWithKey("key1", "baz") +# assert result.entry == "entry" +# assert result.key == "key1" +# assert result.digest is None +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def 
test_i_can_modify_an_object_while_changing_the_key(root): +# sdp = SheerkaDataProvider(root) +# sdp.add(evt_digest, "entry", ObjWithKey("key1", "foo")) +# sdp.add(evt_digest, "entry", ObjWithKey("key2", "bar")) +# +# result = sdp.modify(evt_digest, "entry", "key1", ObjWithKey("key3", "baz")) +# +# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) +# assert state.data == {"entry": {"key2": ObjWithKey("key2", "bar"), "key3": ObjWithKey("key3", "baz")}} +# assert result.obj == ObjWithKey("key3", "baz") +# assert result.entry == "entry" +# assert result.key == "key3" +# assert result.digest is None +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_modify_an_object_while_changing_the_key_to_an_existing_key(root): +# sdp = SheerkaDataProvider(root) +# sdp.add(evt_digest, "entry", ObjWithKey("key1", "foo")) +# sdp.add(evt_digest, "entry", ObjWithKey("key2", "bar")) +# +# result = sdp.modify(evt_digest, "entry", "key2", ObjWithKey("key1", "bar")) +# +# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) +# assert state.data == {"entry": {"key1": [ObjWithKey("key1", "foo"), ObjWithKey("key1", "bar")]}} +# assert result.obj == ObjWithKey("key1", "bar") +# assert result.entry == "entry" +# assert result.key == "key1" +# assert result.digest is None +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_modify_an_object_while_changing_the_key_to_an_existing_when_list(root): +# """ +# In this example, the item to modify is within a list, and its key has changed +# and in the new key, there is already a list +# :return: +# """ +# sdp = SheerkaDataProvider(root) +# +# sdp.add(evt_digest, "entry", ObjDumpJson("key1", "value11")) +# sdp.add(evt_digest, "entry", ObjDumpJson("key1", "value12")) +# sdp.add(evt_digest, "entry", ObjDumpJson("key2", "value21")) +# sdp.add(evt_digest, "entry", ObjDumpJson("key2", "value22")) +# +# new_value = ObjDumpJson("key1", 
"value13") +# setattr(new_value, Serializer.ORIGIN, ObjDumpJson("key2", "value21").get_digest()) +# result = sdp.modify(evt_digest, "entry", "key2", new_value) +# +# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) +# assert state.data == {"entry": { +# "key1": [ObjDumpJson("key1", "value11"), ObjDumpJson("key1", "value12"), ObjDumpJson("key1", "value13")], +# "key2": [ObjDumpJson("key2", "value22")] +# }} +# assert result.obj == new_value +# assert result.entry == "entry" +# assert result.key == "key1" +# assert result.digest is None +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_modify_an_object_while_changing_the_key_to_an_existing_when_nothing(root): +# """ +# In this example, the item to modify is within a list, and its key has changed +# and in the new key, there is nothing (the new key does not exist) +# :return: +# """ +# sdp = SheerkaDataProvider(root) +# sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJson))) +# +# sdp.add(evt_digest, "entry", ObjDumpJson("key2", "value21")) +# sdp.add(evt_digest, "entry", ObjDumpJson("key2", "value22")) +# +# new_value = ObjDumpJson("key1", "value13") +# setattr(new_value, Serializer.ORIGIN, ObjDumpJson("key2", "value21").get_digest()) +# result = sdp.modify(evt_digest, "entry", "key2", new_value) +# +# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) +# assert state.data == {"entry": { +# "key1": ObjDumpJson("key1", "value13"), +# "key2": [ObjDumpJson("key2", "value22")] +# }} +# assert result.obj == new_value +# assert result.entry == "entry" +# assert result.key == "key1" +# assert result.digest is None +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_modify_an_object_while_changing_the_key_to_an_existing_when_one_item(root): +# """ +# In this example, the item to modify is within a list, and its key has changed +# and in the new key, there is only one 
element +# :return: +# """ +# sdp = SheerkaDataProvider(root) +# sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJson))) +# +# sdp.add(evt_digest, "entry", ObjDumpJson("key1", "value11")) +# sdp.add(evt_digest, "entry", ObjDumpJson("key2", "value21")) +# sdp.add(evt_digest, "entry", ObjDumpJson("key2", "value22")) +# +# new_value = ObjDumpJson("key1", "value13") +# setattr(new_value, Serializer.ORIGIN, ObjDumpJson("key2", "value21").get_digest()) +# result = sdp.modify(evt_digest, "entry", "key2", new_value) +# +# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) +# assert state.data == {"entry": { +# "key1": [ObjDumpJson("key1", "value11"), ObjDumpJson("key1", "value13")], +# "key2": [ObjDumpJson("key2", "value22")] +# }} +# assert result.obj == new_value +# assert result.entry == "entry" +# assert result.key == "key1" +# assert result.digest is None +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_modify_a_object_saved_by_ref(root): +# sdp = SheerkaDataProvider(root) +# sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithKey))) +# sdp.add(evt_digest, "entry", ObjWithKey("key1", "foo")) +# sdp.add(evt_digest, "entry", ObjWithKey("key2", "bar"), use_ref=True) +# +# result = sdp.modify(evt_digest, "entry", "key2", ObjWithKey("key2", "baz")) +# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) +# assert state.data == {"entry": { +# "key1": ObjWithKey("key1", "foo"), +# "key2": "##REF##:041d3cca905b51bc2c66251e73e56b836aae7b9435ee3d7eb05d44bb67ff575e"}} +# assert result.obj == ObjWithKey("key2", "baz") +# assert result.entry == "entry" +# assert result.key == "key2" +# assert result.digest == "041d3cca905b51bc2c66251e73e56b836aae7b9435ee3d7eb05d44bb67ff575e" +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_modify_an_object_saved_by_ref_in_a_list(root): +# sdp = SheerkaDataProvider(root) +# 
sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJsonNoDigest))) +# +# sdp.add(evt_digest, "entry", ObjDumpJsonNoDigest("key1", "value11"), use_ref=True) +# sdp.add(evt_digest, "entry", ObjDumpJsonNoDigest("key1", "value12"), use_ref=True) +# result = sdp.add(evt_digest, "entry", ObjDumpJsonNoDigest("key2", "value21"), use_ref=True) +# sdp.add(evt_digest, "entry", ObjDumpJsonNoDigest("key2", "value22"), use_ref=True) +# +# new_value = ObjDumpJsonNoDigest("key1", "value13") +# setattr(new_value, Serializer.ORIGIN, result.digest) +# result = sdp.modify(evt_digest, "entry", "key2", new_value) +# +# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) +# assert state.data == {"entry": { +# 'key1': ['##REF##:f80a0c0aceb1a7a3d238c0cff2d86d6bd3a62e0c1a65c5b505f43b10c4604bd8', +# '##REF##:239a8238d188c37afa10b1bcc312ca8a0e78f6e75d688ca65d08e16717ff68b0', +# '##REF##:9d0a2bf9d4081de0b14837ea46bc7a1cfb6b7562f7ae86255ea9bd0ac53a6437'], +# 'key2': ['##REF##:df8a38b07f469f2ff8001ea6a70f77f4f9ce85d69c530091fcaf4b380f1500d3'] +# }} +# assert result.obj == new_value +# assert result.entry == "entry" +# assert result.key == "key1" +# assert result.digest is not None +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_modify_a_data_provider_ref(root): +# # first, create a valid entry +# sdp = SheerkaDataProvider(root) +# sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithDigestWithKey))) +# obj = ObjWithDigestWithKey("1", "foo") +# sdp.add(evt_digest, "entry", obj, use_ref=True) +# sdp.add(evt_digest, "entry_ref", SheerkaDataProviderRef(obj.b, obj.get_digest())) +# +# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) +# assert state.data == { +# "entry": {"1": "##REF##:1foo"}, +# "entry_ref": {"foo": SheerkaDataProviderRef(obj.b, obj.get_digest())}} +# +# # modify this entry +# obj_new = ObjWithDigestWithKey("1", "bar") +# sdp.modify(evt_digest, "entry", 
obj_new.a, obj_new) +# result = sdp.modify(evt_digest, "entry_ref", "foo", SheerkaDataProviderRef(obj.b, obj_new.get_digest())) +# +# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) +# assert state.data == { +# "entry": {"1": "##REF##:1bar"}, +# "entry_ref": {"foo": SheerkaDataProviderRef(obj.b, obj_new.get_digest())}} +# +# assert result.obj == SheerkaDataProviderRef(obj.b, obj_new.get_digest()) +# assert result.entry == "entry_ref" +# assert result.key == "foo" +# assert result.digest is None # digest is not set as what is saved (the digest) is not saved by ref +# +# # sanity check, I can load the modified entry +# loaded = sdp.get("entry_ref", "foo") +# assert loaded == ObjWithDigestWithKey("1", "bar") +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_modify_is_ref_when_in_list(root): +# sdp = SheerkaDataProvider(root) +# sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithDigestWithKey))) +# ref_result1 = sdp.add(evt_digest, "entry", ObjWithDigestWithKey(1, "foo"), use_ref=True) +# ref_result2 = sdp.add(evt_digest, "entry", ObjWithDigestWithKey(2, "bar"), use_ref=True) +# +# sdp.add(evt_digest, "entry_ref", SheerkaDataProviderRef("1", ref_result1.digest)) +# sdp.add(evt_digest, "entry_ref", SheerkaDataProviderRef("1", ref_result2.digest)) +# +# ref_result3 = sdp.add(evt_digest, "entry", ObjWithDigestWithKey(3, "baz"), use_ref=True) +# +# result = sdp.modify( +# evt_digest, +# "entry_ref", +# "1", +# SheerkaDataProviderRef("1", ref_result3.digest, ref_result2.digest)) +# +# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) +# assert state.data == {'entry': {'1': '##REF##:1foo', '2': '##REF##:2bar', '3': '##REF##:3baz'}, +# 'entry_ref': {'1': [ +# SheerkaDataProviderRef("1", ref_result1.digest), +# SheerkaDataProviderRef("1", ref_result3.digest, ref_result2.digest)]}} +# +# loaded = sdp.get("entry_ref", "1") +# assert len(loaded) == 2 +# assert loaded[0] 
== ObjWithDigestWithKey(1, "foo") +# assert loaded[1] == ObjWithDigestWithKey(3, "baz") +# +# assert result.obj == SheerkaDataProviderRef("1", ref_result3.digest, ref_result2.digest) +# assert result.entry == "entry_ref" +# assert result.key == "1" +# assert result.digest is None # digest is not set as what is saved (the digest) is not saved by ref +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_cannot_modify_an_entry_that_does_not_exist(root): +# sdp = SheerkaDataProvider(root) +# +# with pytest.raises(IndexError) as e: +# sdp.modify(evt_digest, "entry", "key", "foo") +# +# assert str(e.value) == "entry" +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_cannot_modify_a_key_that_does_not_exist(root): +# sdp = SheerkaDataProvider(root) +# sdp.add(evt_digest, "entry1", {"1": "foo"}) +# +# with pytest.raises(IndexError) as e: +# sdp.modify(evt_digest, "entry1", "2", "bar") +# assert str(e.value) == "entry1.2" +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_cannot_modify_a_list_when_origin_is_unknown(root): +# sdp = SheerkaDataProvider(root) +# +# sdp.add(evt_digest, "entry", ObjWithKey("key", "value1")) +# sdp.add(evt_digest, "entry", ObjWithKey("key", "value2")) # same they +# +# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) +# +# with pytest.raises(SheerkaDataProviderError) as error: +# sdp.modify(evt_digest, "entry", "key", ObjWithKey("key", "value2")) +# +# assert error.value.obj == ObjWithKey("key", "value2") +# assert error.value.args[0] == "Multiple entries under 'entry.key'" +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_modify_a_list_when_the_origin_is_known(root): +# sdp = SheerkaDataProvider(root) +# +# sdp.add(evt_digest, "entry", ObjDumpJson("key", "value1")) +# sdp.add(evt_digest, "entry", ObjDumpJson("key", "value2")) # same they +# +# new_value = 
ObjDumpJson("key", "value3") +# setattr(new_value, Serializer.ORIGIN, ObjDumpJson("key", "value1").get_digest()) +# +# sdp.modify(evt_digest, "entry", "key", new_value) +# +# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) +# assert state.data == {"entry": {"key": [ObjDumpJson("key", "value3"), ObjDumpJson("key", "value2")]}} +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_modify_a_list_when_the_origin_is_known_2(root): +# """ +# This time, we check that the origin is automatically set when the object was saved as a reference +# We also check that all objects are still persisted as reference +# :return: +# """ +# sdp = SheerkaDataProvider(root) +# sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJson))) +# +# sdp.add(evt_digest, "entry", ObjDumpJson("key", "value1"), use_ref=True) +# sdp.add(evt_digest, "entry", ObjDumpJson("key", "value2"), use_ref=True) # same they +# +# objs = sdp.get("entry", "key") # origin is automatically set to the loaded objects +# objs[0].value = "value3" +# +# sdp.modify(evt_digest, "entry", "key", objs[0]) +# +# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) +# assert state.data == {"entry": {"key": [ +# "##REF##:621771a3af6a331e9abb3a63fb25e0cac4b13df0b292dfa30db6bd89031bfad0", +# "##REF##:5fe085e8366d35c5f04a18b2d3dada376128b246e07c66de5872830b00f5f517"]}} +# +# # checks that all objects are (still) persisted +# sdp.io.exists( +# sdp.io.get_obj_path(sdp.ObjectsFolder, "621771a3af6a331e9abb3a63fb25e0cac4b13df0b292dfa30db6bd89031bfad0")) +# sdp.io.exists( +# sdp.io.get_obj_path(sdp.ObjectsFolder, "5fe085e8366d35c5f04a18b2d3dada376128b246e07c66de5872830b00f5f517")) +# sdp.io.exists( +# sdp.io.get_obj_path(sdp.ObjectsFolder, "1aac9e0d5c74c3bb989fd0f9def792bba36c5595d32f61be7cbb1a38dcf75327")) +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_get_the_entire_entry(root): +# sdp = 
SheerkaDataProvider(root) +# sdp.add(evt_digest, "entry1", "foo") +# sdp.add(evt_digest, "entry1", "bar") +# +# result = sdp.get("entry1") +# result_safe = sdp.get_safe("entry1") +# +# assert result == ["foo", "bar"] +# assert result_safe == ["foo", "bar"] +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_get_an_entry_with_on_object(root): +# sdp = SheerkaDataProvider(root) +# sdp.add(evt_digest, "entry1", "foo") +# +# result = sdp.get("entry1") +# result_safe = sdp.get_safe("entry1") +# +# assert result == "foo" +# assert result_safe == "foo" +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_get_an_entry_by_key(root): +# sdp = SheerkaDataProvider(root) +# sdp.add(evt_digest, "entry1", {"1": "foo"}) +# sdp.add(evt_digest, "entry1", {"2": "bar"}) +# +# result = sdp.get("entry1", "2") +# result_safe = sdp.get_safe("entry1", "2") +# +# assert result == "bar" +# assert result_safe == "bar" +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_get_object_saved_by_reference(root): +# sdp = SheerkaDataProvider(root) +# obj = ObjDumpJson("my_key", "value1") +# sdp.serializer.register(JsonSerializer(core.utils.get_full_qualified_name(obj))) +# +# result = sdp.add(evt_digest, "entry", obj, use_ref=True) +# loaded = sdp.get(result.entry, result.key) +# +# assert loaded == obj +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_get_objects_from_list_when_saved_by_reference(root): +# sdp = SheerkaDataProvider(root) +# sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJson))) +# +# sdp.add(evt_digest, "entry", ObjDumpJson("key", "value1"), use_ref=True) +# sdp.add(evt_digest, "entry", ObjDumpJson("key", "value2"), use_ref=True) # same they +# +# objs = sdp.get("entry", "key") +# +# assert objs[0] == ObjDumpJson("key", "value1") +# assert objs[1] == ObjDumpJson("key", "value2") +# +# +# 
@pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_cannot_get_an_entry_that_does_not_exist(root): +# sdp = SheerkaDataProvider(root) +# +# assert sdp.get_safe("entry") is None +# with pytest.raises(IndexError) as e: +# sdp.get("entry") +# assert str(e.value) == "entry" +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_cannot_get_a_key_that_does_not_exist(root): +# sdp = SheerkaDataProvider(root) +# sdp.add(evt_digest, "entry1", {"1": "foo"}) +# +# assert sdp.get_safe("entry1", "2") is None +# with pytest.raises(IndexError) as e: +# sdp.get("entry1", "2") +# assert str(e.value) == "entry1.2" +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_save_and_retrieve_cache(root): +# sdp = SheerkaDataProvider(root) +# txt = "foo bar baz foo bar baz foo bar baz" +# key = "key_to_use" +# category = "cache_category" +# +# assert not sdp.in_cache(category, key) +# digest = sdp.add_to_cache(category, key, txt) +# assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.CacheFolder, digest[0:24], digest)) +# assert sdp.in_cache(category, key) +# +# from_cache = sdp.load_from_cache(category, key) +# assert from_cache == txt +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_cache_is_not_updated_by_default(root): +# sdp = SheerkaDataProvider(root) +# txt = "foo bar baz foo bar baz foo bar baz" +# txt2 = "foo foo foo foo foo foo foo foo foo" +# key = "key_to_use" +# category = "cache_category" +# +# sdp.add_to_cache(category, key, txt) +# sdp.add_to_cache(category, key, txt2) +# +# from_cache = sdp.load_from_cache(category, key) +# assert from_cache == txt +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_update_cache(root): +# sdp = SheerkaDataProvider(root) +# txt = "foo bar baz foo bar baz foo bar baz" +# txt2 = "foo foo foo foo foo foo foo foo foo" +# key = "key_to_use" +# 
category = "cache_category" +# +# sdp.add_to_cache(category, key, txt) +# sdp.add_to_cache(category, key, txt2, update=True) +# +# from_cache = sdp.load_from_cache(category, key) +# assert from_cache == txt2 +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_remove_from_cache(root): +# sdp = SheerkaDataProvider(root) +# txt = "foo bar baz foo bar baz foo bar baz" +# key = "key_to_use" +# category = "cache_category" +# +# sdp.add_to_cache(category, key, txt) +# digest = sdp.remove_from_cache(category, key) +# assert not sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.CacheFolder, digest[0:24], digest)) +# assert not sdp.in_cache(category, key) +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_test_than_an_entry_exists(root): +# sdp = SheerkaDataProvider(root) +# +# assert not sdp.exists("entry") +# sdp.add(evt_digest, "entry", "value") +# assert sdp.exists("entry") +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_test_if_a_key_exists(root): +# sdp = SheerkaDataProvider(root) +# obj = ObjWithDigestWithKey("key", "value") +# +# assert not sdp.exists("entry") +# assert not sdp.exists("entry", obj.get_key()) +# +# sdp.add(evt_digest, "entry", obj) +# assert not sdp.exists("entry", "wrong_key") +# assert sdp.exists("entry", obj.get_key()) +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_test_that_the_object_exists(root): +# sdp = SheerkaDataProvider(root) +# obj = ObjWithDigestWithKey("key", "value") +# +# assert not sdp.exists("entry") +# assert not sdp.exists("entry", obj.get_key()) +# assert not sdp.exists("entry", obj.get_key(), obj.get_digest()) +# +# # test for a single item under the key +# sdp.add(evt_digest, "entry", obj) +# assert not sdp.exists("entry", obj.get_key(), "wrong_digest") +# assert sdp.exists("entry", obj.get_key(), obj.get_digest()) +# +# # test for a list item 
under the key +# sdp.add(evt_digest, "entry2", ObjWithDigestWithKey("key", "value2")) +# assert not sdp.exists("entry2", obj.get_key(), obj.get_digest()) +# +# sdp.add(evt_digest, "entry2", ObjWithDigestWithKey("key", "value3")) +# assert not sdp.exists("entry2", obj.get_key(), obj.get_digest()) +# +# sdp.add(evt_digest, "entry2", obj) +# assert sdp.exists("entry2", obj.get_key(), obj.get_digest()) +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_test_than_the_object_exists_when_using_references(root): +# sdp = SheerkaDataProvider(root) +# sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithDigestWithKey))) +# obj = ObjWithDigestWithKey("key", "value") +# +# assert not sdp.exists("entry") +# assert not sdp.exists("entry", obj.get_key()) +# assert not sdp.exists("entry", obj.get_key(), obj.get_digest()) +# +# # test for a single item under the key +# sdp.add(evt_digest, "entry", obj, use_ref=True) +# assert not sdp.exists("entry", obj.get_key(), "wrong_digest") +# assert sdp.exists("entry", obj.get_key(), obj.get_digest()) +# +# # test for a list item under the key +# sdp.add(evt_digest, "entry2", ObjWithDigestWithKey("key", "value2"), use_ref=True) +# assert not sdp.exists("entry2", obj.get_key(), obj.get_digest()) +# +# sdp.add(evt_digest, "entry2", ObjWithDigestWithKey("key", "value3"), use_ref=True) +# assert not sdp.exists("entry2", obj.get_key(), obj.get_digest()) +# +# sdp.add(evt_digest, "entry2", obj, use_ref=True) +# assert sdp.exists("entry2", obj.get_key(), obj.get_digest()) +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_save_and_load_object_ref_with_history(root): +# sdp = SheerkaDataProvider(root) +# obj = ObjDumpJson("my_key", "value1") +# sdp.serializer.register(JsonSerializer(core.utils.get_full_qualified_name(obj))) +# +# result = sdp.add(evt_digest, "entry", obj, use_ref=True) +# loaded = sdp.get(result.entry, result.key) +# history = 
getattr(loaded, Serializer.HISTORY) +# +# assert result.obj == obj +# assert result.entry == "entry" +# assert result.key == obj.key +# assert result.digest == obj.get_digest() +# +# assert loaded.key == obj.key +# assert loaded.value == obj.value +# +# assert history[Serializer.USERNAME] == "kodjo" +# assert history[Serializer.MODIFICATION_DATE] != "" +# assert history[Serializer.PARENTS] == [] +# +# assert sdp.io.exists(sdp.io.get_obj_path(sdp.ObjectsFolder, obj.get_digest())) +# +# # save a second type with no modification +# previous_modification_time = history[Serializer.MODIFICATION_DATE] +# previous_parents = history[Serializer.PARENTS] +# +# sdp.modify(evt_digest, "entry", result.key, loaded) +# loaded = sdp.get(result.entry, result.key) +# history = getattr(loaded, Serializer.HISTORY) +# +# assert history[Serializer.MODIFICATION_DATE] == previous_modification_time +# assert history[Serializer.PARENTS] == previous_parents +# +# # save again, but with a modification +# previous_digest = loaded.get_digest() +# loaded.value = "value2" +# +# sdp.modify(evt_digest, "entry", result.key, loaded) +# loaded2 = sdp.get(result.entry, result.key) +# history2 = getattr(loaded2, Serializer.HISTORY) +# +# assert loaded2.key == loaded.key +# assert loaded2.value == loaded.value +# +# assert history2[Serializer.USERNAME] == "kodjo" +# assert history2[Serializer.MODIFICATION_DATE] != "" +# assert history2[Serializer.PARENTS] == [previous_digest] +# +# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) +# assert state.data == {"entry": { +# "my_key": '##REF##:e6bf5b56428cfce0f08c94f2c3625dc3b3a8180d7229eaa9f8aa967fb16e5256'}} +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_can_add_obj_with_same_key_and_get_them_back(root): +# sdp = SheerkaDataProvider(root) +# obj1 = ObjDumpJson("key", "value1") +# obj2 = ObjDumpJson("key", "value2") +# 
sdp.serializer.register(JsonSerializer(core.utils.get_full_qualified_name(obj1))) +# +# result = sdp.add(evt_digest, "entry", obj1, use_ref=True) +# sdp.add(evt_digest, "entry", obj2, use_ref=True) +# +# loaded = sdp.get(result.entry, result.key) +# +# assert len(loaded) == 2 +# assert loaded[0] == obj1 +# assert loaded[1] == obj2 +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_get_safe_dictionary_without_origin(root): +# sdp = SheerkaDataProvider(root) +# obj = {"my_key": "value1"} +# +# obj_serializer = JsonSerializer(core.utils.get_full_qualified_name(obj)) +# sdp.serializer.register(obj_serializer) +# +# result = sdp.add(evt_digest, "entry", obj, use_ref=True) +# from_db = sdp.get(result.entry, result.key) +# +# assert len(from_db) == 2 +# assert from_db["my_key"] == obj["my_key"] +# assert Serializer.ORIGIN in from_db +# +# from_db_no_origin = sdp.get_safe(result.entry, result.key, load_origin=False) +# assert len(from_db_no_origin) == 1 +# assert from_db_no_origin["my_key"] == obj["my_key"] +# assert Serializer.ORIGIN not in from_db_no_origin +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def test_i_get_dictionary_without_origin(root): +# sdp = SheerkaDataProvider(root) +# obj = {"my_key": "value1"} +# +# obj_serializer = JsonSerializer(core.utils.get_full_qualified_name(obj)) +# sdp.serializer.register(obj_serializer) +# +# result = sdp.add(evt_digest, "entry", obj, use_ref=True) +# from_db = sdp.get(result.entry, result.key) +# +# assert len(from_db) == 2 +# assert from_db["my_key"] == obj["my_key"] +# assert Serializer.ORIGIN in from_db +# +# from_db_no_origin = sdp.get(result.entry, result.key, load_origin=False) +# assert len(from_db_no_origin) == 1 +# assert from_db_no_origin["my_key"] == obj["my_key"] +# assert Serializer.ORIGIN not in from_db_no_origin +# +# +# @pytest.mark.parametrize("root", [ +# ".sheerka", +# "mem://" +# ]) +# def 
test_i_get_safe_object_without_origin(root): +# sdp = SheerkaDataProvider(root) +# obj = ObjDumpJson("my_key", "value1") +# +# obj_serializer = JsonSerializer(core.utils.get_full_qualified_name(obj)) +# sdp.serializer.register(obj_serializer) +# +# result = sdp.add(evt_digest, "entry", obj, use_ref=True) +# from_db = sdp.get(result.entry, result.key) +# +# assert from_db == obj +# assert hasattr(from_db, Serializer.ORIGIN) +# +# from_db_no_origin = sdp.get_safe(result.entry, result.key, load_origin=False) +# assert from_db_no_origin == obj +# assert not hasattr(from_db_no_origin, Serializer.ORIGIN) +# +# +# def test_i_can_get_ref(): +# sdp = SheerkaDataProvider("mem://") +# obj = ObjDumpJson("my_key", "value1") +# +# obj_serializer = JsonSerializer(core.utils.get_full_qualified_name(obj)) +# sdp.serializer.register(obj_serializer) +# +# result = sdp.add(evt_digest, "entry", obj, use_ref=True) +# +# ref = sdp.get_ref(result.entry, result.key) +# assert ref == "076f0df0f110c304982242a88088efacce71f361e49f065db75919a7f72c2821" +# +# +# def test_i_can_get_ref_when_list(): +# sdp = SheerkaDataProvider("mem://") +# +# obj_serializer = JsonSerializer(core.utils.get_full_qualified_name(ObjDumpJson)) +# sdp.serializer.register(obj_serializer) +# +# sdp.add(evt_digest, "entry", ObjDumpJson("my_key", "value1"), use_ref=True) +# result = sdp.add(evt_digest, "entry", ObjDumpJson("my_key", "value2"), use_ref=True) +# +# ref = sdp.get_ref(result.entry, result.key) +# assert ref == [ +# "076f0df0f110c304982242a88088efacce71f361e49f065db75919a7f72c2821", +# "e6bf5b56428cfce0f08c94f2c3625dc3b3a8180d7229eaa9f8aa967fb16e5256" +# ] +# +# +# def test_i_cannot_get_ref_if_the_saved_item_is_not_a_ref(): +# sdp = SheerkaDataProvider("mem://") +# obj = ObjDumpJson("my_key", "value1") +# result = sdp.add(evt_digest, "entry", obj, use_ref=False) +# +# with pytest.raises(SheerkaDataProviderError) as e: +# sdp.get_ref(result.entry, result.key) +# +# assert e.value.args[0] == "Not a reference" +# 
assert e.value.obj == f"{result.entry}.{result.key}" +# +# +# def test_i_cannot_get_ref_if_the_item_does_not_exist(): +# sdp = SheerkaDataProvider("mem://") +# with pytest.raises(IndexError): +# sdp.get_ref("fake", "fake") diff --git a/tests/sdp/test_sheerkaSerializer.py b/tests/sdp/test_sheerkaSerializer.py index 95e5c3c..a96616f 100644 --- a/tests/sdp/test_sheerkaSerializer.py +++ b/tests/sdp/test_sheerkaSerializer.py @@ -22,14 +22,14 @@ class Obj: def test_i_can_serialize_an_event(): - event = Event("test", user="user", date=datetime.fromisoformat("2019-10-21T10:20:30.999")) + event = Event("test", user_id="user", date=datetime.fromisoformat("2019-10-21T10:20:30.999")) serializer = Serializer() stream = serializer.serialize(event, None) loaded = serializer.deserialize(stream, None) assert event.version == loaded.version - assert event.user == loaded.user + assert event.user_id == loaded.user_id assert event.date == loaded.date assert event.message == loaded.message diff --git a/tests/sheerkapickle/test_sheerka_handlers.py b/tests/sheerkapickle/test_sheerka_handlers.py index c4d8b33..7f6871c 100644 --- a/tests/sheerkapickle/test_sheerka_handlers.py +++ b/tests/sheerkapickle/test_sheerka_handlers.py @@ -1,5 +1,5 @@ import sheerkapickle -from core.builtin_concepts import BuiltinConcepts, UserInputConcept, ReturnValueConcept +from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept from core.concept import Concept, ConceptParts from core.sheerka.ExecutionContext import ExecutionContext from core.tokenizer import Tokenizer @@ -56,10 +56,11 @@ class TestSheerkaPickleHandler(TestUsingMemoryBasedSheerka): assert decoded == concept assert to_string == '{"_sheerka/obj": "core.concept.Concept", "meta.name": "foo", "meta.where": "my_where"}' - concept = set_full_serialization(Concept("foo").def_prop("a", "value_a").def_prop("b", "value_b")) + concept = set_full_serialization(Concept("foo").def_var("a", "value_a").def_var("b", "value_b")) to_string = 
sheerkapickle.encode(sheerka, concept) decoded = sheerkapickle.decode(sheerka, to_string) assert decoded == concept + assert to_string == '{"_sheerka/obj": "core.concept.Concept", "meta.name": "foo", "meta.variables": [["a", "value_a"], ["b", "value_b"]], "values": [["a", null], ["b", null]]}' concept = Concept("foo").init_key() sheerka.create_new_concept(self.get_context(sheerka), concept) @@ -73,76 +74,76 @@ class TestSheerkaPickleHandler(TestUsingMemoryBasedSheerka): sheerka = self.get_sheerka() concept = set_full_serialization(Concept("foo")) - concept.values[ConceptParts.PRE] = 10 # an int + concept.set_value(ConceptParts.PRE, 10) # an int to_string = sheerkapickle.encode(sheerka, concept) decoded = sheerkapickle.decode(sheerka, to_string) assert decoded == concept - assert to_string == '{"_sheerka/obj": "core.concept.Concept", "meta.name": "foo", "pre": 10}' + assert to_string == '{"_sheerka/obj": "core.concept.Concept", "meta.name": "foo", "values": [["cParts.pre", 10]]}' concept = set_full_serialization(Concept("foo")) - concept.values[ConceptParts.POST] = 'a string' # an string + concept.set_value(ConceptParts.POST, 'a string') # an string to_string = sheerkapickle.encode(sheerka, concept) decoded = sheerkapickle.decode(sheerka, to_string) assert decoded == concept - assert to_string == '{"_sheerka/obj": "core.concept.Concept", "meta.name": "foo", "post": "a string"}' + assert to_string == '{"_sheerka/obj": "core.concept.Concept", "meta.name": "foo", "values": [["cParts.post", "a string"]]}' concept = set_full_serialization(Concept("foo")) - concept.values[ConceptParts.WHERE] = ['a string', 3.14] # a list + concept.set_value(ConceptParts.WHERE, ['a string', 3.14]) # a list to_string = sheerkapickle.encode(sheerka, concept) decoded = sheerkapickle.decode(sheerka, to_string) assert decoded == concept - assert to_string == '{"_sheerka/obj": "core.concept.Concept", "meta.name": "foo", "where": ["a string", 3.14]}' + assert to_string == '{"_sheerka/obj": 
"core.concept.Concept", "meta.name": "foo", "values": [["cParts.where", ["a string", 3.14]]]}' concept = set_full_serialization(Concept("foo")) - concept.values[ConceptParts.WHERE] = ('a string', 3.14) # a tuple + concept.set_value(ConceptParts.WHERE, ('a string', 3.14)) # a tuple to_string = sheerkapickle.encode(sheerka, concept) decoded = sheerkapickle.decode(sheerka, to_string) assert decoded == concept - assert to_string == '{"_sheerka/obj": "core.concept.Concept", "meta.name": "foo", "where": {"_sheerka/tuple": ["a string", 3.14]}}' + assert to_string == '{"_sheerka/obj": "core.concept.Concept", "meta.name": "foo", "values": [["cParts.where", {"_sheerka/tuple": ["a string", 3.14]}]]}' concept = set_full_serialization(Concept("foo")) - concept.values[ConceptParts.BODY] = set_full_serialization(Concept("foo", body="foo_body")) + concept.set_value(ConceptParts.BODY, set_full_serialization(Concept("foo", body="foo_body"))) to_string = sheerkapickle.encode(sheerka, concept) decoded = sheerkapickle.decode(sheerka, to_string) assert decoded == concept - assert to_string == '{"_sheerka/obj": "core.concept.Concept", "meta.name": "foo", "body": {"_sheerka/obj": "core.concept.Concept", "meta.name": "foo", "meta.body": "foo_body"}}' + assert to_string == '{"_sheerka/obj": "core.concept.Concept", "meta.name": "foo", "values": [["cParts.body", {"_sheerka/obj": "core.concept.Concept", "meta.name": "foo", "meta.body": "foo_body"}]]}' - def test_i_can_encode_decode_unknown_concept_properties(self): + def test_i_can_encode_decode_unknown_concept_variables(self): sheerka = self.get_sheerka() concept = set_full_serialization(Concept("foo")) - concept.set_prop("a", "value_a") # string + concept.set_value("a", "value_a") # string to_string = sheerkapickle.encode(sheerka, concept) decoded = sheerkapickle.decode(sheerka, to_string) assert decoded == concept - assert to_string == '{"_sheerka/obj": "core.concept.Concept", "meta.name": "foo", "props": [["a", "value_a"]]}' + assert 
to_string == '{"_sheerka/obj": "core.concept.Concept", "meta.name": "foo", "values": [["a", "value_a"]]}' concept = set_full_serialization(Concept("foo")) - concept.set_prop("a", 10) # int + concept.set_value("a", 10) # int to_string = sheerkapickle.encode(sheerka, concept) decoded = sheerkapickle.decode(sheerka, to_string) assert decoded == concept - assert to_string == '{"_sheerka/obj": "core.concept.Concept", "meta.name": "foo", "props": [["a", 10]]}' + assert to_string == '{"_sheerka/obj": "core.concept.Concept", "meta.name": "foo", "values": [["a", 10]]}' concept = set_full_serialization(Concept("foo")) - concept.set_prop("a", set_full_serialization(Concept("bar"))) # another concept + concept.set_value("a", set_full_serialization(Concept("bar"))) # another concept to_string = sheerkapickle.encode(sheerka, concept) decoded = sheerkapickle.decode(sheerka, to_string) assert decoded == concept - assert to_string == '{"_sheerka/obj": "core.concept.Concept", "meta.name": "foo", "props": [["a", {"_sheerka/obj": "core.concept.Concept", "meta.name": "bar"}]]}' + assert to_string == '{"_sheerka/obj": "core.concept.Concept", "meta.name": "foo", "values": [["a", {"_sheerka/obj": "core.concept.Concept", "meta.name": "bar"}]]}' concept = set_full_serialization(Concept("foo")) - concept.set_prop("a", "a").set_prop("b", "b") # at least two props + concept.set_value("a", "a").set_value("b", "b") # at least two variables to_string = sheerkapickle.encode(sheerka, concept) decoded = sheerkapickle.decode(sheerka, to_string) assert decoded == concept - assert to_string == '{"_sheerka/obj": "core.concept.Concept", "meta.name": "foo", "props": [["a", "a"], ["b", "b"]]}' + assert to_string == '{"_sheerka/obj": "core.concept.Concept", "meta.name": "foo", "values": [["a", "a"], ["b", "b"]]}' def test_i_can_encode_decode_known_concepts(self): sheerka = self.get_sheerka() ref_concept = Concept("my_name", True, True, "my_key", "my_body", "my_where", "my_pre", "my_post", "my_def") - 
ref_concept.def_prop("a", "value_a").def_prop("b", "value_b") + ref_concept.def_var("a", "value_a").def_var("b", "value_b") sheerka.create_new_concept(self.get_context(sheerka), ref_concept) @@ -153,29 +154,30 @@ class TestSheerkaPickleHandler(TestUsingMemoryBasedSheerka): # same test, modify a value and check if this modification is correctly saved concept = Concept().update_from(sheerka.get_by_id(ref_concept.id)) - concept.set_metadata_value(ConceptParts.BODY, set_full_serialization(Concept("bar"))) + concept.set_value(ConceptParts.BODY, set_full_serialization(Concept("bar"))) to_string = sheerkapickle.encode(sheerka, concept) decoded = sheerkapickle.decode(sheerka, to_string) assert decoded == concept - assert to_string == '{"_sheerka/obj": "core.concept.Concept", "concept/id": ["my_key", "1001"], "body": {"_sheerka/obj": "core.concept.Concept", "meta.name": "bar"}}' + assert to_string == '{"_sheerka/obj": "core.concept.Concept", "concept/id": ["my_key", "1001"], "values": [["cParts.body", {"_sheerka/obj": "core.concept.Concept", "meta.name": "bar"}]]}' - def test_i_can_encode_decode_when_property_is_a_concept(self): + def test_i_can_encode_decode_when_variable_is_a_concept(self): sheerka = self.get_sheerka() - foo = Concept("foo").init_key() + foo = Concept("foo") sheerka.create_new_concept(self.get_context(sheerka), foo) - concept = Concept("my_name").init_key() + concept = Concept("my_name") sheerka.create_new_concept(self.get_context(sheerka), concept) - concept.def_prop(foo, "a value") + concept.def_var(foo, "a value") + concept.set_value(foo, "another value") concept.metadata.full_serialization = True to_string = sheerkapickle.encode(sheerka, concept) decoded = sheerkapickle.decode(sheerka, to_string) assert decoded == concept assert to_string == '{"_sheerka/obj": "core.concept.Concept", "meta.name": "my_name", "meta.key": "my_name", ' + \ - '"meta.props": [{"_sheerka/tuple": [{"_sheerka/obj": "core.concept.Concept", "concept/id": ["foo", "1001"]}, "a 
value"]}], ' + \ - '"meta.id": "1002", "props": [[{"_sheerka/id": 1}, null]]}' + '"meta.variables": [[{"_sheerka/obj": "core.concept.Concept", "concept/id": ["foo", "1001"]}, "a value"]], ' + \ + '"meta.id": "1002", "values": [[{"_sheerka/id": 1}, "another value"]]}' def test_i_can_manage_reference_of_the_same_object(self): sheerka = self.get_sheerka() @@ -183,13 +185,13 @@ class TestSheerkaPickleHandler(TestUsingMemoryBasedSheerka): concept_ref = set_full_serialization(Concept("foo")) concept = set_full_serialization(Concept("bar")) - concept.set_metadata_value(ConceptParts.PRE, concept_ref) - concept.set_metadata_value(ConceptParts.BODY, concept_ref) + concept.set_value(ConceptParts.PRE, concept_ref) + concept.set_value(ConceptParts.BODY, concept_ref) to_string = sheerkapickle.encode(sheerka, concept) decoded = sheerkapickle.decode(sheerka, to_string) assert decoded == concept - assert to_string == '{"_sheerka/obj": "core.concept.Concept", "meta.name": "bar", "pre": {"_sheerka/obj": "core.concept.Concept", "meta.name": "foo"}, "body": {"_sheerka/id": 1}}' + assert to_string == '{"_sheerka/obj": "core.concept.Concept", "meta.name": "bar", "values": [["cParts.pre", {"_sheerka/obj": "core.concept.Concept", "meta.name": "foo"}], ["cParts.body", {"_sheerka/id": 1}]]}' def test_i_can_encode_decode_user_input(self): sheerka = self.get_sheerka() @@ -199,7 +201,7 @@ class TestSheerkaPickleHandler(TestUsingMemoryBasedSheerka): to_string = sheerkapickle.encode(sheerka, user_input) decoded = sheerkapickle.decode(sheerka, to_string) assert decoded == user_input - assert to_string == '{"_sheerka/obj": "core.builtin_concepts.UserInputConcept", "concept/id": ["__USER_INPUT", null], "user_name": "my_user_name", "text": "my_text"}' + assert to_string == '{"_sheerka/obj": "core.builtin_concepts.UserInputConcept", "concept/id": ["__USER_INPUT", "11"], "user_name": "my_user_name", "text": "my_text"}' def test_i_can_encode_decode_user_input_when_tokens(self): sheerka = 
self.get_sheerka() @@ -210,8 +212,8 @@ class TestSheerkaPickleHandler(TestUsingMemoryBasedSheerka): to_string = sheerkapickle.encode(sheerka, user_input) decoded = sheerkapickle.decode(sheerka, to_string) - assert decoded == UserInputConcept(text, "my_user_name") - assert to_string == '{' + f'"_sheerka/obj": "core.builtin_concepts.UserInputConcept", "concept/id": ["__USER_INPUT", null], "user_name": "my_user_name", "text": "{text}"' + '}' + assert decoded == sheerka.new(BuiltinConcepts.USER_INPUT, body=text, user_name="my_user_name") + assert to_string == '{' + f'"_sheerka/obj": "core.builtin_concepts.UserInputConcept", "concept/id": ["__USER_INPUT", "11"], "user_name": "my_user_name", "text": "{text}"' + '}' def test_i_can_encode_decode_return_value(self): sheerka = self.get_sheerka() @@ -221,7 +223,7 @@ class TestSheerkaPickleHandler(TestUsingMemoryBasedSheerka): to_string = sheerkapickle.encode(sheerka, ret_val) decoded = sheerkapickle.decode(sheerka, to_string) assert decoded == ret_val - assert to_string == '{"_sheerka/obj": "core.builtin_concepts.ReturnValueConcept", "concept/id": ["__RETURN_VALUE", null], "who": "who", "status": true, "value": 10}' + assert to_string == '{"_sheerka/obj": "core.builtin_concepts.ReturnValueConcept", "concept/id": ["__RETURN_VALUE", "16"], "who": "who", "status": true, "value": 10}' def test_i_can_encode_decode_return_value_with_parent(self): sheerka = self.get_sheerka() @@ -234,7 +236,7 @@ class TestSheerkaPickleHandler(TestUsingMemoryBasedSheerka): decoded = sheerkapickle.decode(sheerka, to_string) assert decoded == ret_val assert decoded.parents == ret_val.parents - id_str = ', "concept/id": ["__RETURN_VALUE", null]' + id_str = ', "concept/id": ["__RETURN_VALUE", "16"]' parents_str = '[{"_sheerka/obj": "core.builtin_concepts.ReturnValueConcept"' + id_str + ', "who": "parent_who", "status": true, "value": "10"}, {"_sheerka/id": 1}]' assert to_string == '{"_sheerka/obj": "core.builtin_concepts.ReturnValueConcept"' + id_str + 
', "who": "who", "status": true, "value": 10, "parents": ' + parents_str + '}'