"""Plain-text variable preprocessor.

Parses text that mixes literal characters with ``$variable`` and
``$variable.prop.sub_prop`` references (Arpeggio PEG grammar) and substitutes
the references with values taken from a namespace.
"""
from arpeggio import RegExMatch, ZeroOrMore, OneOrMore, ParserPython, EOF, NoMatch


class VariableParsingError(Exception):
    """Raised when the input text is not a syntactically valid expression.

    Attributes:
        message: Short description of the problem.
        position: Character offset in the parsed text where the error was
            detected.
    """

    def __init__(self, message, position):
        self.message = message
        self.position = position
        super().__init__(f"Variable parsing error at position {position}: {message}")


class VariableProcessingError(Exception):
    """Raised when a syntactically valid variable cannot be resolved.

    Attributes:
        message: Short description of the problem.
        position: Character offset in the processed text of the variable (or
            of the dot preceding the failing property).
    """

    def __init__(self, message, position):
        self.message = message
        self.position = position
        super().__init__(f"Variable processing error at position {position}: {message}")


def variable_name():
    """Variable name: a letter or underscore, then letters, digits, underscores."""
    return RegExMatch(r'[a-zA-Z_][a-zA-Z0-9_]*')


def property_name():
    """Property name: same rules as a variable name."""
    return RegExMatch(r'[a-zA-Z_][a-zA-Z0-9_]*')


def variable_property():
    """A property access: ``.property_name``."""
    return ".", property_name


def variable():
    """A complete variable: ``$variable_name(.property_name)*``."""
    return "$", variable_name, ZeroOrMore(variable_property)


def text_char():
    """Any single character that is not the start of a variable (``$``)."""
    return RegExMatch(r'[^$]')


def text_segment():
    """One or more non-variable characters."""
    return OneOrMore(text_char)


def element():
    """Either a variable or a text segment (ordered choice)."""
    return [variable, text_segment]


def expression():
    """Complete expression: a sequence of elements up to the end of input."""
    return ZeroOrMore(element), EOF


class PlainTextPreprocessor:
    """Parse ``$variable.property`` references in text and substitute them."""

    def __init__(self):
        # skipws=False: whitespace is part of the text segments and must be
        # preserved verbatim.
        self.parser = ParserPython(expression, debug=False, skipws=False)

    @staticmethod
    def _post_validation(elements):
        """Reject two variables that directly follow each other (``$a$b``).

        Args:
            elements: Parsed elements, ordered by start position.

        Raises:
            VariableParsingError: Positioned at the start of the second
                variable of the first offending pair.
        """
        for previous, current in zip(elements, elements[1:]):
            if previous['type'] == 'variable' and current['type'] == 'variable':
                raise VariableParsingError("Invalid syntax.", current['start'])

    @staticmethod
    def _extract_elements_from_tree(parse_tree, original_text):
        """Flatten the parse tree into a list of text / variable elements.

        Args:
            parse_tree: Root node returned by the Arpeggio parser.
            original_text: The text that was parsed; element contents are
                sliced from it using the node offsets.

        Returns:
            The elements in document order, each a dict as described in
            ``parse``.

        Raises:
            VariableParsingError: If the text right after a variable starts
                (ignoring leading whitespace) with ``.``, i.e. a malformed
                property access such as ``$var.``, ``$var .prop`` or
                ``$var..prop``.
        """
        elements = []

        def process_node(node):
            rule_name = getattr(node, 'rule_name', None)

            if rule_name == 'variable':
                start, end = node.position, node.position_end
                # Drop the leading '$', then split "name.prop.sub" on dots.
                name, *properties = original_text[start + 1:end].split('.')
                elements.append({
                    "type": "variable",
                    "name": name,
                    "properties": properties,
                    "start": start,
                    "end": end
                })

            elif rule_name == 'text_segment':
                start, end = node.position, node.position_end
                content = original_text[start:end]

                # A dot left over after a variable means the property access
                # could not be matched by the grammar. Text that does not
                # follow a variable (e.g. "./path/$name") is plain text and
                # may legitimately start with a dot.
                follows_variable = bool(elements) and elements[-1]['type'] == 'variable'
                if follows_variable and content.lstrip().startswith('.'):
                    raise VariableParsingError("Invalid syntax in property name.", start)

                elements.append({
                    "type": "text",
                    "content": content,
                    "start": start,
                    "end": end
                })

            elif rule_name in ('expression', 'element'):
                # Container rules: descend into the children in order.
                for child in node:
                    process_node(child)

        process_node(parse_tree)
        return elements

    def parse(self, text):
        """Parse text into text segments and variables with their positions.

        Args:
            text: The text to parse. Empty or ``None`` yields an empty list.

        Returns:
            A list ordered by start position, e.g.::

                [
                    {"type": "text", "content": "...", "start": int, "end": int},
                    {"type": "variable", "name": "...", "properties": [...],
                     "start": int, "end": int}
                ]

        Raises:
            VariableParsingError: On any syntax error; every other failure
                raised while parsing is converted to this type (position 0).
        """
        if not text:
            return []

        try:
            parse_tree = self.parser.parse(text)
            elements = self._extract_elements_from_tree(parse_tree, text)

            # The adjacency check below relies on positional order.
            elements.sort(key=lambda x: x['start'])
            self._post_validation(elements)

            return elements

        except VariableParsingError:
            raise
        except NoMatch as e:
            # Convert Arpeggio parsing errors to our custom error.
            raise VariableParsingError("Invalid syntax", e.position) from e
        except Exception as e:
            raise VariableParsingError(f"Parsing failed: {str(e)}", 0) from e

    def preprocess(self, text, namepace):
        """Return ``text`` with every variable replaced by its value.

        Args:
            text: Text containing ``$variable`` / ``$variable.prop`` references.
            namepace: Mapping of variable names to values (parameter name kept
                as-is for backward compatibility). Properties are resolved
                with ``getattr``.

        Returns:
            The text with each variable replaced by ``str(value)``.

        Raises:
            VariableParsingError: If ``text`` is syntactically invalid.
            VariableProcessingError: If a variable is missing from the
                namespace (a value of ``None`` counts as missing), or if a
                property does not exist; in the latter case the position
                points at the dot preceding that property.
        """
        rendered = []
        for element in self.parse(text):
            if element['type'] == 'text':
                rendered.append(element['content'])
            elif element['type'] == 'variable':
                name = element['name']
                value = namepace.get(name)
                if value is None:
                    raise VariableProcessingError(f"Variable '{name}' is not defined.", element['start'])

                # NOTE(review): getattr follows any identifier, including
                # dunder attributes (e.g. ``__class__``); confirm templates
                # only come from trusted configuration.
                pos = element['start'] + len(name) + 1  # +1 for the starting '$'
                for prop in element['properties']:
                    try:
                        value = getattr(value, prop)
                    except AttributeError as e:
                        raise VariableProcessingError(
                            f"Invalid property '{prop}' for variable '{name}'.", pos) from e
                    pos += len(prop) + 1  # +1 for the dot '.'

                rendered.append(str(value))

        return "".join(rendered)
import pytest

from core.preprocessor import PlainTextPreprocessor, VariableParsingError, VariableProcessingError


def _text(content, start, end):
    """Build the element dict expected for a text segment."""
    return {"type": "text", "content": content, "start": start, "end": end}


def _variable(name, start, end, properties=()):
    """Build the element dict expected for a variable."""
    return {"type": "variable", "name": name, "properties": list(properties), "start": start, "end": end}


def _parse_failure(text):
    """Parse ``text``, require a VariableParsingError and return it."""
    processor = PlainTextPreprocessor()
    with pytest.raises(VariableParsingError) as exc_info:
        processor.parse(text)
    return exc_info.value


def test_i_can_parse_empty_text():
    """Empty input yields no elements."""
    assert PlainTextPreprocessor().parse("") == []


def test_i_can_parse_text_without_variables():
    """Text without any variable is a single text element."""
    text = "This is just plain text with no variables"
    assert PlainTextPreprocessor().parse(text) == [_text(text, 0, len(text))]


def test_i_can_parse_simple_variable():
    """A lone simple variable is a single variable element."""
    assert PlainTextPreprocessor().parse("$variable") == [_variable("variable", 0, 9)]


def test_i_can_parse_variable_with_underscores():
    """Underscores are allowed inside a variable name."""
    assert PlainTextPreprocessor().parse("$my_variable_name") == [_variable("my_variable_name", 0, 17)]


def test_i_can_parse_variable_with_numbers():
    """Digits are allowed inside a variable name."""
    assert PlainTextPreprocessor().parse("$var123") == [_variable("var123", 0, 7)]


def test_i_can_parse_properties_with_underscores_and_numbers():
    """Property names may contain underscores and digits."""
    parsed = PlainTextPreprocessor().parse("$var._prop123.sub_prop_456")
    assert parsed == [_variable("var", 0, 26, ["_prop123", "sub_prop_456"])]


def test_i_can_parse_variable_starting_with_underscore():
    """A variable name may start with an underscore."""
    assert PlainTextPreprocessor().parse("$_private_var") == [_variable("_private_var", 0, 13)]


def test_i_can_parse_variable_with_single_property():
    """A variable followed by one property."""
    assert PlainTextPreprocessor().parse("$variable.prop") == [_variable("variable", 0, 14, ["prop"])]


def test_i_can_parse_variable_with_multiple_properties():
    """A variable followed by a chain of properties."""
    parsed = PlainTextPreprocessor().parse("$variable.prop.subprop.deep")
    assert parsed == [_variable("variable", 0, 27, ["prop", "subprop", "deep"])]


def test_i_can_parse_text_with_variable_in_middle():
    """A variable surrounded by text yields text, variable, text."""
    parsed = PlainTextPreprocessor().parse("project > $project_id and more")
    assert parsed == [
        _text("project > ", 0, 10),
        _variable("project_id", 10, 21),
        _text(" and more", 21, 30),
    ]


def test_i_can_parse_multiple_variables():
    """Several variables separated by text are all reported, in order."""
    parsed = PlainTextPreprocessor().parse("value == $variable.prop and $other_var")
    assert parsed == [
        _text("value == ", 0, 9),
        _variable("variable", 9, 23, ["prop"]),
        _text(" and ", 23, 28),
        _variable("other_var", 28, 38),
    ]


def test_i_can_preserve_all_whitespace():
    """Whitespace (spaces, tabs, newlines) is kept verbatim in text elements."""
    # Layout: 2 spaces, "$var", 2 spaces + tab + newline + 2 spaces,
    # "$other.prop", 2 spaces.
    parsed = PlainTextPreprocessor().parse("  $var  \t\n  $other.prop  ")
    assert parsed == [
        _text("  ", 0, 2),
        _variable("var", 2, 6),
        _text("  \t\n  ", 6, 12),
        _variable("other", 12, 23, ["prop"]),
        _text("  ", 23, 25),
    ]


def test_i_can_parse_text_with_special_characters():
    """Punctuation and symbols other than '$' are plain text."""
    parsed = PlainTextPreprocessor().parse("Hello $user! @#%^&*()+={}[]|\\:;\"'<>?,./~`")
    assert parsed == [
        _text("Hello ", 0, 6),
        _variable("user", 6, 11),
        _text("! @#%^&*()+={}[]|\\:;\"'<>?,./~`", 11, 41),
    ]


def test_i_can_parse_complex_expression():
    """All variables of a complex but valid expression are found, in order."""
    text = "if ($user.profile.age > 18 && $user.status == 'active') { $action.execute(); }"
    parsed = PlainTextPreprocessor().parse(text)

    found = [(item["name"], item["properties"]) for item in parsed if item["type"] == "variable"]
    assert found == [
        ("user", ["profile", "age"]),
        ("user", ["status"]),
        ("action", ["execute"]),
    ]


def test_positions_are_accurate():
    """Start and end offsets match the location of each element in the text."""
    parsed = PlainTextPreprocessor().parse("abc$var123*def")

    assert len(parsed) == 3
    before, var, after = parsed
    assert (before["start"], before["end"], before["content"]) == (0, 3, "abc")
    assert (var["start"], var["end"], var["name"]) == (3, 10, "var123")
    assert (after["start"], after["end"], after["content"]) == (10, 14, "*def")


# Error cases
def test_i_cannot_parse_dollar_alone_at_end():
    """A '$' at the very end of the text is a syntax error."""
    error = _parse_failure("Hello $")
    assert error.position == 7
    assert "Invalid syntax" in str(error)


def test_i_cannot_parse_dollar_alone_in_middle():
    """A '$' not followed by a name is a syntax error."""
    error = _parse_failure("Hello $ world")
    assert error.position == 7
    assert "Invalid syntax" in str(error)


def test_i_cannot_parse_dot_immediately_after_dollar():
    """'$.property' (no variable name before the dot) is a syntax error."""
    error = _parse_failure("$.property")
    assert error.position == 1
    assert "Invalid syntax" in str(error)


def test_i_cannot_parse_variable_ending_with_dot():
    """'$variable.' (dangling dot) is a syntax error."""
    error = _parse_failure("$variable.")
    assert error.position == 9
    assert "Invalid syntax in property name." in str(error)


@pytest.mark.parametrize("text", ["$variable. prop", "$variable .prop", "$variable . prop"])
def test_i_cannot_parse_variable_when_space_in_variable_name(text):
    """Whitespace around the dot of a property access is a syntax error."""
    error = _parse_failure(text)
    assert error.position == 9
    assert "Invalid syntax in property name." in str(error)


def test_i_cannot_parse_variable_with_empty_property():
    """'$variable..property' (empty property between dots) is a syntax error."""
    error = _parse_failure("$variable..property")
    assert error.position == 9
    assert "Invalid syntax in property name." in str(error)


def test_i_cannot_parse_variable_ending_with_multiple_dots():
    """'$variable...' (several dangling dots) is a syntax error."""
    error = _parse_failure("$variable...")
    assert error.position == 9
    assert "Invalid syntax in property name." in str(error)


def test_i_cannot_parse_when_consecutive_variables():
    """Two variables with no text between them are rejected."""
    error = _parse_failure("$var1$var2")
    assert error.position == 5
    assert "Invalid syntax." in str(error)


def test_first_error_is_reported_with_multiple_errors():
    """With several errors in the text, the first one is reported."""
    error = _parse_failure("$ and $. and $var.")
    # The first error is the lone '$' at the start.
    assert error.position == 1


def test_i_can_preprocess_simple_variable():
    """A simple variable is replaced by its value from the namespace."""
    rendered = PlainTextPreprocessor().preprocess("Hello $name!", {"name": "John"})
    assert rendered == "Hello John!"
def test_i_can_preprocess_with_properties():
    """A chain of properties is resolved through attribute access."""

    class Profile:
        age = 25

    class User:
        def __init__(self):
            self.profile = Profile()

    rendered = PlainTextPreprocessor().preprocess("Age: $user.profile.age", {"user": User()})
    assert rendered == "Age: 25"


def test_i_can_preprocess_multiple_variables():
    """Every variable of the text is substituted."""
    values = {"first": "Hello", "second": "World"}
    assert PlainTextPreprocessor().preprocess("$first $second!", values) == "Hello World!"


def test_i_can_preprocess_empty_text():
    """Empty text is rendered as an empty string."""
    assert PlainTextPreprocessor().preprocess("", {}) == ""


def test_i_cannot_preprocess_undefined_variable():
    """A variable missing from the namespace raises a processing error."""
    processor = PlainTextPreprocessor()
    with pytest.raises(VariableProcessingError) as exc_info:
        processor.preprocess("$undefined_var", {})

    assert "Variable 'undefined_var' is not defined" in str(exc_info.value)


def test_i_cannot_preprocess_invalid_property():
    """An unknown property raises a processing error located at its dot."""
    processor = PlainTextPreprocessor()
    with pytest.raises(VariableProcessingError) as exc_info:
        processor.preprocess("some text $obj.invalid_prop", {"obj": object()})

    error = exc_info.value
    assert "Invalid property 'invalid_prop' for variable 'obj'" in str(error)
    assert error.position == 14