I can finally chain Processor calls

This commit is contained in:
2025-08-05 19:45:25 +02:00
parent 64e7c44a7d
commit f0d98d23ff
3 changed files with 695 additions and 2 deletions

189
src/core/preprocessor.py Normal file
View File

@@ -0,0 +1,189 @@
from arpeggio import RegExMatch, ZeroOrMore, OneOrMore, ParserPython, EOF, NoMatch
class VariableParsingError(Exception):
"""Custom exception for variable parsing errors"""
def __init__(self, message, position):
self.message = message
self.position = position
super().__init__(f"Variable parsing error at position {position}: {message}")
class VariableProcessingError(Exception):
"""Custom exception for variable parsing errors"""
def __init__(self, message, position):
self.message = message
self.position = position
super().__init__(f"Variable processing error at position {position}: {message}")
def variable_name():
"""Variable name: alphanumeric characters and underscores"""
return RegExMatch(r'[a-zA-Z_][a-zA-Z0-9_]*')
def property_name():
"""Property name: same rules as variable name"""
return RegExMatch(r'[a-zA-Z_][a-zA-Z0-9_]*')
def variable_property():
"""A property access: .property_name"""
return ".", property_name
def variable():
"""A complete variable: $variable_name(.property)*"""
return "$", variable_name, ZeroOrMore(variable_property)
def text_char():
"""Any character that is not the start of a variable"""
return RegExMatch(r'[^$]')
def text_segment():
"""One or more non-variable characters"""
return OneOrMore(text_char)
def element():
"""Either a variable or a text segment"""
return [variable, text_segment]
def expression():
"""Complete expression: sequence of elements"""
return ZeroOrMore(element), EOF
class PlainTextPreprocessor:
def __init__(self):
self.parser = ParserPython(expression, debug=False, skipws=False)
@staticmethod
def _post_validation(elements):
if len(elements) < 2:
return
for element, next_element in [(element, elements[i + 1]) for i, element in enumerate(elements[:-1])]:
if element['type'] == 'variable' and next_element['type'] == 'variable':
raise VariableParsingError("Invalid syntax.", next_element['start'])
@staticmethod
def _extract_elements_from_tree(parse_tree, original_text):
"""Extract elements with positions from the parse tree"""
elements = []
def process_node(node, current_pos=0):
nonlocal elements
if hasattr(node, 'rule_name'):
if node.rule_name == 'variable':
# Extract variable information
var_start = node.position
var_end = node.position_end
var_text = original_text[var_start:var_end]
parts = var_text[1:].split('.') # Remove $ and split by .
var_name = parts[0]
properties = parts[1:] if len(parts) > 1 else []
elements.append({
"type": "variable",
"name": var_name,
"properties": properties,
"start": var_start,
"end": var_end
})
elif node.rule_name == 'text_segment':
# Extract text segment
text_start = node.position
text_end = node.position_end
content = original_text[text_start:text_end]
stripped = content.strip()
if len(stripped) > 0 and stripped[0] == '.':
raise VariableParsingError("Invalid syntax in property name.", text_start)
elements.append({
"type": "text",
"content": content,
"start": text_start,
"end": text_end
})
elif node.rule_name in ('expression', 'element'):
for child in node:
process_node(child, current_pos)
# Process children
if hasattr(node, '_tx_children') and node._tx_children:
for child in node._tx_children:
process_node(child, current_pos)
process_node(parse_tree)
return elements
def parse(self, text):
"""
Parse text and return structure with text segments and variables with positions
Returns:
[
{"type": "text", "content": "...", "start": int, "end": int},
{"type": "variable", "name": "...", "properties": [...], "start": int, "end": int}
]
"""
if not text:
return []
try:
# Parse the text
parse_tree = self.parser.parse(text)
# Extract elements from parse tree
elements = self._extract_elements_from_tree(parse_tree, text)
# Extra validations
self._post_validation(elements)
# Sort elements by start position
elements.sort(key=lambda x: x['start'])
return elements
except NoMatch as e:
# Convert Arpeggio parsing errors to our custom error
raise VariableParsingError(f"Invalid syntax", e.position)
except Exception as e:
if isinstance(e, VariableParsingError):
raise
raise VariableParsingError(f"Parsing failed: {str(e)}", 0)
def preprocess(self, text, namepace):
result = ""
elements = self.parse(text)
for element in elements:
if element['type'] == 'text':
result += element['content']
elif element['type'] == 'variable':
value = namepace.get(element['name'])
if value is None:
raise VariableProcessingError(f"Variable '{element['name']}' is not defined.", element['start'])
try:
pos = element['start'] + len(element['name']) + 1 # +1 for the starting '$'
for property_name in element['properties']:
value = getattr(value, property_name)
pos += len(property_name) + 1 # +1 for the dot '.'
except AttributeError as e:
raise VariableProcessingError(f"Invalid property '{property_name}' for variable '{element['name']}'.",
pos) from e
result += str(value)
return result

View File

@@ -1,10 +1,12 @@
import ast
import logging
from abc import ABC, abstractmethod
from typing import Any, Generator
from components.admin.admin_db_manager import AdminDbManager
from core.Expando import Expando
from core.jira import Jira
from core.preprocessor import PlainTextPreprocessor
from core.utils import UnreferencedNamesVisitor
from utils.Datahelper import DataHelper
@@ -92,6 +94,8 @@ class TableDataProducer(DataProducer):
class JiraDataProducer(DataProducer):
"""Base class for data producers that emit data from Jira."""
logger = logging.getLogger("DataProcessor.Producer.Jira")
def __init__(self, session, settings_manager, component_id, request_type='issues', request='', fields=None):
super().__init__(component_id)
self._session = session
@@ -102,11 +106,20 @@ class JiraDataProducer(DataProducer):
self.db = AdminDbManager(session, settings_manager).jira
def emit(self, data: Any = None) -> Generator[Any, None, None]:
jira = Jira(self.db.user_name, self.db.api_token, fields=self.fields)
self.logger.debug(f"Emitting data from Jira: {self.request_type} {self.request} {self.fields}")
preprocessor = PlainTextPreprocessor()
preprocessed_fields = preprocessor.preprocess(self.fields, {"data": data})
self.logger.debug(f" {preprocessed_fields=}")
jira = Jira(self.db.user_name, self.db.api_token, fields=preprocessed_fields)
if not hasattr(jira, self.request_type):
raise ValueError(f"Invalid request type: {self.request_type}")
yield from getattr(jira, self.request_type)(self.request)
preprocessed_request = preprocessor.preprocess(self.request, {"data": data})
self.logger.debug(f" {preprocessed_request=}")
yield from getattr(jira, self.request_type)(preprocessed_request)
class DefaultDataFilter(DataFilter):

491
tests/test_preprocessor.py Normal file
View File

@@ -0,0 +1,491 @@
import pytest
from core.preprocessor import PlainTextPreprocessor, VariableParsingError, VariableProcessingError
def test_i_can_parse_empty_text():
"""Test that I can parse empty text input"""
processor = PlainTextPreprocessor()
result = processor.parse("")
assert result == []
def test_i_can_parse_text_without_variables():
"""Test that I can parse text without any variables"""
processor = PlainTextPreprocessor()
text = "This is just plain text with no variables"
result = processor.parse(text)
expected = [{
"type": "text",
"content": text,
"start": 0,
"end": len(text)
}]
assert result == expected
def test_i_can_parse_simple_variable():
"""Test that I can parse text with only a simple variable"""
processor = PlainTextPreprocessor()
text = "$variable"
result = processor.parse(text)
expected = [{
"type": "variable",
"name": "variable",
"properties": [],
"start": 0,
"end": 9
}]
assert result == expected
def test_i_can_parse_variable_with_underscores():
"""Test that I can parse variable with underscores in name"""
processor = PlainTextPreprocessor()
text = "$my_variable_name"
result = processor.parse(text)
expected = [{
"type": "variable",
"name": "my_variable_name",
"properties": [],
"start": 0,
"end": 17
}]
assert result == expected
def test_i_can_parse_variable_with_numbers():
"""Test that I can parse variable with numbers in name"""
processor = PlainTextPreprocessor()
text = "$var123"
result = processor.parse(text)
expected = [{
"type": "variable",
"name": "var123",
"properties": [],
"start": 0,
"end": 7
}]
assert result == expected
def test_i_can_parse_properties_with_underscores_and_numbers():
"""Test that I can parse property names with underscores and numbers"""
processor = PlainTextPreprocessor()
text = "$var._prop123.sub_prop_456"
result = processor.parse(text)
expected = [{
"type": "variable",
"name": "var",
"properties": ["_prop123", "sub_prop_456"],
"start": 0,
"end": 26
}]
assert result == expected
def test_i_can_parse_variable_starting_with_underscore():
"""Test that I can parse variable name starting with underscore"""
processor = PlainTextPreprocessor()
text = "$_private_var"
result = processor.parse(text)
expected = [
{
"type": "variable",
"name": "_private_var",
"properties": [],
"start": 0,
"end": 13
}
]
assert result == expected
def test_i_can_parse_variable_with_single_property():
"""Test that I can parse variable with one property"""
processor = PlainTextPreprocessor()
text = "$variable.prop"
result = processor.parse(text)
expected = [{
"type": "variable",
"name": "variable",
"properties": ["prop"],
"start": 0,
"end": 14
}]
assert result == expected
def test_i_can_parse_variable_with_multiple_properties():
"""Test that I can parse variable with multiple properties"""
processor = PlainTextPreprocessor()
text = "$variable.prop.subprop.deep"
result = processor.parse(text)
expected = [{
"type": "variable",
"name": "variable",
"properties": ["prop", "subprop", "deep"],
"start": 0,
"end": 27
}]
assert result == expected
def test_i_can_parse_text_with_variable_in_middle():
"""Test that I can parse text with variable in the middle"""
processor = PlainTextPreprocessor()
text = "project > $project_id and more"
result = processor.parse(text)
expected = [
{
"type": "text",
"content": "project > ",
"start": 0,
"end": 10
},
{
"type": "variable",
"name": "project_id",
"properties": [],
"start": 10,
"end": 21
},
{
"type": "text",
"content": " and more",
"start": 21,
"end": 30
}
]
assert result == expected
def test_i_can_parse_multiple_variables():
"""Test that I can parse text with multiple variables"""
processor = PlainTextPreprocessor()
text = "value == $variable.prop and $other_var"
result = processor.parse(text)
expected = [
{
"type": "text",
"content": "value == ",
"start": 0,
"end": 9
},
{
"type": "variable",
"name": "variable",
"properties": ["prop"],
"start": 9,
"end": 23
},
{
"type": "text",
"content": " and ",
"start": 23,
"end": 28
},
{
"type": "variable",
"name": "other_var",
"properties": [],
"start": 28,
"end": 38
}
]
assert result == expected
def test_i_can_preserve_all_whitespace():
"""Test that I can preserve all whitespace including tabs and newlines"""
processor = PlainTextPreprocessor()
text = " $var \t\n $other.prop "
result = processor.parse(text)
expected = [
{
"type": "text",
"content": " ",
"start": 0,
"end": 2
},
{
"type": "variable",
"name": "var",
"properties": [],
"start": 2,
"end": 6
},
{
"type": "text",
"content": " \t\n ",
"start": 6,
"end": 12
},
{
"type": "variable",
"name": "other",
"properties": ["prop"],
"start": 12,
"end": 23
},
{
"type": "text",
"content": " ",
"start": 23,
"end": 25
}
]
assert result == expected
def test_i_can_parse_text_with_special_characters():
"""Test that I can parse text with special characters"""
processor = PlainTextPreprocessor()
text = "Hello $user! @#%^&*()+={}[]|\\:;\"'<>?,./~`"
result = processor.parse(text)
expected = [
{
"type": "text",
"content": "Hello ",
"start": 0,
"end": 6
},
{
"type": "variable",
"name": "user",
"properties": [],
"start": 6,
"end": 11
},
{
"type": "text",
"content": "! @#%^&*()+={}[]|\\:;\"'<>?,./~`",
"start": 11,
"end": 41
}
]
assert result == expected
def test_i_can_parse_complex_expression():
"""Test that I can parse complex but valid expression"""
processor = PlainTextPreprocessor()
text = "if ($user.profile.age > 18 && $user.status == 'active') { $action.execute(); }"
result = processor.parse(text)
# Should parse successfully and find all variables
variables = [elem for elem in result if elem["type"] == "variable"]
assert len(variables) == 3
# Check variable details
assert variables[0]["name"] == "user"
assert variables[0]["properties"] == ["profile", "age"]
assert variables[1]["name"] == "user"
assert variables[1]["properties"] == ["status"]
assert variables[2]["name"] == "action"
assert variables[2]["properties"] == ["execute"]
def test_positions_are_accurate():
"""Test that element positions are accurate"""
processor = PlainTextPreprocessor()
text = "abc$var123*def"
result = processor.parse(text)
assert len(result) == 3
# Text before
assert result[0]["start"] == 0
assert result[0]["end"] == 3
assert result[0]["content"] == "abc"
# Variable
assert result[1]["start"] == 3
assert result[1]["end"] == 10
assert result[1]["name"] == "var123"
# Text after
assert result[2]["start"] == 10
assert result[2]["end"] == 14
assert result[2]["content"] == "*def"
# Error cases
def test_i_cannot_parse_dollar_alone_at_end():
"""Test that I cannot parse $ at the end of text"""
processor = PlainTextPreprocessor()
text = "Hello $"
with pytest.raises(VariableParsingError) as exc_info:
processor.parse(text)
assert exc_info.value.position == 7
assert "Invalid syntax" in str(exc_info.value)
# assert "Variable name missing after '$'" in str(exc_info.value)
def test_i_cannot_parse_dollar_alone_in_middle():
"""Test that I cannot parse $ alone in middle of text"""
processor = PlainTextPreprocessor()
text = "Hello $ world"
with pytest.raises(VariableParsingError) as exc_info:
processor.parse(text)
assert exc_info.value.position == 7
assert "Invalid syntax" in str(exc_info.value)
def test_i_cannot_parse_dot_immediately_after_dollar():
"""Test that I cannot parse $.property (dot immediately after $)"""
processor = PlainTextPreprocessor()
text = "$.property"
with pytest.raises(VariableParsingError) as exc_info:
processor.parse(text)
assert exc_info.value.position == 1
assert "Invalid syntax" in str(exc_info.value)
# assert "Variable name missing before '.'" in str(exc_info.value)
def test_i_cannot_parse_variable_ending_with_dot():
"""Test that I cannot parse $variable. (dot at the end)"""
processor = PlainTextPreprocessor()
text = "$variable."
with pytest.raises(VariableParsingError) as exc_info:
processor.parse(text)
assert exc_info.value.position == 9
assert "Invalid syntax in property name." in str(exc_info.value)
@pytest.mark.parametrize("text", ["$variable. prop", "$variable .prop", "$variable . prop"])
def test_i_cannot_parse_variable_when_space_in_variable_name(text):
"""Test that I cannot parse $variable. (dot at the end)"""
processor = PlainTextPreprocessor()
# text = "$variable. "
with pytest.raises(VariableParsingError) as exc_info:
processor.parse(text)
assert exc_info.value.position == 9
assert "Invalid syntax in property name." in str(exc_info.value)
def test_i_cannot_parse_variable_with_empty_property():
"""Test that I cannot parse $variable..property (empty property between dots)"""
processor = PlainTextPreprocessor()
text = "$variable..property"
with pytest.raises(VariableParsingError) as exc_info:
processor.parse(text)
assert exc_info.value.position == 9
assert "Invalid syntax in property name." in str(exc_info.value)
def test_i_cannot_parse_variable_ending_with_multiple_dots():
"""Test that I cannot parse $variable... (multiple dots at end)"""
processor = PlainTextPreprocessor()
text = "$variable..."
with pytest.raises(VariableParsingError) as exc_info:
processor.parse(text)
assert exc_info.value.position == 9
assert "Invalid syntax in property name." in str(exc_info.value)
def test_i_cannot_parse_when_consecutive_variables():
"""Test that I can parse consecutive variables without text between"""
processor = PlainTextPreprocessor()
text = "$var1$var2"
with pytest.raises(VariableParsingError) as exc_info:
processor.parse(text)
assert exc_info.value.position == 5
assert "Invalid syntax." in str(exc_info.value)
def test_first_error_is_reported_with_multiple_errors():
"""Test that first error is reported when multiple $ errors exist"""
processor = PlainTextPreprocessor()
text = "$ and $. and $var."
with pytest.raises(VariableParsingError) as exc_info:
processor.parse(text)
# Should report the first error ($ alone)
assert exc_info.value.position == 1
def test_i_can_preprocess_simple_variable():
"""Test preprocessing text with a simple variable"""
processor = PlainTextPreprocessor()
namespace = {"name": "John"}
result = processor.preprocess("Hello $name!", namespace)
assert result == "Hello John!"
def test_i_can_preprocess_with_properties():
"""Test preprocessing text with variable properties"""
class User:
def __init__(self):
self.profile = type('Profile', (), {'age': 25})()
processor = PlainTextPreprocessor()
namespace = {"user": User()}
result = processor.preprocess("Age: $user.profile.age", namespace)
assert result == "Age: 25"
def test_i_can_preprocess_multiple_variables():
"""Test preprocessing text with multiple variables"""
processor = PlainTextPreprocessor()
namespace = {"first": "Hello", "second": "World"}
result = processor.preprocess("$first $second!", namespace)
assert result == "Hello World!"
def test_i_can_preprocess_empty_text():
"""Test preprocessing empty text"""
processor = PlainTextPreprocessor()
namespace = {}
result = processor.preprocess("", namespace)
assert result == ""
def test_i_cannot_preprocess_undefined_variable():
"""Test preprocessing with undefined variable raises error"""
processor = PlainTextPreprocessor()
namespace = {}
with pytest.raises(VariableProcessingError) as exc_info:
processor.preprocess("$undefined_var", namespace)
assert "Variable 'undefined_var' is not defined" in str(exc_info.value)
def test_i_cannot_preprocess_invalid_property():
"""Test preprocessing with invalid property access"""
processor = PlainTextPreprocessor()
namespace = {"obj": object()}
with pytest.raises(VariableProcessingError) as exc_info:
processor.preprocess("some text $obj.invalid_prop", namespace)
assert "Invalid property 'invalid_prop' for variable 'obj'" in str(exc_info.value)
assert exc_info.value.position == 14