I can finally chain Processor calls

This commit is contained in:
2025-08-05 19:45:25 +02:00
parent 64e7c44a7d
commit f0d98d23ff
3 changed files with 695 additions and 2 deletions

189
src/core/preprocessor.py Normal file
View File

@@ -0,0 +1,189 @@
from arpeggio import RegExMatch, ZeroOrMore, OneOrMore, ParserPython, EOF, NoMatch
class VariableParsingError(Exception):
    """Raised when a template cannot be parsed into text and variables.

    Attributes:
        message: Short description of the problem.
        position: Character offset in the input the problem is attached to.
    """

    def __init__(self, message, position):
        super().__init__(f"Variable parsing error at position {position}: {message}")
        self.position = position
        self.message = message
class VariableProcessingError(Exception):
    """Raised when a parsed variable cannot be resolved to a value.

    Attributes:
        message: Short description of the problem.
        position: Character offset in the input the problem is attached to.
    """

    def __init__(self, message, position):
        self.message = message
        self.position = position
        super().__init__(f"Variable processing error at position {position}: {message}")
def variable_name():
    """Grammar rule: a letter or underscore followed by letters, digits or underscores."""
    identifier = RegExMatch(r'[a-zA-Z_][a-zA-Z0-9_]*')
    return identifier
def property_name():
    """Grammar rule: a property identifier (same pattern as a variable name)."""
    identifier = RegExMatch(r'[a-zA-Z_][a-zA-Z0-9_]*')
    return identifier
def variable_property():
    """Grammar rule: one property access, i.e. a dot followed by a property name."""
    return (".", property_name)
def variable():
    """Grammar rule: ``$`` + variable name + any number of ``.property`` accesses."""
    property_chain = ZeroOrMore(variable_property)
    return ("$", variable_name, property_chain)
def text_char():
    """Grammar rule: a single character other than ``$``."""
    not_a_dollar = RegExMatch(r'[^$]')
    return not_a_dollar
def text_segment():
    """Grammar rule: a run of one or more ``text_char`` matches."""
    run_of_chars = OneOrMore(text_char)
    return run_of_chars
def element():
    """Grammar rule: ordered choice between a variable and a text segment."""
    alternatives = [variable, text_segment]
    return alternatives
def expression():
    """Grammar root: any number of elements, then end of input."""
    body = ZeroOrMore(element)
    return (body, EOF)
class PlainTextPreprocessor:
    """Parse plain text containing ``$variable.property`` references and
    substitute them with values taken from a namespace."""

    def __init__(self):
        # skipws=False: whitespace is part of the text and must not be skipped.
        self.parser = ParserPython(expression, debug=False, skipws=False)

    @staticmethod
    def _post_validation(elements):
        """Reject two variables that directly follow each other (``$a$b``).

        Args:
            elements: Parsed elements in document order.

        Raises:
            VariableParsingError: At the start offset of the second variable.
        """
        for current, following in zip(elements, elements[1:]):
            if current['type'] == 'variable' and following['type'] == 'variable':
                raise VariableParsingError("Invalid syntax.", following['start'])

    @staticmethod
    def _extract_elements_from_tree(parse_tree, original_text):
        """Flatten the parse tree into text/variable dictionaries with offsets.

        Args:
            parse_tree: Root node returned by the parser.
            original_text: The text that was parsed; element content is sliced
                from it using the node offsets.

        Returns:
            A list of element dictionaries in document order.

        Raises:
            VariableParsingError: If the text right after a variable starts
                with a dot (ignoring surrounding whitespace), i.e. a broken
                property access such as ``$var.`` or ``$var .prop``.
        """
        elements = []

        def process_node(node):
            rule_name = getattr(node, 'rule_name', None)
            if rule_name == 'variable':
                start, end = node.position, node.position_end
                # Skip the leading '$', then split "name.prop1.prop2".
                name, *properties = original_text[start + 1:end].split('.')
                elements.append({
                    "type": "variable",
                    "name": name,
                    "properties": properties,
                    "start": start,
                    "end": end
                })
            elif rule_name == 'text_segment':
                start, end = node.position, node.position_end
                content = original_text[start:end]
                # A leading dot is only a malformed property access when the
                # segment follows a variable; ordinary text such as ".hidden"
                # at the start of the input is legitimate.
                follows_variable = bool(elements) and elements[-1]['type'] == 'variable'
                if follows_variable and content.strip().startswith('.'):
                    raise VariableParsingError("Invalid syntax in property name.", start)
                elements.append({
                    "type": "text",
                    "content": content,
                    "start": start,
                    "end": end
                })
            elif rule_name in ('expression', 'element'):
                # Container nodes: descend into their children.
                for child in node:
                    process_node(child)

        process_node(parse_tree)
        return elements

    def parse(self, text):
        """Parse text into text segments and variables with their positions.

        Args:
            text: The text to parse. A falsy value yields an empty list.

        Returns:
            [
                {"type": "text", "content": "...", "start": int, "end": int},
                {"type": "variable", "name": "...", "properties": [...], "start": int, "end": int}
            ]

        Raises:
            VariableParsingError: If the text does not match the grammar, fails
                the extra validations, or any other error occurs while parsing.
        """
        if not text:
            return []
        try:
            parse_tree = self.parser.parse(text)
            elements = self._extract_elements_from_tree(parse_tree, text)
            # Order by position first: the adjacency check relies on it.
            elements.sort(key=lambda item: item['start'])
            self._post_validation(elements)
            return elements
        except VariableParsingError:
            # Already in the public error type: let it through untouched.
            raise
        except NoMatch as e:
            # Convert Arpeggio parsing errors to our custom error.
            raise VariableParsingError("Invalid syntax", e.position) from e
        except Exception as e:
            raise VariableParsingError(f"Parsing failed: {e}", 0) from e

    def preprocess(self, text, namepace):
        """Return ``text`` with every variable replaced by its value.

        Args:
            text: The text to process.
            namepace: Mapping of variable names to values (parameter spelling
                kept as-is for backward compatibility). Properties are resolved
                on the value with ``getattr``.

        Returns:
            The text with each variable replaced by ``str(value)``.

        Raises:
            VariableParsingError: If ``text`` cannot be parsed.
            VariableProcessingError: If a variable is missing from the
                namespace (a value of ``None`` counts as missing) or a property
                does not exist on the resolved value.
        """
        parts = []
        for element in self.parse(text):
            if element['type'] == 'text':
                parts.append(element['content'])
            elif element['type'] == 'variable':
                name = element['name']
                value = namepace.get(name)
                if value is None:
                    raise VariableProcessingError(f"Variable '{name}' is not defined.", element['start'])
                # Offset of the dot that introduces the next property:
                # start + '$' + variable name.
                pos = element['start'] + len(name) + 1
                for prop in element['properties']:
                    try:
                        value = getattr(value, prop)
                    except AttributeError as e:
                        raise VariableProcessingError(
                            f"Invalid property '{prop}' for variable '{name}'.", pos) from e
                    pos += len(prop) + 1  # +1 for the dot '.'
                parts.append(str(value))
        return "".join(parts)

View File

@@ -1,10 +1,12 @@
import ast import ast
import logging
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from typing import Any, Generator from typing import Any, Generator
from components.admin.admin_db_manager import AdminDbManager from components.admin.admin_db_manager import AdminDbManager
from core.Expando import Expando from core.Expando import Expando
from core.jira import Jira from core.jira import Jira
from core.preprocessor import PlainTextPreprocessor
from core.utils import UnreferencedNamesVisitor from core.utils import UnreferencedNamesVisitor
from utils.Datahelper import DataHelper from utils.Datahelper import DataHelper
@@ -92,6 +94,8 @@ class TableDataProducer(DataProducer):
class JiraDataProducer(DataProducer): class JiraDataProducer(DataProducer):
"""Base class for data producers that emit data from Jira.""" """Base class for data producers that emit data from Jira."""
logger = logging.getLogger("DataProcessor.Producer.Jira")
def __init__(self, session, settings_manager, component_id, request_type='issues', request='', fields=None): def __init__(self, session, settings_manager, component_id, request_type='issues', request='', fields=None):
super().__init__(component_id) super().__init__(component_id)
self._session = session self._session = session
@@ -102,11 +106,20 @@ class JiraDataProducer(DataProducer):
self.db = AdminDbManager(session, settings_manager).jira self.db = AdminDbManager(session, settings_manager).jira
def emit(self, data: Any = None) -> Generator[Any, None, None]: def emit(self, data: Any = None) -> Generator[Any, None, None]:
jira = Jira(self.db.user_name, self.db.api_token, fields=self.fields) self.logger.debug(f"Emitting data from Jira: {self.request_type} {self.request} {self.fields}")
preprocessor = PlainTextPreprocessor()
preprocessed_fields = preprocessor.preprocess(self.fields, {"data": data})
self.logger.debug(f" {preprocessed_fields=}")
jira = Jira(self.db.user_name, self.db.api_token, fields=preprocessed_fields)
if not hasattr(jira, self.request_type): if not hasattr(jira, self.request_type):
raise ValueError(f"Invalid request type: {self.request_type}") raise ValueError(f"Invalid request type: {self.request_type}")
yield from getattr(jira, self.request_type)(self.request) preprocessed_request = preprocessor.preprocess(self.request, {"data": data})
self.logger.debug(f" {preprocessed_request=}")
yield from getattr(jira, self.request_type)(preprocessed_request)
class DefaultDataFilter(DataFilter): class DefaultDataFilter(DataFilter):

491
tests/test_preprocessor.py Normal file
View File

@@ -0,0 +1,491 @@
import pytest
from core.preprocessor import PlainTextPreprocessor, VariableParsingError, VariableProcessingError
def test_i_can_parse_empty_text():
    """An empty string parses to an empty element list."""
    assert PlainTextPreprocessor().parse("") == []
def test_i_can_parse_text_without_variables():
    """Variable-free text comes back as one text element covering the whole input."""
    source = "This is just plain text with no variables"
    parsed = PlainTextPreprocessor().parse(source)
    assert parsed == [
        {"type": "text", "content": source, "start": 0, "end": len(source)},
    ]
def test_i_can_parse_simple_variable():
    """A lone ``$variable`` parses to one variable element without properties."""
    parsed = PlainTextPreprocessor().parse("$variable")
    assert parsed == [
        {"type": "variable", "name": "variable", "properties": [], "start": 0, "end": 9},
    ]
def test_i_can_parse_variable_with_underscores():
    """Underscores are accepted inside a variable name."""
    parsed = PlainTextPreprocessor().parse("$my_variable_name")
    assert parsed == [
        {"type": "variable", "name": "my_variable_name", "properties": [], "start": 0, "end": 17},
    ]
def test_i_can_parse_variable_with_numbers():
    """Digits are accepted inside a variable name."""
    parsed = PlainTextPreprocessor().parse("$var123")
    assert parsed == [
        {"type": "variable", "name": "var123", "properties": [], "start": 0, "end": 7},
    ]
def test_i_can_parse_properties_with_underscores_and_numbers():
    """Property names may contain underscores and digits."""
    parsed = PlainTextPreprocessor().parse("$var._prop123.sub_prop_456")
    assert parsed == [
        {
            "type": "variable",
            "name": "var",
            "properties": ["_prop123", "sub_prop_456"],
            "start": 0,
            "end": 26,
        },
    ]
def test_i_can_parse_variable_starting_with_underscore():
    """A variable name may begin with an underscore."""
    parsed = PlainTextPreprocessor().parse("$_private_var")
    assert parsed == [
        {"type": "variable", "name": "_private_var", "properties": [], "start": 0, "end": 13},
    ]
def test_i_can_parse_variable_with_single_property():
    """A single ``.prop`` access is reported in the ``properties`` list."""
    parsed = PlainTextPreprocessor().parse("$variable.prop")
    assert parsed == [
        {"type": "variable", "name": "variable", "properties": ["prop"], "start": 0, "end": 14},
    ]
def test_i_can_parse_variable_with_multiple_properties():
    """Chained property accesses are reported in order."""
    parsed = PlainTextPreprocessor().parse("$variable.prop.subprop.deep")
    assert parsed == [
        {
            "type": "variable",
            "name": "variable",
            "properties": ["prop", "subprop", "deep"],
            "start": 0,
            "end": 27,
        },
    ]
def test_i_can_parse_text_with_variable_in_middle():
    """Text before and after a variable is kept as separate text elements."""
    parsed = PlainTextPreprocessor().parse("project > $project_id and more")
    leading = {"type": "text", "content": "project > ", "start": 0, "end": 10}
    reference = {"type": "variable", "name": "project_id", "properties": [], "start": 10, "end": 21}
    trailing = {"type": "text", "content": " and more", "start": 21, "end": 30}
    assert parsed == [leading, reference, trailing]
def test_i_can_parse_multiple_variables():
    """Several variables in one text are all found, with the text between them."""
    parsed = PlainTextPreprocessor().parse("value == $variable.prop and $other_var")
    assert parsed == [
        {"type": "text", "content": "value == ", "start": 0, "end": 9},
        {"type": "variable", "name": "variable", "properties": ["prop"], "start": 9, "end": 23},
        {"type": "text", "content": " and ", "start": 23, "end": 28},
        {"type": "variable", "name": "other_var", "properties": [], "start": 28, "end": 38},
    ]
def test_i_can_preserve_all_whitespace():
    """Test that I can preserve all whitespace including tabs and newlines"""
    processor = PlainTextPreprocessor()
    # Every whitespace run is two characters wide (plus the tab and newline in
    # the middle) so that the literal agrees with the offsets asserted below.
    text = "  $var  \t\n  $other.prop  "
    result = processor.parse(text)
    expected = [
        {
            "type": "text",
            "content": "  ",
            "start": 0,
            "end": 2
        },
        {
            "type": "variable",
            "name": "var",
            "properties": [],
            "start": 2,
            "end": 6
        },
        {
            "type": "text",
            "content": "  \t\n  ",
            "start": 6,
            "end": 12
        },
        {
            "type": "variable",
            "name": "other",
            "properties": ["prop"],
            "start": 12,
            "end": 23
        },
        {
            "type": "text",
            "content": "  ",
            "start": 23,
            "end": 25
        }
    ]
    assert result == expected
def test_i_can_parse_text_with_special_characters():
    """Punctuation and symbols other than ``$`` are treated as ordinary text."""
    parsed = PlainTextPreprocessor().parse("Hello $user! @#%^&*()+={}[]|\\:;\"'<>?,./~`")
    assert parsed == [
        {"type": "text", "content": "Hello ", "start": 0, "end": 6},
        {"type": "variable", "name": "user", "properties": [], "start": 6, "end": 11},
        {"type": "text", "content": "! @#%^&*()+={}[]|\\:;\"'<>?,./~`", "start": 11, "end": 41},
    ]
def test_i_can_parse_complex_expression():
    """All variables of a code-like expression are found with their properties."""
    source = "if ($user.profile.age > 18 && $user.status == 'active') { $action.execute(); }"
    parsed = PlainTextPreprocessor().parse(source)
    # Only the variables matter here; the text in between is ignored.
    found = [(item["name"], item["properties"]) for item in parsed if item["type"] == "variable"]
    assert len(found) == 3
    assert found[0] == ("user", ["profile", "age"])
    assert found[1] == ("user", ["status"])
    assert found[2] == ("action", ["execute"])
def test_positions_are_accurate():
    """Start/end offsets of each element match its place in the input."""
    parsed = PlainTextPreprocessor().parse("abc$var123*def")
    assert len(parsed) == 3
    before, reference, after = parsed
    # Text before
    assert (before["start"], before["end"], before["content"]) == (0, 3, "abc")
    # Variable
    assert (reference["start"], reference["end"], reference["name"]) == (3, 10, "var123")
    # Text after
    assert (after["start"], after["end"], after["content"]) == (10, 14, "*def")
# Error cases
def test_i_cannot_parse_dollar_alone_at_end():
    """A trailing ``$`` with no name after it is a parsing error."""
    with pytest.raises(VariableParsingError) as caught:
        PlainTextPreprocessor().parse("Hello $")
    error = caught.value
    assert error.position == 7
    # Only the generic message is checked; a more specific
    # "Variable name missing after '$'" wording is not asserted.
    assert "Invalid syntax" in str(error)
def test_i_cannot_parse_dollar_alone_in_middle():
    """A ``$`` followed by a space in the middle of the text is a parsing error."""
    with pytest.raises(VariableParsingError) as caught:
        PlainTextPreprocessor().parse("Hello $ world")
    error = caught.value
    assert error.position == 7
    assert "Invalid syntax" in str(error)
def test_i_cannot_parse_dot_immediately_after_dollar():
    """``$.property`` (no variable name between ``$`` and the dot) is a parsing error."""
    with pytest.raises(VariableParsingError) as caught:
        PlainTextPreprocessor().parse("$.property")
    error = caught.value
    assert error.position == 1
    # Only the generic message is checked; a more specific
    # "Variable name missing before '.'" wording is not asserted.
    assert "Invalid syntax" in str(error)
def test_i_cannot_parse_variable_ending_with_dot():
    """``$variable.`` (dot with no property after it) is a parsing error."""
    with pytest.raises(VariableParsingError) as caught:
        PlainTextPreprocessor().parse("$variable.")
    error = caught.value
    assert error.position == 9
    assert "Invalid syntax in property name." in str(error)
@pytest.mark.parametrize("text", ["$variable. prop", "$variable .prop", "$variable . prop"])
def test_i_cannot_parse_variable_when_space_in_variable_name(text):
    """Test that I cannot parse a property access with whitespace around the dot"""
    processor = PlainTextPreprocessor()
    with pytest.raises(VariableParsingError) as exc_info:
        processor.parse(text)
    assert exc_info.value.position == 9
    assert "Invalid syntax in property name." in str(exc_info.value)
def test_i_cannot_parse_variable_with_empty_property():
    """``$variable..property`` (nothing between two dots) is a parsing error."""
    with pytest.raises(VariableParsingError) as caught:
        PlainTextPreprocessor().parse("$variable..property")
    error = caught.value
    assert error.position == 9
    assert "Invalid syntax in property name." in str(error)
def test_i_cannot_parse_variable_ending_with_multiple_dots():
    """``$variable...`` (several trailing dots) is a parsing error."""
    with pytest.raises(VariableParsingError) as caught:
        PlainTextPreprocessor().parse("$variable...")
    error = caught.value
    assert error.position == 9
    assert "Invalid syntax in property name." in str(error)
def test_i_cannot_parse_when_consecutive_variables():
    """Test that I cannot parse consecutive variables without text between"""
    processor = PlainTextPreprocessor()
    text = "$var1$var2"
    with pytest.raises(VariableParsingError) as exc_info:
        processor.parse(text)
    # The error is reported at the start of the second variable.
    assert exc_info.value.position == 5
    assert "Invalid syntax." in str(exc_info.value)
def test_first_error_is_reported_with_multiple_errors():
    """With several malformed ``$`` usages, the reported position is that of the first."""
    with pytest.raises(VariableParsingError) as caught:
        PlainTextPreprocessor().parse("$ and $. and $var.")
    # The first problem is the lone '$' at the very start.
    assert caught.value.position == 1
def test_i_can_preprocess_simple_variable():
    """A simple variable is replaced by its value from the namespace."""
    rendered = PlainTextPreprocessor().preprocess("Hello $name!", {"name": "John"})
    assert rendered == "Hello John!"
def test_i_can_preprocess_with_properties():
    """Chained properties are resolved on the namespace value."""

    class User:
        def __init__(self):
            # Throwaway class whose instances expose ``age``.
            self.profile = type('Profile', (), {'age': 25})()

    values = {"user": User()}
    rendered = PlainTextPreprocessor().preprocess("Age: $user.profile.age", values)
    assert rendered == "Age: 25"
def test_i_can_preprocess_multiple_variables():
    """Every variable in the text is substituted."""
    values = {"first": "Hello", "second": "World"}
    rendered = PlainTextPreprocessor().preprocess("$first $second!", values)
    assert rendered == "Hello World!"
def test_i_can_preprocess_empty_text():
    """Preprocessing an empty string returns an empty string."""
    assert PlainTextPreprocessor().preprocess("", {}) == ""
def test_i_cannot_preprocess_undefined_variable():
    """A variable missing from the namespace raises a processing error."""
    with pytest.raises(VariableProcessingError) as caught:
        PlainTextPreprocessor().preprocess("$undefined_var", {})
    assert "Variable 'undefined_var' is not defined" in str(caught.value)
def test_i_cannot_preprocess_invalid_property():
    """An unknown property raises a processing error located at the dot before it."""
    values = {"obj": object()}
    with pytest.raises(VariableProcessingError) as caught:
        PlainTextPreprocessor().preprocess("some text $obj.invalid_prop", values)
    error = caught.value
    assert "Invalid property 'invalid_prop' for variable 'obj'" in str(error)
    assert error.position == 14