Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test: multiple unit tests. #42

Merged
merged 9 commits into from
Nov 1, 2023
32 changes: 32 additions & 0 deletions tests/unit/processing_engine/test_sec_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from sec_parser.semantic_elements.composite_semantic_element import (
CompositeSemanticElement,
)
from sec_parser.processing_engine.processing_log import LogItem
from sec_parser.semantic_elements.top_level_section_title import TopLevelSectionTitle
from tests.unit._utils import assert_elements

Expand Down Expand Up @@ -51,3 +52,34 @@ def test_smoke_test(name, html_str, unwrap_elements, expected_elements):
mock_unwrap.assert_called()
else:
mock_unwrap.assert_not_called()


@pytest.mark.parametrize(
    ("name", "html_str", "expected_processing_log"),
    values := [
        (
            "simple",
            "<div>Hello World.</div>",
            (
                LogItem(
                    origin="TextClassifier",
                    payload={"cls_name": "TextElement"},
                ),
            ),
        ),
    ],
    ids=[v[0] for v in values],
)
def test_transformation_history(name, html_str, expected_processing_log):
    """Parse `html_str` and compare the single element's processing log items."""
    # Arrange
    sec_parser = Edgar10QParser()

    # Act
    processed_elements = sec_parser.parse(html_str)

    # Assert
    # For simplicity, while crafting `html_str` make sure it always returns a
    # single element. Check the count before indexing so a wrong count fails
    # this assertion instead of raising IndexError on an empty result.
    assert len(processed_elements) == 1
    processing_log = processed_elements[0].processing_log.get_items()
    assert processing_log == expected_processing_log
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from unittest.mock import Mock
import bs4
import pytest

from sec_parser.semantic_elements.abstract_semantic_element import AbstractSemanticElement
from sec_parser.processing_steps.individual_semantic_element_extractor.single_element_checks.table_check import TableCheck

def test_table_check():
    """TableCheck.contains_single_element is True for an element tagged `table`."""
    # Arrange
    table_element = AbstractSemanticElement(Mock())
    table_element.html_tag.name = "table"

    # Act
    result = TableCheck().contains_single_element(table_element)

    # Assert
    assert result is True
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from sec_parser.processing_steps.individual_semantic_element_extractor.single_element_checks.xbrl_tag_check import XbrlTagCheck


def test_contains_single_element():
def test_contains_single_element_ix_tag():
# Arrange
element = AbstractSemanticElement(Mock())
element.html_tag.name = "ix"
Expand All @@ -16,4 +16,20 @@ def test_contains_single_element():
actual = check.contains_single_element(element)

# Assert
assert actual is False
assert actual is False


def test_contains_single_element_ix_numeric_tag():
    """XbrlTagCheck returns False when the tag reports containing `ix:nonnumeric`."""
    # NOTE(review): the test name says "numeric" but the stub only answers True
    # for "ix:nonnumeric" - confirm which tag this case is meant to cover.
    # Arrange
    element = AbstractSemanticElement(Mock())
    element.html_tag.name = "some-random-tag-name"
    # The mocked `contains_tag` answers True only for the "ix:nonnumeric" query.
    element.html_tag.contains_tag.side_effect = lambda tag: tag == "ix:nonnumeric"
    check = XbrlTagCheck()

    # Act
    actual = check.contains_single_element(element)

    # Assert
    assert actual is False
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import pytest

from sec_parser.processing_steps.individual_semantic_element_extractor.individual_semantic_element_extractor import IndividualSemanticElementExtractor
from sec_parser.exceptions import SecParserValueError


def test_init_with_no_checks():
    """Constructing the extractor with `get_checks=None` raises SecParserValueError."""
    # Act & Assert
    with pytest.raises(SecParserValueError):
        IndividualSemanticElementExtractor(get_checks=None)
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@
import bs4
import pytest

from sec_parser.exceptions import SecParserValueError
from sec_parser.exceptions import SecParserError, SecParserValueError
from sec_parser.processing_engine.html_tag import HtmlTag
from sec_parser.processing_steps.abstract_classes.abstract_elementwise_processing_step import (
AbstractElementwiseProcessingStep,
ElementProcessingContext,
ErrorWhileProcessingElement,
)
from sec_parser.semantic_elements.abstract_semantic_element import (
AbstractSemanticElement,
Expand Down Expand Up @@ -46,6 +47,15 @@ def _process_element(
return element


class ErrorRaisingProcessingStep(AbstractElementwiseProcessingStep):
    """Elementwise step whose `_process_element` always raises SecParserError."""

    def _process_element(
        self,
        element: AbstractSemanticElement,
        _: ElementProcessingContext,
    ) -> AbstractSemanticElement:
        # Fail unconditionally; neither argument is inspected.
        raise SecParserError()


def test_process_skip_due_to_types_to_process():
"""Test that elements not in 'types_to_process' are skipped."""
# Arrange
Expand Down Expand Up @@ -107,3 +117,28 @@ def test_process_skip_due_to_both_types_to_process_and_types_to_exclude():
assert step.seen_elements == [element1]
assert processed_elements == input_elements
assert processed_elements == input_elements


def test_error_while_processing_element():
    """A step that raises SecParserError yields an ErrorWhileProcessingElement."""
    # Arrange
    elements_in = [MockSemanticElement(Mock())]
    failing_step = ErrorRaisingProcessingStep()

    # Act
    elements_out = failing_step.process(elements_in)

    # Assert
    first_result = elements_out[0]
    assert isinstance(first_result, ErrorWhileProcessingElement)


def test_error_while_processing_element_with_no_error():
    """`create_from_element` with `error=None` raises SecParserValueError."""
    # Arrange
    element = MockSemanticElement(Mock())

    # Act & Assert
    with pytest.raises(SecParserValueError):
        # The call is expected to raise, so its result is deliberately not bound.
        ErrorWhileProcessingElement.create_from_element(
            element,
            error=None,
            log_origin=None,
        )
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@
<p>repeating</p>
<p>repeating</p>
<p>repeating</p>

<p></p>
""",
[
{
Expand Down Expand Up @@ -102,6 +104,10 @@
},
]
* 10,
{
"type": EmptyElement,
"tag": "p",
},
],
),
],
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import pytest

import bs4

from sec_parser.processing_engine.html_tag import HtmlTag
from sec_parser.semantic_elements.composite_semantic_element import (
CompositeSemanticElement,
)
from sec_parser.semantic_elements.semantic_elements import NotYetClassifiedElement
from sec_parser.exceptions import SecParserValueError


def test_inner_elements_setter():
    """Assigning `None` to `inner_elements` raises SecParserValueError."""
    # Arrange
    tag = bs4.Tag(name="span")
    tag.string = "A" * 60
    element = CompositeSemanticElement(
        HtmlTag(tag),
        inner_elements=(
            NotYetClassifiedElement(HtmlTag(bs4.Tag(name="p"))),
            NotYetClassifiedElement(HtmlTag(bs4.Tag(name="p"))),
        ),
    )

    # Act & Assert
    with pytest.raises(SecParserValueError):
        element.inner_elements = None
13 changes: 13 additions & 0 deletions tests/unit/semantic_elements/test_highlighted_text_element.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import bs4
import pytest

from sec_parser.semantic_elements.abstract_semantic_element import AbstractSemanticElement
from sec_parser.processing_engine.html_tag import HtmlTag
from sec_parser.semantic_elements.highlighted_text_element import (
HighlightedTextElement,
Expand All @@ -25,6 +26,18 @@ def test_highlighted_text_element_initialization():
HighlightedTextElement(mock_html_tag, style=None)


def test_highlighted_text_element_from_element():
    """`create_from_element` with `style=None` raises SecParserValueError."""
    # Arrange
    source_element = AbstractSemanticElement(Mock())

    # Act & Assert
    with pytest.raises(SecParserValueError, match="Style must be provided."):
        HighlightedTextElement.create_from_element(
            source_element,
            style=None,
            log_origin=None,
        )


def test_to_dict():
# Arrange
tag = bs4.Tag(name="span")
Expand Down
118 changes: 89 additions & 29 deletions tests/unit/semantic_tree/test_semantic_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,93 @@
from sec_parser.semantic_tree.tree_node import TreeNode


class MockSemanticElement(AbstractSemanticElement):
    """Bare AbstractSemanticElement subclass used as a stand-in by these tests."""


def element(text):
    """Return a MockSemanticElement wrapping a `p` tag whose string is `text`."""
    paragraph = bs4.Tag(name="p")
    paragraph.string = text
    wrapped_tag = HtmlTag(paragraph)
    return MockSemanticElement(wrapped_tag)


def build_tree(tree_structure, parent=None):
    """Build TreeNode objects from a nested list/dict description.

    `tree_structure` must be a list. Each dict item contributes one node per
    key; when the key's value is a list, it is built recursively and attached
    through `add_children`. Any non-dict item becomes a single node. Every
    node created at this level receives `parent` as its parent.

    Returns the list of nodes created at this level, in input order.
    """
    assert isinstance(tree_structure, list)
    built = []
    for entry in tree_structure:
        if not isinstance(entry, dict):
            # Plain entry: a node with no children of its own.
            built.append(TreeNode(element(entry), parent=parent))
            continue
        for label, subtree in entry.items():
            # One node per key of the dict.
            branch = TreeNode(element(label), parent=parent)
            built.append(branch)
            # Recurse only when the value describes children as a list.
            if isinstance(subtree, list):
                branch.add_children(build_tree(subtree, parent=branch))
    return built


@pytest.mark.parametrize(
    ("name", "tree_structure", "render_kwargs", "expected_output"),
    values := [
        ("empty_tree_with_default_kwargs", [], {}, ""),
    ],
    ids=[case[0] for case in values],
)
def test_render(name, tree_structure, render_kwargs, expected_output):
    """`SemanticTree.render` returns the expected string for each case."""
    # Arrange
    tree = SemanticTree(build_tree(tree_structure))

    # Act
    rendered = tree.render(**render_kwargs)

    # Assert
    assert rendered == expected_output


@pytest.mark.parametrize(
    ("name", "tree_structure", "print_kwargs", "expected_output"),
    values := [
        ("empty_tree_with_default_kwargs", [], {}, "\n"),
        (
            "simple_tree",
            [{"root": ["child1", "child2"]}],
            {},
            # One output line per literal; adjacent literals are concatenated.
            "\x1b[1;34mMockSemanticElement\x1b[0m: root\n"
            "├── \x1b[1;34mMockSemanticElement\x1b[0m: child1\n"
            "└── \x1b[1;34mMockSemanticElement\x1b[0m: child2\n",
        ),
        (
            "simple_tree_with_line_limit",
            [{"root": ["child1", "child2"]}],
            {"line_limit": 2},
            "\x1b[1;34mMockSemanticElement\x1b[0m: root\n"
            "├── \x1b[1;34mMockSemanticElement\x1b[0m: child1\n",
        ),
    ],
    ids=[case[0] for case in values],
)
def test_print(name, tree_structure, print_kwargs, expected_output, capsys):
    """`SemanticTree.print` writes the expected text to stdout for each case."""
    # Arrange
    tree = SemanticTree(build_tree(tree_structure))

    # Act
    tree.print(**print_kwargs)
    captured = capsys.readouterr()

    # Assert
    assert captured.out == expected_output


@pytest.mark.parametrize(
"tree_structure,expected_nodes",
[
Expand Down Expand Up @@ -44,39 +131,12 @@ def test_get_nodes(
tree_structure: dict | list[dict],
expected_nodes: list[str],
) -> None:
def build_tree(tree_structure, parent=None):
nodes = []
assert isinstance(tree_structure, list)
for item in tree_structure:
if isinstance(item, dict):
for key, value in item.items():
# Create a new TreeNode for each key
node = TreeNode(element(key), parent=parent)
nodes.append(node)
# Recursively build the tree for the children of the node
if isinstance(value, list):
children = build_tree(value, parent=node)
node.add_children(children)
else:
child_node = TreeNode(element(item), parent=parent)
nodes.append(child_node)
return nodes

# Arrange
root_nodes = build_tree(tree_structure)
tree = SemanticTree(root_nodes)

# Act
nodes = list(tree.nodes)

# Assert
assert [node.text for node in nodes] == expected_nodes


class MockSemanticElement(AbstractSemanticElement):
pass


def element(text):
t = bs4.Tag(name="p")
t.string = text
return MockSemanticElement(HtmlTag(t))
assert [node.text for node in nodes] == expected_nodes