Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix several accidentally quadratic functions #800

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/spdx_tools/spdx/model/relationship.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,6 @@ def __init__(
comment: Optional[str] = None,
):
check_types_and_set_values(self, locals())

def __hash__(self):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is not a correct hash of the relationship. One might instead return the triple and use it for lookups in the cache

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From python docs:

If a class does not define an eq() method it should not define a hash() operation either;

(Note I am not a python expert.)

return hash((self.spdx_element_id, self.related_spdx_element_id, self.relationship_type))
16 changes: 7 additions & 9 deletions src/spdx_tools/spdx/parser/jsonlikedict/relationship_parser.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# SPDX-FileCopyrightText: 2022 spdx contributors
#
# SPDX-License-Identifier: Apache-2.0
from beartype.typing import Dict, List, Optional
from beartype.typing import Dict, List, Optional, Set

from spdx_tools.common.typing.constructor_type_errors import ConstructorTypeErrors
from spdx_tools.spdx.model import Relationship, RelationshipType
Expand Down Expand Up @@ -35,9 +35,9 @@ def parse_all_relationships(self, input_doc_dict: Dict) -> List[Relationship]:
document_describes: List[str] = delete_duplicates_from_list(input_doc_dict.get("documentDescribes", []))
doc_spdx_id: Optional[str] = input_doc_dict.get("SPDXID")

existing_relationships_without_comments: List[Relationship] = self.get_all_relationships_without_comments(
existing_relationships_without_comments: Set[Relationship] = set(self.get_all_relationships_without_comments(
relationships
)
))
relationships.extend(
parse_field_or_log_error(
self.logger,
Expand All @@ -52,9 +52,6 @@ def parse_all_relationships(self, input_doc_dict: Dict) -> List[Relationship]:
)

package_dicts: List[Dict] = input_doc_dict.get("packages", [])
existing_relationships_without_comments: List[Relationship] = self.get_all_relationships_without_comments(

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a bit confused by the removal of this code. The semantics will be different between this and previous call right? This one would include the additional relationships found from the relationships.extend call above. To preserve the previous behavior, it seems that you would instead want to simply retrieve the set identified by parser_field_or_log_error, and then extend relationships with that set. But then you could also update the existing_relationships_without_comments from that new set as well. The question is whether you are purposefully changing the semantics here or not.

relationships
)

relationships.extend(
parse_field_or_log_error(
Expand Down Expand Up @@ -110,7 +107,7 @@ def parse_relationship_type(relationship_type_str: str) -> RelationshipType:
return relationship_type

def parse_document_describes(
self, doc_spdx_id: str, described_spdx_ids: List[str], existing_relationships: List[Relationship]
self, doc_spdx_id: str, described_spdx_ids: List[str], existing_relationships: Set[Relationship]
) -> List[Relationship]:
logger = Logger()
describes_relationships = []
Expand All @@ -131,10 +128,11 @@ def parse_document_describes(
return describes_relationships

def parse_has_files(
self, package_dicts: List[Dict], existing_relationships: List[Relationship]
self, package_dicts: List[Dict], existing_relationships: Set[Relationship]
) -> List[Relationship]:
# assume existing relationships are stripped of comments
logger = Logger()

contains_relationships = []
for package in package_dicts:
package_spdx_id: Optional[str] = package.get("SPDXID")
Expand All @@ -160,7 +158,7 @@ def parse_has_files(
return contains_relationships

def check_if_relationship_exists(
self, relationship: Relationship, existing_relationships: List[Relationship]
self, relationship: Relationship, existing_relationships: Set[Relationship]
) -> bool:
# assume existing relationships are stripped of comments
if relationship in existing_relationships:
Expand Down
13 changes: 8 additions & 5 deletions src/spdx_tools/spdx/validation/annotation_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,32 @@
#
# SPDX-License-Identifier: Apache-2.0

from beartype.typing import List
from beartype.typing import List, Set

from spdx_tools.spdx.model import Annotation, Document
from spdx_tools.spdx.validation.actor_validator import validate_actor
from spdx_tools.spdx.validation.spdx_id_validators import validate_spdx_id
from spdx_tools.spdx.validation.spdx_id_validators import validate_spdx_id, get_all_spdx_ids
from spdx_tools.spdx.validation.validation_message import SpdxElementType, ValidationContext, ValidationMessage


def validate_annotations(annotations: List[Annotation], document: Document) -> List[ValidationMessage]:
validation_messages = []

all_spdx_ids: Set[str] = get_all_spdx_ids(document)

for annotation in annotations:
validation_messages.extend(validate_annotation(annotation, document))
validation_messages.extend(validate_annotation(annotation, document, all_spdx_ids))

return validation_messages


def validate_annotation(annotation: Annotation, document: Document) -> List[ValidationMessage]:
def validate_annotation(annotation: Annotation, document: Document, all_spdx_ids: Set[str]) -> List[ValidationMessage]:
validation_messages = []
context = ValidationContext(element_type=SpdxElementType.ANNOTATION, full_element=annotation)

validation_messages.extend(validate_actor(annotation.annotator, "annotation"))

messages: List[str] = validate_spdx_id(annotation.spdx_id, document, check_document=True)
messages: List[str] = validate_spdx_id(annotation.spdx_id, document, all_spdx_ids, check_document=True)
for message in messages:
validation_messages.append(ValidationMessage(message, context))

Expand Down
15 changes: 9 additions & 6 deletions src/spdx_tools/spdx/validation/file_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@
#
# SPDX-License-Identifier: Apache-2.0

from beartype.typing import List, Optional
from beartype.typing import List, Optional, Set

from spdx_tools.spdx.model import ChecksumAlgorithm, Document, File
from spdx_tools.spdx.validation.checksum_validator import validate_checksums
from spdx_tools.spdx.validation.license_expression_validator import (
validate_license_expression,
validate_license_expressions,
)
from spdx_tools.spdx.validation.spdx_id_validators import validate_spdx_id
from spdx_tools.spdx.validation.spdx_id_validators import validate_spdx_id, get_all_spdx_ids
from spdx_tools.spdx.validation.validation_message import SpdxElementType, ValidationContext, ValidationMessage


Expand All @@ -19,16 +19,19 @@ def validate_files(
) -> List[ValidationMessage]:
validation_messages = []
if document:
all_spdx_ids: Set[str] = get_all_spdx_ids(document)
for file in files:
validation_messages.extend(validate_file_within_document(file, spdx_version, document))
validation_messages.extend(validate_file_within_document(file, spdx_version, document, all_spdx_ids))
else:
for file in files:
validation_messages.extend(validate_file(file, spdx_version))

return validation_messages


def validate_file_within_document(file: File, spdx_version: str, document: Document) -> List[ValidationMessage]:
def validate_file_within_document(
file: File, spdx_version: str, document: Document, all_spdx_ids: Set[str]
) -> List[ValidationMessage]:
validation_messages: List[ValidationMessage] = []
context = ValidationContext(
spdx_id=file.spdx_id,
Expand All @@ -37,8 +40,8 @@ def validate_file_within_document(file: File, spdx_version: str, document: Docum
full_element=file,
)

for message in validate_spdx_id(file.spdx_id, document):
validation_messages.append(ValidationMessage(message, context))
for message in validate_spdx_id(file.spdx_id, document, all_spdx_ids):
validation_messages.append(ValidationMessage(message, context, all_spdx_ids))

validation_messages.extend(validate_license_expression(file.license_concluded, document, file.spdx_id))

Expand Down
11 changes: 6 additions & 5 deletions src/spdx_tools/spdx/validation/package_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#
# SPDX-License-Identifier: Apache-2.0

from beartype.typing import List, Optional
from beartype.typing import List, Optional, Set

from spdx_tools.spdx.model import Document, File, Package, Relationship, RelationshipType
from spdx_tools.spdx.model.relationship_filters import filter_by_type_and_origin, filter_by_type_and_target
Expand All @@ -14,7 +14,7 @@
validate_license_expressions,
)
from spdx_tools.spdx.validation.package_verification_code_validator import validate_verification_code
from spdx_tools.spdx.validation.spdx_id_validators import validate_spdx_id
from spdx_tools.spdx.validation.spdx_id_validators import validate_spdx_id, get_all_spdx_ids
from spdx_tools.spdx.validation.uri_validators import validate_download_location, validate_url
from spdx_tools.spdx.validation.validation_message import SpdxElementType, ValidationContext, ValidationMessage

Expand All @@ -23,9 +23,10 @@ def validate_packages(
packages: List[Package], spdx_version: str, document: Optional[Document] = None
) -> List[ValidationMessage]:
validation_messages: List[ValidationMessage] = []
all_spdx_ids: Set[str] = get_all_spdx_ids(document)
if document:
for package in packages:
validation_messages.extend(validate_package_within_document(package, spdx_version, document))
validation_messages.extend(validate_package_within_document(package, spdx_version, document, all_spdx_ids))
else:
for package in packages:
validation_messages.extend(validate_package(package, spdx_version))
Expand All @@ -34,7 +35,7 @@ def validate_packages(


def validate_package_within_document(
package: Package, spdx_version: str, document: Document
package: Package, spdx_version: str, document: Document, all_spdx_ids: Set[str]
) -> List[ValidationMessage]:
validation_messages: List[ValidationMessage] = []
context = ValidationContext(
Expand All @@ -44,7 +45,7 @@ def validate_package_within_document(
full_element=package,
)

for message in validate_spdx_id(package.spdx_id, document):
for message in validate_spdx_id(package.spdx_id, document, all_spdx_ids):
validation_messages.append(ValidationMessage(message, context))

if not package.files_analyzed:
Expand Down
16 changes: 10 additions & 6 deletions src/spdx_tools/spdx/validation/relationship_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,37 +2,41 @@
#
# SPDX-License-Identifier: Apache-2.0

from beartype.typing import List
from beartype.typing import List, Set

from spdx_tools.spdx.model import Document, Relationship, RelationshipType, SpdxNoAssertion, SpdxNone
from spdx_tools.spdx.validation.spdx_id_validators import validate_spdx_id
from spdx_tools.spdx.validation.spdx_id_validators import validate_spdx_id, get_all_spdx_ids
from spdx_tools.spdx.validation.validation_message import SpdxElementType, ValidationContext, ValidationMessage


def validate_relationships(
relationships: List[Relationship], spdx_version: str, document: Document
) -> List[ValidationMessage]:
validation_messages = []

all_spdx_ids: Set[str] = get_all_spdx_ids(document)

for relationship in relationships:
validation_messages.extend(validate_relationship(relationship, spdx_version, document))
validation_messages.extend(validate_relationship(relationship, spdx_version, document, all_spdx_ids))

return validation_messages


def validate_relationship(
relationship: Relationship, spdx_version: str, document: Document
relationship: Relationship, spdx_version: str, document: Document, all_spdx_ids: Set[str],
) -> List[ValidationMessage]:
validation_messages = []
context = ValidationContext(element_type=SpdxElementType.RELATIONSHIP, full_element=relationship)

relationship_type: RelationshipType = relationship.relationship_type

messages: List[str] = validate_spdx_id(relationship.spdx_element_id, document, check_document=True)
messages: List[str] = validate_spdx_id(relationship.spdx_element_id, document, all_spdx_ids, check_document=True)

for message in messages:
validation_messages.append(ValidationMessage(message, context))

if relationship.related_spdx_element_id not in [SpdxNone(), SpdxNoAssertion()]:
messages: List[str] = validate_spdx_id(relationship.related_spdx_element_id, document, check_document=True)
messages: List[str] = validate_spdx_id(relationship.related_spdx_element_id, document, all_spdx_ids, check_document=True)
for message in messages:
validation_messages.append(ValidationMessage(message, context))

Expand Down
15 changes: 8 additions & 7 deletions src/spdx_tools/spdx/validation/snippet_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,33 +2,34 @@
#
# SPDX-License-Identifier: Apache-2.0

from beartype.typing import List, Optional
from beartype.typing import List, Optional, Set

from spdx_tools.spdx.model import Document, Snippet
from spdx_tools.spdx.validation.license_expression_validator import (
validate_license_expression,
validate_license_expressions,
)
from spdx_tools.spdx.validation.spdx_id_validators import validate_spdx_id
from spdx_tools.spdx.validation.spdx_id_validators import validate_spdx_id, get_all_spdx_ids
from spdx_tools.spdx.validation.validation_message import SpdxElementType, ValidationContext, ValidationMessage


def validate_snippets(
snippets: List[Snippet], spdx_version: str, document: Optional[Document] = None
) -> List[ValidationMessage]:
validation_messages = []
all_spdx_ids: Set[str] = get_all_spdx_ids(document)
if document:
for snippet in snippets:
validation_messages.extend(validate_snippet_within_document(snippet, spdx_version, document))
validation_messages.extend(validate_snippet_within_document(snippet, spdx_version, document, all_spdx_ids))
else:
for snippet in snippets:
validation_messages.extend(validate_snippet(snippet, spdx_version))
validation_messages.extend(validate_snippet(snippet, spdx_version, all_spdx_ids))

return validation_messages


def validate_snippet_within_document(
snippet: Snippet, spdx_version: str, document: Document
snippet: Snippet, spdx_version: str, document: Document, all_spdx_ids: Set[str]
) -> List[ValidationMessage]:
validation_messages: List[ValidationMessage] = []
context = ValidationContext(
Expand All @@ -38,11 +39,11 @@ def validate_snippet_within_document(
full_element=snippet,
)

messages: List[str] = validate_spdx_id(snippet.spdx_id, document)
messages: List[str] = validate_spdx_id(snippet.spdx_id, document, all_spdx_ids)
for message in messages:
validation_messages.append(ValidationMessage(message, context))

messages: List[str] = validate_spdx_id(snippet.file_spdx_id, document, check_files=True)
messages: List[str] = validate_spdx_id(snippet.file_spdx_id, document, all_spdx_ids, check_files=True)
for message in messages:
validation_messages.append(ValidationMessage(message, context))

Expand Down
12 changes: 8 additions & 4 deletions src/spdx_tools/spdx/validation/spdx_id_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import re

from beartype.typing import List
from beartype.typing import List, Set

from spdx_tools.spdx.document_utils import get_contained_spdx_element_ids
from spdx_tools.spdx.model import Document, File
Expand All @@ -23,11 +23,15 @@ def is_spdx_id_present_in_files(spdx_id: str, files: List[File]) -> bool:


def is_spdx_id_present_in_document(spdx_id: str, document: Document) -> bool:
all_spdx_ids_in_document: List[str] = get_list_of_all_spdx_ids(document)
all_spdx_ids_in_document: Set[str] = get_all_spdx_ids(document)

return spdx_id in all_spdx_ids_in_document


def get_all_spdx_ids(document: Document) -> Set[str]:
return set(get_list_of_all_spdx_ids(document))


def get_list_of_all_spdx_ids(document: Document) -> List[str]:
all_spdx_ids_in_document: List[str] = [document.creation_info.spdx_id]
all_spdx_ids_in_document.extend(get_contained_spdx_element_ids(document))
Expand All @@ -44,7 +48,7 @@ def is_external_doc_ref_present_in_document(external_ref_id: str, document: Docu


def validate_spdx_id(
spdx_id: str, document: Document, check_document: bool = False, check_files: bool = False
spdx_id: str, document: Document, all_spdx_ids: Set[str], check_document: bool = False, check_files: bool = False

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should you not also have an files_spdx_ids: Set[str] argument for the check_files case? My SPDX file has over 70K entries. The check_document seems redundant here, since you could simply make all_spdx_ids: Optional[Set[str]] instead and go by whether it was passed as None or not.

) -> List[str]:
"""Test that the given spdx_id (and a potential DocumentRef to an external document) is valid
and, if it is a reference, actually exists in the document. Optionally checks files or the whole document
Expand Down Expand Up @@ -87,7 +91,7 @@ def validate_spdx_id(
)

if check_document:
if not is_spdx_id_present_in_document(spdx_id, document):
if not spdx_id in all_spdx_ids:
validation_messages.append(f'did not find the referenced spdx_id "{spdx_id}" in the SPDX document')

if check_files:
Expand Down
5 changes: 3 additions & 2 deletions tests/spdx/validation/test_annotation_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,13 @@

from spdx_tools.spdx.model import Annotation, Document
from spdx_tools.spdx.validation.annotation_validator import validate_annotation
from spdx_tools.spdx.validation.spdx_id_validators import get_all_spdx_ids
from spdx_tools.spdx.validation.validation_message import SpdxElementType, ValidationContext, ValidationMessage
from tests.spdx.fixtures import annotation_fixture, document_fixture, file_fixture


def test_valid_annotation():
validation_messages: List[ValidationMessage] = validate_annotation(annotation_fixture(), document_fixture())
validation_messages: List[ValidationMessage] = validate_annotation(annotation_fixture(), document_fixture(), get_all_spdx_ids(document_fixture()))

assert validation_messages == []

Expand All @@ -31,7 +32,7 @@ def test_valid_annotation():
def test_invalid_annotation(annotation_id, file_id, expected_message):
annotation: Annotation = annotation_fixture(spdx_id=annotation_id)
document: Document = document_fixture(files=[file_fixture(spdx_id=file_id)])
validation_messages: List[ValidationMessage] = validate_annotation(annotation, document)
validation_messages: List[ValidationMessage] = validate_annotation(annotation, document, get_all_spdx_ids(document))

expected = ValidationMessage(
expected_message, ValidationContext(element_type=SpdxElementType.ANNOTATION, full_element=annotation)
Expand Down
Loading
Loading