Skip to content
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,6 @@ nosetests.xml
.venv

.idea
src/scielo-scholarly-data

# Source packages
src/
151 changes: 150 additions & 1 deletion packtools/sps/validation/graphic.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,154 @@
import os
from packtools.sps.validation.visual_resource_base import VisualResourceBaseValidation
from packtools.sps.validation.utils import build_response
from packtools.sps.models.graphic import XmlGraphic


class GraphicValidation(VisualResourceBaseValidation):
...
"""
Validation class for <graphic> and <inline-graphic> elements according to SPS 1.10.

Validates:
- @id attribute (required for both <graphic> and <inline-graphic>)
- @xlink:href attribute (required, with valid file extension)
- File extensions (.jpg, .jpeg, .png, .tif, .tiff, .svg)
- .svg only allowed inside <alternatives>

Note: Accessibility validation (<alt-text>, <long-desc>) is handled separately
by XMLAccessibilityDataValidation in the validation pipeline to avoid duplicates.
"""

def validate(self):
    """
    Run every graphic/inline-graphic check and stream the results.

    The two single-result checks (@id, @xlink:href) are emitted first, in
    that order, followed by whatever the SVG placement check produces
    (possibly nothing).
    """
    for single_check in (self.validate_id, self.validate_xlink_href):
        yield single_check()
    for outcome in self.validate_svg_in_alternatives():
        yield outcome
Comment on lines +21 to +25
Copy link

Copilot AI Mar 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

GraphicValidation.validate() calls the inherited validate_xlink_href(), but VisualResourceBaseValidation.validate_xlink_href() uses os.path.splitext(xlink_href) without guarding for missing @xlink:href. If xlink_href is None (attribute absent), this will raise TypeError and break the whole validation pipeline instead of reporting a CRITICAL/ERROR result. Consider overriding validate_xlink_href() in GraphicValidation (or fixing the base method) to (1) validate presence of @xlink:href and (2) only run extension validation when a value exists.

Copilot uses AI. Check for mistakes.
# Note: Accessibility validation is handled by the dedicated XMLAccessibilityDataValidation
# in the pipeline to avoid duplicate validation entries in reports

def validate_id(self):
    """
    Validate that @id is present on <graphic> and <inline-graphic>.

    Per SPS 1.10 specification, @id is required for both <graphic> and
    <inline-graphic> elements. This overrides the base class behavior which
    exempts inline-* elements.

    Returns:
        The response built by build_response(); it is valid when the "id"
        entry of self.data is truthy. When invalid, the advice quotes the
        element's opening tag (taken from the "xml" entry) or, if that is
        unavailable, the tag name.

    Raises:
        KeyError: If self.params has no "media_attributes_error_level".
    """
    xml = self.data.get("xml")
    tag = self.data.get("tag")
    id_value = self.data.get("id")

    valid = bool(id_value)
    advice = None
    if not valid:
        # Quote everything up to and including the first ">" (the opening
        # tag). If the serialized XML is missing or contains no ">", fall
        # back to the tag name so the advice never ends with an empty
        # element reference.
        opening, closer, _ = (xml or "").partition(">")
        elem = opening + closer if closer else tag
        advice = f'Add id="" to {elem}'

    return build_response(
        title="@id",
        parent=self.data,
        item=tag,
        sub_item=None,
        is_valid=valid,
        validation_type="exist",
        expected="@id attribute",
        obtained=id_value,
        advice=advice,
        error_level=self.params["media_attributes_error_level"],
        data=self.data,
    )

def validate_xlink_href(self):
    """
    Validate @xlink:href presence before delegating to the base class
    extension check.

    The inherited method calls os.path.splitext(xlink_href) unconditionally,
    which raises TypeError when the attribute is absent. This override:
    1. Returns an ERROR-style response (level taken from
       "xlink_href_error_level") when @xlink:href is missing or empty.
    2. Delegates to the base implementation when the attribute is present,
       so extension validation runs normally.

    The advice lists the extensions found in the "valid_extension" rule, so
    the message stays in step with the configured rule set; the historical
    hard-coded list is used only when that rule is absent or empty.

    Raises:
        KeyError: If the attribute is missing and self.params has no
            "xlink_href_error_level".
    """
    xlink_href = self.data.get("xlink_href")
    if xlink_href:
        return super().validate_xlink_href()

    tag = self.data.get("tag")

    # presumably the same list the inherited extension check enforces --
    # confirm against VisualResourceBaseValidation.
    extensions = self.params.get("valid_extension") or (
        "jpg", "jpeg", "png", "tif", "tiff", "svg",
    )
    if isinstance(extensions, str):
        # A lone string would otherwise be joined character by character.
        extensions = (extensions,)
    allowed = ", ".join(str(ext) for ext in extensions)

    return build_response(
        title="@xlink:href",
        parent=self.data,
        item=tag,
        sub_item=None,
        is_valid=False,
        validation_type="exist",
        expected="@xlink:href attribute with valid file extension",
        obtained=None,
        advice=(
            f'Add xlink:href="filename.ext" to '
            f'<{tag}> '
            f'(valid extensions: {allowed})'
        ),
        error_level=self.params["xlink_href_error_level"],
        data=self.data,
    )

def validate_svg_in_alternatives(self):
    """
    Check that an .svg reference only appears inside <alternatives>.

    Per SPS 1.10 specification:
    - .svg files are only allowed when the graphic is inside <alternatives>
    - Other formats (.jpg, .jpeg, .png, .tif, .tiff) can be used anywhere

    Nothing is produced when @xlink:href is missing/empty or when its
    extension (compared case-insensitively) is not ".svg".

    Yields:
        dict: A single validation response for an .svg reference.
    """
    href = self.data.get("xlink_href")
    if not href:
        return

    extension = os.path.splitext(href)[1].lower()
    if extension != ".svg":
        return

    container = self.data.get("parent_tag")
    element = self.data.get("tag")
    inside_alternatives = container == "alternatives"

    if inside_alternatives:
        advice = None
    else:
        advice = (
            f"SVG files are only allowed inside <alternatives>. "
            f"The file '{href}' is currently in <{container}>. "
            f"Either move this <graphic> inside <alternatives> or use a "
            f"different format (.jpg, .png, .tif)."
        )

    yield build_response(
        title="SVG in alternatives",
        parent=self.data,
        item=element,
        sub_item="xlink_href",
        is_valid=inside_alternatives,
        validation_type="format",
        expected="<graphic> with .svg extension inside <alternatives>",
        obtained=f"{element} with .svg inside <{container}>",
        advice=advice,
        error_level=self.params.get("svg_error_level", "ERROR"),
        data=self.data,
    )


class XMLGraphicValidation:
    """
    Document-level validator for <graphic> and <inline-graphic> elements.

    Mirrors the pattern used by XMLMediaValidation and
    XMLAccessibilityDataValidation: every graphic entry exposed by the
    XmlGraphic model is handed to a GraphicValidation instance and the
    individual results are streamed back to the caller.
    """

    def __init__(self, xmltree, params):
        # Rules are stored as given and forwarded to each per-element validator.
        self.params = params
        self.xml_graphic = XmlGraphic(xmltree)

    def validate(self):
        """
        Run GraphicValidation over each graphic/inline-graphic entry.

        Yields the validation results of every element, in document-model
        order; yields nothing when the model exposes no entries.
        """
        for graphic_data in self.xml_graphic.data:
            element_validator = GraphicValidation(graphic_data, self.params)
            for outcome in element_validator.validate():
                yield outcome
21 changes: 20 additions & 1 deletion packtools/sps/validation/xml_validations.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@

from packtools.sps.validation.supplementary_material import XmlSupplementaryMaterialValidation
from packtools.sps.validation.ext_link import ExtLinkValidation
from packtools.sps.validation.graphic import XMLGraphicValidation


def validate_affiliations(xmltree, params):
Expand Down Expand Up @@ -309,7 +310,7 @@ def validate_supplementary_materials(xmltree, params):
def validate_ext_links(xmltree, params):
"""
Validates ext-link elements according to SPS 1.10 specification.

Validates:
- Mandatory attributes (@ext-link-type, @xlink:href)
- URL format (must start with http:// or https://)
Expand All @@ -325,3 +326,21 @@ def validate_ext_links(xmltree, params):
yield from validator.validate_ext_link_type_value()
yield from validator.validate_descriptive_text()
yield from validator.validate_xlink_title_when_generic()


def validate_graphics(xmltree, params):
"""
Validates <graphic> and <inline-graphic> elements according to SPS 1.10 specification.

Validates:
- @id attribute (required for both <graphic> and <inline-graphic>)
- @xlink:href attribute (required, with valid file extension)
- File extensions (.jpg, .jpeg, .png, .tif, .tiff, .svg)
- .svg only allowed inside <alternatives>

Note: Accessibility validation (<alt-text>, <long-desc>) is handled separately
by validate_accessibility_data() via XMLAccessibilityDataValidation.

Args:
xmltree: XML tree handed unchanged to XMLGraphicValidation.
params: Mapping of validation rules; only its "graphic_rules" entry is used.

Yields:
Every result produced by XMLGraphicValidation.validate().

Raises:
KeyError: If params is a dict without a "graphic_rules" entry. Because this
is a generator function, the error surfaces when iteration starts, not
when the function is called.
"""
Comment on lines +332 to +343
Copy link

Copilot AI Mar 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The docstring for validate_graphics() says it validates accessibility elements (<alt-text>, <long-desc>), but XMLGraphicValidation/GraphicValidation no longer runs accessibility validation (it’s handled by XMLAccessibilityDataValidation). Update the docstring to avoid misleading consumers about what the graphic group covers.

Copilot uses AI. Check for mistakes.
# Only the graphic-specific rule group is forwarded to the validator.
graphic_rules = params["graphic_rules"]
validator = XMLGraphicValidation(xmltree, graphic_rules)
yield from validator.validate()
4 changes: 4 additions & 0 deletions packtools/sps/validation/xml_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,3 +149,7 @@ def validate_xml_content(xmltree, rules):
"group": "ext-link",
"items": xml_validations.validate_ext_links(xmltree, params),
}
yield {
"group": "graphic",
"items": xml_validations.validate_graphics(xmltree, params),
}
8 changes: 8 additions & 0 deletions packtools/sps/validation_rules/graphic_rules.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"graphic_rules": {
"media_attributes_error_level": "CRITICAL",
"xlink_href_error_level": "ERROR",
"valid_extension": ["jpg", "jpeg", "png", "tif", "tiff", "svg"],
"svg_error_level": "ERROR"
}
}
Loading