From 7c43c7f23e72d170633fc217cb7990642ddca667 Mon Sep 17 00:00:00 2001
From: Douglas Lowe <10961945+douglowe@users.noreply.github.com>
Date: Mon, 12 Jan 2026 14:50:52 +0000
Subject: [PATCH 01/15] profile path optional input for rocrate validation task
---
app/tasks/validation_tasks.py | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/app/tasks/validation_tasks.py b/app/tasks/validation_tasks.py
index 0a62b55..74cb265 100644
--- a/app/tasks/validation_tasks.py
+++ b/app/tasks/validation_tasks.py
@@ -158,7 +158,7 @@ def process_validation_task_by_metadata(
def perform_ro_crate_validation(
- file_path: str, profile_name: str | None, skip_checks_list: Optional[list] = None
+ file_path: str, profile_name: str | None, skip_checks_list: Optional[list] = None, profiles_path: Optional[str] = None
) -> ValidationResult | str:
"""
Validates an RO-Crate using the provided file path and profile name.
@@ -166,6 +166,7 @@ def perform_ro_crate_validation(
:param file_path: The path to the RO-Crate file to validate
:param profile_name: The name of the validation profile to use. Defaults to None. If None, the CRS4 validator will
attempt to determine the profile.
+ :param profiles_path: Optional path to the profiles definition directory. If None or empty, no profiles path is passed to the validator settings.
:param skip_checks_list: A list of checks to skip, if needed
:return: The validation result.
:raises Exception: If an error occurs during the validation process.
@@ -183,7 +184,8 @@ def perform_ro_crate_validation(
settings = services.ValidationSettings(
rocrate_uri=full_file_path,
**({"profile_identifier": profile_name} if profile_name else {}),
- **({"skip_checks": skip_checks_list} if skip_checks_list else {})
+ **({"skip_checks": skip_checks_list} if skip_checks_list else {}),
+ **({"profiles_path": profiles_path} if profiles_path else {})
)
return services.validate(settings)
From afa0354321f14e897e4c342f4eca6aee3df5e77b Mon Sep 17 00:00:00 2001
From: Douglas Lowe <10961945+douglowe@users.noreply.github.com>
Date: Mon, 19 Jan 2026 14:02:11 +0000
Subject: [PATCH 02/15] clean config class, use for celery app, add
profiles_path
---
app/utils/config.py | 31 ++++++++++++++++---------------
1 file changed, 16 insertions(+), 15 deletions(-)
diff --git a/app/utils/config.py b/app/utils/config.py
index a57b63f..28e71ae 100644
--- a/app/utils/config.py
+++ b/app/utils/config.py
@@ -10,34 +10,32 @@
from flask import Flask
+def get_env(name: str, default=None, required=False):
+ value = os.environ.get(name, default)
+ if required and value is None:
+ raise RuntimeError(f"Missing required environment variable: {name}")
+ return value
+
+
class Config:
"""Base configuration class for the Flask application."""
- SECRET_KEY = os.getenv("SECRET_KEY", "my_precious")
-
# Celery configuration:
- CELERY_BROKER_URL = os.getenv("CELERY_BROKER_URL")
- CELERY_RESULT_BACKEND = os.getenv("CELERY_RESULT_BACKEND")
+ CELERY_BROKER_URL = get_env("CELERY_BROKER_URL", required=False)
+ CELERY_RESULT_BACKEND = get_env("CELERY_RESULT_BACKEND", required=False)
- # MinIO configuration:
- MINIO_ENDPOINT = os.getenv("MINIO_ENDPOINT")
- MINIO_ACCESS_KEY = os.getenv("MINIO_ACCESS_KEY")
- MINIO_SECRET_KEY = os.getenv("MINIO_SECRET_KEY")
- MINIO_BUCKET_NAME = os.getenv("MINIO_BUCKET_NAME", "bucket-name")
+ # rocrate validator configuration:
+ PROFILES_PATH = get_env("PROFILES_PATH", required=False)
class DevelopmentConfig(Config):
"""Development configuration class."""
-
DEBUG = True
- ENV = "development"
class ProductionConfig(Config):
"""Production configuration class."""
-
DEBUG = False
- ENV = "production"
class InvalidAPIUsage(Exception):
@@ -63,10 +61,13 @@ def make_celery(app: Flask = None) -> Celery:
:param app: The Flask application to use.
:return: The Celery instance.
"""
+ env = os.environ.get("FLASK_ENV", "development")
+ config_cls = ProductionConfig if env == "production" else DevelopmentConfig
+
celery = Celery(
app.import_name if app else __name__,
- broker=os.getenv("CELERY_BROKER_URL"),
- backend=os.getenv("CELERY_RESULT_BACKEND"),
+ broker=config_cls.CELERY_BROKER_URL,
+ backend=config_cls.CELERY_RESULT_BACKEND,
)
if app:
From e2a030ac09b074c7a410609882652514f631948c Mon Sep 17 00:00:00 2001
From: Douglas Lowe <10961945+douglowe@users.noreply.github.com>
Date: Mon, 19 Jan 2026 14:02:57 +0000
Subject: [PATCH 03/15] provide route for passing profiles_path to rocrate
validator call
---
app/ro_crates/routes/post_routes.py | 7 +++++--
app/services/validation_service.py | 6 ++++--
app/tasks/validation_tasks.py | 5 +++--
3 files changed, 12 insertions(+), 6 deletions(-)
diff --git a/app/ro_crates/routes/post_routes.py b/app/ro_crates/routes/post_routes.py
index c1ebcdb..2c517f4 100644
--- a/app/ro_crates/routes/post_routes.py
+++ b/app/ro_crates/routes/post_routes.py
@@ -7,7 +7,7 @@
from apiflask import APIBlueprint, Schema
from apiflask.fields import String, Boolean
from marshmallow.fields import Nested
-from flask import Response
+from flask import Response, current_app
from app.services.validation_service import (
queue_ro_crate_validation_task,
@@ -81,7 +81,10 @@ def validate_ro_crate_via_id(json_data, crate_id) -> tuple[Response, int]:
else:
profile_name = None
- return queue_ro_crate_validation_task(minio_config, crate_id, root_path, profile_name, webhook_url)
+ profiles_path = current_app.config["PROFILES_PATH"]
+
+ return queue_ro_crate_validation_task(minio_config, crate_id, root_path, profile_name,
+ webhook_url, profiles_path)
@post_routes_bp.post("/validate_metadata")
diff --git a/app/services/validation_service.py b/app/services/validation_service.py
index 67dde94..b51a088 100644
--- a/app/services/validation_service.py
+++ b/app/services/validation_service.py
@@ -25,7 +25,8 @@
def queue_ro_crate_validation_task(
- minio_config, crate_id, root_path=None, profile_name=None, webhook_url=None
+ minio_config, crate_id, root_path=None, profile_name=None, webhook_url=None,
+ profiles_path=None
) -> tuple[Response, int]:
"""
Queues an RO-Crate for validation with Celery.
@@ -51,7 +52,8 @@ def queue_ro_crate_validation_task(
raise InvalidAPIUsage(f"No RO-Crate with prefix: {crate_id}", 400)
try:
- process_validation_task_by_id.delay(minio_config, crate_id, root_path, profile_name, webhook_url)
+ process_validation_task_by_id.delay(minio_config, crate_id, root_path,
+ profile_name, webhook_url, profiles_path)
return jsonify({"message": "Validation in progress"}), 202
except Exception as e:
diff --git a/app/tasks/validation_tasks.py b/app/tasks/validation_tasks.py
index 74cb265..e27c46c 100644
--- a/app/tasks/validation_tasks.py
+++ b/app/tasks/validation_tasks.py
@@ -29,7 +29,8 @@
@celery.task
def process_validation_task_by_id(
- minio_config: dict, crate_id: str, root_path: str, profile_name: str | None, webhook_url: str | None
+ minio_config: dict, crate_id: str, root_path: str, profile_name: str | None,
+ webhook_url: str | None, profiles_path: str | None
) -> None:
"""
Background task to process the RO-Crate validation by ID.
@@ -56,7 +57,7 @@ def process_validation_task_by_id(
logging.info(f"Processing validation task for {file_path}")
# Perform validation:
- validation_result = perform_ro_crate_validation(file_path, profile_name)
+ validation_result = perform_ro_crate_validation(file_path, profile_name, profiles_path=profiles_path)
if isinstance(validation_result, str):
logging.error(f"Validation failed: {validation_result}")
From 96955ff02e92cb5bed622b6f6f652bdb778473f7 Mon Sep 17 00:00:00 2001
From: Douglas Lowe <10961945+douglowe@users.noreply.github.com>
Date: Mon, 19 Jan 2026 14:03:40 +0000
Subject: [PATCH 04/15] update tests for profiles_path variable
---
tests/test_api_routes.py | 31 +++++++++++++++++++++----------
tests/test_services.py | 17 +++++++++++------
tests/test_validation_tasks.py | 28 ++++++++++++++--------------
3 files changed, 46 insertions(+), 30 deletions(-)
diff --git a/tests/test_api_routes.py b/tests/test_api_routes.py
index f527501..e50b511 100644
--- a/tests/test_api_routes.py
+++ b/tests/test_api_routes.py
@@ -13,7 +13,7 @@ def client():
# Test POST API: /v1/ro_crates/{crate_id}/validation
@pytest.mark.parametrize(
- "crate_id, payload, status_code, response_json",
+ "crate_id, payload, profiles_path, status_code, response_json",
[
(
"crate-123", {
@@ -27,7 +27,9 @@ def client():
"root_path": "base_path",
"webhook_url": "https://webhook.example.com",
"profile_name": "default"
- }, 202, {"message": "Validation in progress"}
+ },
+ None,
+ 202, {"message": "Validation in progress"}
),
(
"crate-123", {
@@ -38,9 +40,11 @@ def client():
"ssl": False,
"bucket": "test_bucket"
},
- "root_path": "base_path",
+ "root_path": "base_path",
"webhook_url": "https://webhook.example.com",
- }, 202, {"message": "Validation in progress"}
+ },
+ None,
+ 202, {"message": "Validation in progress"}
),
(
"crate-123", {
@@ -51,9 +55,11 @@ def client():
"ssl": False,
"bucket": "test_bucket"
},
- "root_path": "base_path",
+ "root_path": "base_path",
"profile_name": "default"
- }, 202, {"message": "Validation in progress"}
+ },
+ None,
+ 202, {"message": "Validation in progress"}
),
(
"crate-123", {
@@ -66,7 +72,9 @@ def client():
},
"webhook_url": "https://webhook.example.com",
"profile_name": "default"
- }, 202, {"message": "Validation in progress"}
+ },
+ None,
+ 202, {"message": "Validation in progress"}
),
(
"crate-123", {
@@ -77,14 +85,17 @@ def client():
"ssl": False,
"bucket": "test_bucket"
},
- }, 202, {"message": "Validation in progress"}
+ },
+ None,
+ 202, {"message": "Validation in progress"}
),
],
ids=["validate_by_id", "validate_with_missing_profile_name",
"validate_with_missing_webhook_url", "validate_with_missing_root_path",
"validate_with_missing_root_path_and_profile_name_and_webhook_url"]
)
-def test_validate_by_id_success(client: FlaskClient, crate_id: str, payload: dict, status_code: int, response_json: dict):
+def test_validate_by_id_success(client: FlaskClient, crate_id: str, payload: dict,
+ profiles_path: str, status_code: int, response_json: dict):
with patch("app.ro_crates.routes.post_routes.queue_ro_crate_validation_task") as mock_queue:
mock_queue.return_value = (response_json, status_code)
@@ -96,7 +107,7 @@ def test_validate_by_id_success(client: FlaskClient, crate_id: str, payload: dic
webhook_url = payload["webhook_url"] if "webhook_url" in payload else None
assert response.status_code == status_code
assert response.json == response_json
- mock_queue.assert_called_once_with(minio_config, crate_id, root_path, profile_name, webhook_url)
+ mock_queue.assert_called_once_with(minio_config, crate_id, root_path, profile_name, webhook_url, profiles_path)
@pytest.mark.parametrize(
diff --git a/tests/test_services.py b/tests/test_services.py
index c7d50c3..ccebeba 100644
--- a/tests/test_services.py
+++ b/tests/test_services.py
@@ -22,7 +22,7 @@ def flask_app():
# Test function: queue_ro_crate_validation_task
@pytest.mark.parametrize(
- "crate_id, rocrate_exists, minio_client, delay_side_effects, payload, status_code, response_dict",
+ "crate_id, rocrate_exists, minio_client, delay_side_effects, payload, profiles_path, status_code, response_dict",
[
(
"crate123", True, "minio_client", None,
@@ -37,7 +37,9 @@ def flask_app():
"root_path": "base_path",
"webhook_url": "https://webhook.example.com",
"profile_name": "default"
- }, 202, {"message": "Validation in progress"}
+ },
+ None,
+ 202, {"message": "Validation in progress"}
),
(
"crate123", True, "minio_client", Exception("Celery down"),
@@ -52,7 +54,9 @@ def flask_app():
"root_path": "base_path",
"webhook_url": "https://webhook.example.com",
"profile_name": "default"
- }, 500, {"error": "Celery down"}
+ },
+ None,
+ 500, {"error": "Celery down"}
),
],
ids=["successful_queue", "celery_server_down"]
@@ -65,7 +69,7 @@ def test_queue_ro_crate_validation_task(
mock_exists,
mock_delay,
flask_app: FlaskClient, crate_id: str, rocrate_exists: bool, minio_client: str,
- delay_side_effects: Exception, payload: dict, status_code: int, response_dict: dict
+ delay_side_effects: Exception, payload: dict, profiles_path: str, status_code: int, response_dict: dict
):
mock_delay.side_effect = delay_side_effects
mock_exists.return_value = rocrate_exists
@@ -76,11 +80,12 @@ def test_queue_ro_crate_validation_task(
profile_name = payload["profile_name"] if "profile_name" in payload else None
webhook_url = payload["webhook_url"] if "webhook_url" in payload else None
- response, status_code = queue_ro_crate_validation_task(minio_config, crate_id, root_path, profile_name, webhook_url)
+ response, status_code = queue_ro_crate_validation_task(minio_config, crate_id, root_path,
+ profile_name, webhook_url, profiles_path)
mock_client.assert_called_once_with(minio_config)
mock_exists.assert_called_once_with(minio_client, minio_config["bucket"], crate_id, root_path)
- mock_delay.assert_called_once_with(minio_config, crate_id, root_path, profile_name, webhook_url)
+ mock_delay.assert_called_once_with(minio_config, crate_id, root_path, profile_name, webhook_url, profiles_path)
assert status_code == status_code
assert response.json == response_dict
diff --git a/tests/test_validation_tasks.py b/tests/test_validation_tasks.py
index afa11c2..49c3fed 100644
--- a/tests/test_validation_tasks.py
+++ b/tests/test_validation_tasks.py
@@ -17,7 +17,7 @@
@pytest.mark.parametrize(
"minio_config, crate_id, os_path_exists, os_path_isfile, os_path_isdir, " +
- "return_value, webhook, profile, val_success, val_result, minio_client",
+ "return_value, webhook, profile, profiles_path, val_success, val_result, minio_client",
[
(
{
@@ -28,7 +28,7 @@
"bucket": "test_bucket"
},
"crate123", True, True, False, "/tmp/crate.zip",
- "https://example.com/hook", "profileA", True, '{"status": "valid"}',
+ "https://example.com/hook", "profileA", None, True, '{"status": "valid"}',
"minio_client"
),
(
@@ -40,7 +40,7 @@
"bucket": "test_bucket"
},
"crate123", True, False, True, "/tmp/crate123",
- "https://example.com/hook", "profileA", True, '{"status": "valid"}',
+ "https://example.com/hook", "profileA", None, True, '{"status": "valid"}',
"minio_client"
),
(
@@ -52,7 +52,7 @@
"bucket": "test_bucket"
},
"crate123", True, False, True, "/tmp/crate123",
- None, "profileA", True, '{"status": "valid"}',
+ None, "profileA", None, True, '{"status": "valid"}',
"minio_client"
),
],
@@ -80,7 +80,7 @@ def test_process_validation(
mock_rmtree,
mock_client,
minio_config: dict, crate_id: str, os_path_exists: bool, os_path_isfile: bool, os_path_isdir: bool,
- return_value: str, webhook: str, profile: str, val_success: bool, val_result: str, minio_client: str
+ return_value: str, webhook: str, profile: str, profiles_path: str, val_success: bool, val_result: str, minio_client: str
):
mock_exists.return_value = os_path_exists
mock_isfile.return_value = os_path_isfile
@@ -93,11 +93,11 @@ def test_process_validation(
mock_validation_result.to_json.return_value = val_result
mock_validate.return_value = mock_validation_result
- process_validation_task_by_id(minio_config, crate_id, "", profile, webhook)
+ process_validation_task_by_id(minio_config, crate_id, "", profile, webhook, profiles_path)
mock_client.assert_called_once_with(minio_config)
mock_fetch.assert_called_once_with(minio_client, minio_config["bucket"], crate_id, "")
- mock_validate.assert_called_once_with(return_value, profile)
+ mock_validate.assert_called_once_with(return_value, profile, profiles_path=profiles_path)
mock_update.assert_called_once_with(minio_client, minio_config["bucket"], crate_id, "", val_result)
if webhook is not None:
mock_webhook.assert_called_once_with(webhook, val_result)
@@ -113,7 +113,7 @@ def test_process_validation(
@pytest.mark.parametrize(
"minio_config, crate_id, os_path_exists, os_path_isfile, os_path_isdir, return_fetch, "
- + "webhook, profile, return_validate, validate_side_effect, fetch_side_effect, minio_client",
+ + "webhook, profile, profiles_path, return_validate, validate_side_effect, fetch_side_effect, minio_client",
[
(
{
@@ -124,7 +124,7 @@ def test_process_validation(
"bucket": "test_bucket"
},
"crate123", True, True, False, "/tmp/crate.zip",
- "https://example.com/hook", "profileA", "Validation failed", None, None,
+ "https://example.com/hook", "profileA", None, "Validation failed", None, None,
"minio_client"
),
(
@@ -136,7 +136,7 @@ def test_process_validation(
"bucket": "test_bucket"
},
"crate123", True, True, False, "/tmp/crate.zip",
- "https://example.com/hook", "profileA", None, Exception("Unexpected error"), None,
+ "https://example.com/hook", "profileA", None, None, Exception("Unexpected error"), None,
"minio_client"
),
(
@@ -148,7 +148,7 @@ def test_process_validation(
"bucket": "test_bucket"
},
"crate123", False, False, False, None,
- "https://example.com/hook", "profileA", None, None, Exception("MinIO fetch failed"),
+ "https://example.com/hook", "profileA", None, None, None, Exception("MinIO fetch failed"),
"minio_client"
),
],
@@ -177,7 +177,7 @@ def test_process_validation_failure(
mock_rmtree,
mock_client,
minio_config: dict, crate_id: str, os_path_exists: bool, os_path_isfile: bool, os_path_isdir: bool,
- return_fetch: str, webhook: str, profile: str, return_validate: str,
+ return_fetch: str, webhook: str, profile: str, profiles_path: str, return_validate: str,
validate_side_effect: Exception, fetch_side_effect: Exception, minio_client: str
):
mock_exists.return_value = os_path_exists
@@ -195,10 +195,10 @@ def test_process_validation_failure(
else:
mock_validate.side_effect = validate_side_effect
- process_validation_task_by_id(minio_config, crate_id, "", profile, webhook)
+ process_validation_task_by_id(minio_config, crate_id, "", profile, webhook, profiles_path)
if fetch_side_effect is None:
- mock_validate.assert_called_once_with(return_fetch, profile)
+ mock_validate.assert_called_once_with(return_fetch, profile, profiles_path=profiles_path)
else:
mock_validate.assert_not_called()
From 435199af3260ddff7d2cf32df6f8e5453ee24dd9 Mon Sep 17 00:00:00 2001
From: Douglas Lowe <10961945+douglowe@users.noreply.github.com>
Date: Mon, 19 Jan 2026 15:00:53 +0000
Subject: [PATCH 05/15] switch to extra_profiles_path option for validator
additional profiles
---
app/tasks/validation_tasks.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/app/tasks/validation_tasks.py b/app/tasks/validation_tasks.py
index e27c46c..3b178e5 100644
--- a/app/tasks/validation_tasks.py
+++ b/app/tasks/validation_tasks.py
@@ -186,7 +186,7 @@ def perform_ro_crate_validation(
rocrate_uri=full_file_path,
**({"profile_identifier": profile_name} if profile_name else {}),
**({"skip_checks": skip_checks_list} if skip_checks_list else {}),
- **({"profiles_path": profiles_path} if profiles_path else {})
+ **({"extra_profiles_path": profiles_path} if profiles_path else {})
)
return services.validate(settings)
From ce29cde62f86a28e37f80ca7d21d9af97760eba8 Mon Sep 17 00:00:00 2001
From: Douglas Lowe <10961945+douglowe@users.noreply.github.com>
Date: Tue, 20 Jan 2026 17:34:28 +0000
Subject: [PATCH 06/15] docker compose profile loading example
---
docker-compose-develop.yml | 3 +++
1 file changed, 3 insertions(+)
diff --git a/docker-compose-develop.yml b/docker-compose-develop.yml
index fa7661d..e6b6a0e 100644
--- a/docker-compose-develop.yml
+++ b/docker-compose-develop.yml
@@ -32,9 +32,12 @@ services:
- MINIO_ROOT_USER=${MINIO_ROOT_USER}
- MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD}
- MINIO_BUCKET_NAME=${MINIO_BUCKET_NAME}
+ - PROFILES_PATH=/app/profiles
depends_on:
- redis
- minio
+ volumes:
+ - ./tests/data/rocrate_validator_profiles:/app/profiles:ro
redis:
image: "redis:alpine"
From 18b88844c4644765a41b98ae0ead7f0cde9938d6 Mon Sep 17 00:00:00 2001
From: Douglas Lowe <10961945+douglowe@users.noreply.github.com>
Date: Tue, 20 Jan 2026 17:35:08 +0000
Subject: [PATCH 07/15] integration test for providing extra profile for
validation
---
tests/test_integration.py | 74 +++++++++++++++++++++++++++++++++++++++
1 file changed, 74 insertions(+)
diff --git a/tests/test_integration.py b/tests/test_integration.py
index 1e90a1a..63941c4 100644
--- a/tests/test_integration.py
+++ b/tests/test_integration.py
@@ -350,6 +350,80 @@ def test_directory_rocrate_validation():
assert response_result["passed"] is False
+def test_extra_profile_rocrate_validation():
+ ro_crate = "ro_crate_2"
+ profile_name = "alpha-crate-0.1"
+ url_post = f"http://localhost:5001/v1/ro_crates/{ro_crate}/validation"
+ url_get = f"http://localhost:5001/v1/ro_crates/{ro_crate}/validation"
+ headers = {
+ "accept": "application/json",
+ "Content-Type": "application/json"
+ }
+
+ # Payloads are plain dicts; requests serialises them to JSON via the json= argument
+ post_payload = {
+ "minio_config": {
+ "endpoint": "minio:9000",
+ "accesskey": "minioadmin",
+ "secret": "minioadmin",
+ "ssl": False,
+ "bucket": "ro-crates"
+ },
+ "profile_name": profile_name
+ }
+ get_payload = {
+ "minio_config": {
+ "endpoint": "minio:9000",
+ "accesskey": "minioadmin",
+ "secret": "minioadmin",
+ "ssl": False,
+ "bucket": "ro-crates"
+ }
+ }
+
+ # POST action and tests
+ response = requests.post(url_post, json=post_payload, headers=headers)
+ response_result = response.json()['message']
+
+ # Print response for debugging
+ print("Status Code:", response.status_code)
+ print("Response JSON:", response_result)
+
+ # Assertions
+ assert response.status_code == 202
+ assert response_result == "Validation in progress"
+
+ # wait for ro-crate to be validated
+ time.sleep(10)
+
+ # GET action and tests
+ response = requests.get(url_get, json=get_payload, headers=headers)
+ response_result = response.json()
+
+ # Print response for debugging
+ print("Status Code:", response.status_code)
+ print("Response JSON:", response_result)
+
+ start_time = time.time()
+ while response.status_code == 400:
+ time.sleep(10)
+ # GET action and tests
+ response = requests.get(url_get, json=get_payload, headers=headers)
+ response_result = response.json()
+ # Print response for debugging
+ print("Status Code:", response.status_code)
+ print("Response JSON:", response_result)
+
+ elapsed = time.time() - start_time
+ if elapsed > 60:
+ print("60 seconds passed. Exiting loop")
+ break
+
+ # Assertions
+ assert response.status_code == 200
+ assert response_result["passed"] is False
+
+
def test_ignore_rocrates_not_on_basepath():
ro_crate = "ro_crate_4"
url_post = f"http://localhost:5001/v1/ro_crates/{ro_crate}/validation"
From 8f3fee0276448ba3244cfe687335cf5bbf46c2b6 Mon Sep 17 00:00:00 2001
From: Douglas Lowe <10961945+douglowe@users.noreply.github.com>
Date: Mon, 9 Feb 2026 13:22:17 +0000
Subject: [PATCH 08/15] full profile directory for crate validator, not extra
profiles path
---
app/tasks/validation_tasks.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/app/tasks/validation_tasks.py b/app/tasks/validation_tasks.py
index 3b178e5..e27c46c 100644
--- a/app/tasks/validation_tasks.py
+++ b/app/tasks/validation_tasks.py
@@ -186,7 +186,7 @@ def perform_ro_crate_validation(
rocrate_uri=full_file_path,
**({"profile_identifier": profile_name} if profile_name else {}),
**({"skip_checks": skip_checks_list} if skip_checks_list else {}),
- **({"extra_profiles_path": profiles_path} if profiles_path else {})
+ **({"profiles_path": profiles_path} if profiles_path else {})
)
return services.validate(settings)
From 6215b5f879fe1c23d1b79908b8a9184f77d59ea0 Mon Sep 17 00:00:00 2001
From: Douglas Lowe <10961945+douglowe@users.noreply.github.com>
Date: Mon, 9 Feb 2026 13:23:17 +0000
Subject: [PATCH 09/15] profiles path (in develop) set for flask not celery
worker
---
docker-compose-develop.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docker-compose-develop.yml b/docker-compose-develop.yml
index e6b6a0e..bffae0f 100644
--- a/docker-compose-develop.yml
+++ b/docker-compose-develop.yml
@@ -16,6 +16,7 @@ services:
- MINIO_ROOT_USER=${MINIO_ROOT_USER}
- MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD}
- MINIO_BUCKET_NAME=${MINIO_BUCKET_NAME}
+ - PROFILES_PATH=/app/profiles
depends_on:
- redis
- minio
@@ -32,7 +33,6 @@ services:
- MINIO_ROOT_USER=${MINIO_ROOT_USER}
- MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD}
- MINIO_BUCKET_NAME=${MINIO_BUCKET_NAME}
- - PROFILES_PATH=/app/profiles
depends_on:
- redis
- minio
From e1a161053b1247b23cd7d2078986768b134a6047 Mon Sep 17 00:00:00 2001
From: Douglas Lowe <10961945+douglowe@users.noreply.github.com>
Date: Mon, 9 Feb 2026 13:30:40 +0000
Subject: [PATCH 10/15] remove extraneous environment variables from dev celery
worker
---
docker-compose-develop.yml | 4 ----
1 file changed, 4 deletions(-)
diff --git a/docker-compose-develop.yml b/docker-compose-develop.yml
index bffae0f..a8d27c2 100644
--- a/docker-compose-develop.yml
+++ b/docker-compose-develop.yml
@@ -29,10 +29,6 @@ services:
environment:
- CELERY_BROKER_URL=redis://redis:6379/0
- CELERY_RESULT_BACKEND=redis://redis:6379/0
- - MINIO_ENDPOINT=${MINIO_ENDPOINT}
- - MINIO_ROOT_USER=${MINIO_ROOT_USER}
- - MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD}
- - MINIO_BUCKET_NAME=${MINIO_BUCKET_NAME}
depends_on:
- redis
- minio
From 35286849972a02a33a2aa2d92c23705f97607ba5 Mon Sep 17 00:00:00 2001
From: Douglas Lowe <10961945+douglowe@users.noreply.github.com>
Date: Mon, 9 Feb 2026 13:31:19 +0000
Subject: [PATCH 11/15] remove extraneous environment variables from main
celery worker
---
docker-compose.yml | 4 ----
1 file changed, 4 deletions(-)
diff --git a/docker-compose.yml b/docker-compose.yml
index 8605c0c..bd47218 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -26,10 +26,6 @@ services:
environment:
- CELERY_BROKER_URL=redis://redis:6379/0
- CELERY_RESULT_BACKEND=redis://redis:6379/0
- - MINIO_ENDPOINT=${MINIO_ENDPOINT}
- - MINIO_ROOT_USER=${MINIO_ROOT_USER}
- - MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD}
- - MINIO_BUCKET_NAME=${MINIO_BUCKET_NAME}
depends_on:
- redis
- minio
From 7ec382411aa55d8cadbc60cf9756e9b553418324 Mon Sep 17 00:00:00 2001
From: Douglas Lowe <10961945+douglowe@users.noreply.github.com>
Date: Mon, 9 Feb 2026 13:38:40 +0000
Subject: [PATCH 12/15] add test profiles
---
.../alpha-crate/1_root_data_entity.ttl | 46 ++
.../alpha-crate/profile.ttl | 55 +++
.../may/11_workflow_execution_phase.ttl | 64 +++
.../five-safes-crate/may/12_check_phase.ttl | 56 +++
.../may/13_validation_phase.ttl | 57 +++
.../may/14_workflow_retrieval_phase.ttl | 102 +++++
.../may/1_responsible_project.ttl | 56 +++
.../five-safes-crate/may/4_sign_off.ttl | 58 +++
.../may/8_disclosure_phase.ttl | 58 +++
.../must/11_workflow_execution_phase.ttl | 64 +++
.../five-safes-crate/must/12_check_phase.ttl | 93 ++++
.../must/13_validation_phase.ttl | 126 ++++++
.../must/14_workflow_retrieval_phase.ttl | 157 +++++++
.../five-safes-crate/must/15_metadata_file.py | 64 +++
.../must/15_metadata_file.ttl | 41 ++
.../must/16_publishing_phase.ttl | 41 ++
.../must/1_requesting_agent.ttl | 82 ++++
.../must/1_responsible_project.ttl | 59 +++
.../must/1_root_data_entity.ttl | 46 ++
.../must/2_requesting_agent.ttl | 63 +++
.../must/3_timestamp_format.ttl | 54 +++
.../five-safes-crate/must/4_sign_off.ttl | 89 ++++
.../must/6_workflow_reference.ttl | 80 ++++
.../must/7_requested_workflow_run.ttl | 86 ++++
.../must/8_disclosure_phase.ttl | 100 +++++
.../five-safes-crate/profile.ttl | 83 ++++
.../five-safes-crate/should/10_outputs.ttl | 89 ++++
.../should/11_workflow_execution_phase.ttl | 85 ++++
.../should/12_check_phase.ttl | 190 +++++++++
.../should/13_validation_phase.ttl | 165 +++++++
.../should/14_workflow_retrieval_phase.ttl | 125 ++++++
.../should/1_requesting_agent.ttl | 46 ++
.../should/1_responsible_project.ttl | 60 +++
.../should/2_requesting_agent.ttl | 49 +++
.../five-safes-crate/should/4_sign_off.ttl | 177 ++++++++
.../should/6_workflow_reference.ttl | 49 +++
.../should/7_requested_workflow_run.ttl | 36 ++
.../should/8_disclosure_phase.ttl | 114 +++++
.../five-safes-crate/should/9_inputs.ttl | 58 +++
.../ro-crate/may/4_data_entity_metadata.ttl | 89 ++++
.../ro-crate/may/61_license_entity.ttl | 66 +++
.../ro-crate/must/0_file_descriptor_format.py | 401 ++++++++++++++++++
.../must/1_file-descriptor_metadata.ttl | 100 +++++
.../must/2_root_data_entity_metadata.ttl | 175 ++++++++
.../ro-crate/must/4_data_entity_metadata.py | 75 ++++
.../ro-crate/must/4_data_entity_metadata.ttl | 216 ++++++++++
.../must/5_web_data_entity_metadata.ttl | 50 +++
.../ro-crate/must/6_contextual_entity.ttl | 81 ++++
.../ro-crate/ontology.ttl | 67 +++
.../ro-crate/prefixes.ttl | 49 +++
.../ro-crate/profile.ttl | 74 ++++
.../should/2_root_data_entity_metadata.ttl | 74 ++++
.../should/2_root_data_entity_relative_uri.py | 42 ++
.../should/4_data_entity_existence.py | 58 +++
.../should/4_data_entity_metadata.ttl | 69 +++
.../should/5_web_data_entity_metadata.py | 73 ++++
.../should/5_web_data_entity_metadata.ttl | 63 +++
.../should/6_contextual_entity_metadata.ttl | 75 ++++
58 files changed, 5020 insertions(+)
create mode 100644 tests/data/rocrate_validator_profiles/alpha-crate/1_root_data_entity.ttl
create mode 100644 tests/data/rocrate_validator_profiles/alpha-crate/profile.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/may/11_workflow_execution_phase.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/may/12_check_phase.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/may/13_validation_phase.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/may/14_workflow_retrieval_phase.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/may/1_responsible_project.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/may/4_sign_off.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/may/8_disclosure_phase.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/must/11_workflow_execution_phase.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/must/12_check_phase.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/must/13_validation_phase.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/must/14_workflow_retrieval_phase.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/must/15_metadata_file.py
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/must/15_metadata_file.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/must/16_publishing_phase.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/must/1_requesting_agent.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/must/1_responsible_project.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/must/1_root_data_entity.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/must/2_requesting_agent.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/must/3_timestamp_format.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/must/4_sign_off.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/must/6_workflow_reference.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/must/7_requested_workflow_run.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/must/8_disclosure_phase.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/profile.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/should/10_outputs.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/should/11_workflow_execution_phase.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/should/12_check_phase.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/should/13_validation_phase.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/should/14_workflow_retrieval_phase.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/should/1_requesting_agent.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/should/1_responsible_project.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/should/2_requesting_agent.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/should/4_sign_off.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/should/6_workflow_reference.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/should/7_requested_workflow_run.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/should/8_disclosure_phase.ttl
create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/should/9_inputs.ttl
create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/may/4_data_entity_metadata.ttl
create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/may/61_license_entity.ttl
create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/must/0_file_descriptor_format.py
create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/must/1_file-descriptor_metadata.ttl
create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/must/2_root_data_entity_metadata.ttl
create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/must/4_data_entity_metadata.py
create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/must/4_data_entity_metadata.ttl
create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/must/5_web_data_entity_metadata.ttl
create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/must/6_contextual_entity.ttl
create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/ontology.ttl
create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/prefixes.ttl
create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/profile.ttl
create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/should/2_root_data_entity_metadata.ttl
create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/should/2_root_data_entity_relative_uri.py
create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/should/4_data_entity_existence.py
create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/should/4_data_entity_metadata.ttl
create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/should/5_web_data_entity_metadata.py
create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/should/5_web_data_entity_metadata.ttl
create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/should/6_contextual_entity_metadata.ttl
diff --git a/tests/data/rocrate_validator_profiles/alpha-crate/1_root_data_entity.ttl b/tests/data/rocrate_validator_profiles/alpha-crate/1_root_data_entity.ttl
new file mode 100644
index 0000000..af82b5d
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/alpha-crate/1_root_data_entity.ttl
@@ -0,0 +1,46 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: <https://github.com/crs4/rocrate-validator/profiles/ro-crate/> .
+@prefix alpha-crate: <https://github.com/crs4/rocrate-validator/profiles/alpha-crate/> .  # NOTE(review): IRI reconstructed by analogy with the other profiles - confirm
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix schema: <http://schema.org/> .
+@prefix sh: <http://www.w3.org/ns/shacl#> .
+@prefix validator: <https://github.com/crs4/rocrate-validator/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+
+alpha-crate:RootDataEntityRequiredProperties
+    a sh:NodeShape ;
+    sh:targetClass ro-crate:RootDataEntity ;  # validated against every ro-crate:RootDataEntity
+    sh:name "RootDataEntity" ;
+
+    sh:property [  # presence: at least one sourceOrganization value
+        a sh:PropertyShape ;
+        sh:path schema:sourceOrganization ;
+        sh:name "sourceOrganization" ;
+        sh:severity sh:Violation ;
+        sh:minCount 1 ;
+        sh:message """The Root Data Entity MUST have a `sourceOrganization` property.""" ;
+    ] ;
+
+    sh:property [  # typing: every sourceOrganization value is a schema:Project
+        a sh:PropertyShape ;
+        sh:path schema:sourceOrganization ;
+        sh:name "sourceOrganization" ;
+        sh:severity sh:Violation ;
+        sh:class schema:Project ;
+        sh:message """The `sourceOrganization` property of the RootDataEntity MUST point to a Project entity.""" ;
+    ] .
diff --git a/tests/data/rocrate_validator_profiles/alpha-crate/profile.ttl b/tests/data/rocrate_validator_profiles/alpha-crate/profile.ttl
new file mode 100644
index 0000000..d33dfef
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/alpha-crate/profile.ttl
@@ -0,0 +1,55 @@
+# Copyright (c) 2024-2025 CRS4, University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix dct: <http://purl.org/dc/terms/> .
+@prefix prof: <http://www.w3.org/ns/dx/prof/> .
+@prefix role: <http://www.w3.org/ns/dx/prof/role/> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+
+
+ a prof:Profile ;
+
+ # the Profile's label
+ rdfs:label "Alpha RO-Crate 0.1" ;
+
+ # regular metadata, a basic description of the Profile
+ rdfs:comment """Alpha RO-Crate Metadata Specification 0.1"""@en ;
+
+ # URI of the publisher of the Metadata Specification
+ dct:publisher ;
+
+ # This profile is a transitive profile of the RO-Crate Metadata Specification
+ prof:isTransitiveProfileOf ;
+
+ # this profile has a JSON-LD context resource
+ prof:hasResource [
+ a prof:ResourceDescriptor ;
+
+ # it's in JSON-LD format
+ dct:format ;
+
+ # it conforms to JSON-LD, here referred to by its namespace URI as a Profile
+ dct:conformsTo ;
+
+ # this profile resource plays the role of "Vocabulary"
+ # described in this ontology's accompanying Roles vocabulary
+ prof:hasRole role:Vocabulary ;
+
+ # this profile resource's actual file
+ prof:hasArtifact ;
+ ] ;
+
+ # a short code to refer to the Profile with when a URI can't be used
+ prof:hasToken "alpha-crate" ;
+.
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/may/11_workflow_execution_phase.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/may/11_workflow_execution_phase.ttl
new file mode 100644
index 0000000..6c33191
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/may/11_workflow_execution_phase.ttl
@@ -0,0 +1,64 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: <https://github.com/crs4/rocrate-validator/profiles/ro-crate/> .
+@prefix five-safes-crate: <https://github.com/crs4/rocrate-validator/profiles/five-safes-crate/> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix schema: <http://schema.org/> .
+@prefix sh: <http://www.w3.org/ns/shacl#> .
+@prefix validator: <https://github.com/crs4/rocrate-validator/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+
+five-safes-crate:WorkflowexecutionObjectHasStartTimeIfBegun
+    a sh:NodeShape ;
+    sh:name "WorkflowExecution" ;
+    # A single literal: Turtle has no adjacent-string concatenation, so the former
+    # "( ... )" form produced an rdf:List node instead of a description string.
+    sh:description
+        "The workflow execution object MAY have a startTime if actionStatus is either ActiveActionStatus, CompletedActionStatus or FailedActionStatus." ;
+
+    sh:target [  # CreateAction nodes whose actionStatus is Active, Completed or Failed
+        a sh:SPARQLTarget ;
+        sh:select """
+            PREFIX schema: <http://schema.org/>
+            PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+
+            SELECT ?this
+            WHERE {
+                ?this rdf:type schema:CreateAction ;
+                    schema:actionStatus ?status .
+                FILTER(?status IN (
+                    "http://schema.org/CompletedActionStatus",
+                    "http://schema.org/FailedActionStatus",
+                    "http://schema.org/ActiveActionStatus"
+                ))
+            }
+        """ ;
+    ] ;
+
+    sh:property [  # exactly one startTime; reported at Info severity only
+        a sh:PropertyShape ;
+        sh:name "StartTime" ;
+        sh:path schema:startTime ;
+        sh:minCount 1 ;
+        sh:maxCount 1 ;
+        sh:severity sh:Info ;
+        # Single literal for the same reason as the node-level description;
+        # the text is the two former fragments joined unchanged.
+        sh:description
+            "The workflow execution object MAY have a startTime if actionStatus is either ActiveActionStatus, CompletedActionStatus or FailedActionStatus." ;
+        sh:message "The workflow execution object MAY have a startTime if actionStatus is either ActiveActionStatus, CompletedActionStatus or FailedActionStatus." ;
+    ] .
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/may/12_check_phase.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/may/12_check_phase.ttl
new file mode 100644
index 0000000..0e741f6
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/may/12_check_phase.ttl
@@ -0,0 +1,56 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: <https://github.com/crs4/rocrate-validator/profiles/ro-crate/> .
+@prefix five-safes-crate: <https://github.com/crs4/rocrate-validator/profiles/five-safes-crate/> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix schema: <http://schema.org/> .
+@prefix purl: <http://purl.org/dc/terms/> .
+@prefix sh: <http://www.w3.org/ns/shacl#> .
+@prefix validator: <https://github.com/crs4/rocrate-validator/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+
+five-safes-crate:CheckValueMayHaveStartTime
+    a sh:NodeShape ;
+    sh:name "CheckValue" ;
+    sh:description "" ;
+    sh:target [  # nodes with additionalType shp:CheckValue and status Active, Completed or Failed
+        a sh:SPARQLTarget ;
+        sh:select """
+            PREFIX schema: <http://schema.org/>
+            PREFIX shp: <https://w3id.org/shp#>
+            SELECT ?this
+            WHERE {
+                ?this schema:additionalType shp:CheckValue ;
+                    schema:actionStatus ?status .
+                FILTER(?status IN (
+                    "http://schema.org/CompletedActionStatus",
+                    "http://schema.org/FailedActionStatus",
+                    "http://schema.org/ActiveActionStatus"
+                ))
+            }
+        """ ;
+    ] ;
+
+    sh:property [  # exactly one startTime; reported at Info severity only
+        a sh:PropertyShape ;
+        sh:name "StartTime" ;
+        sh:path schema:startTime ;
+        sh:minCount 1 ;
+        sh:maxCount 1 ;
+        sh:severity sh:Info ;
+        sh:message "`CheckValue` MAY have the `startTime` property." ;
+    ] .
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/may/13_validation_phase.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/may/13_validation_phase.ttl
new file mode 100644
index 0000000..b7adcb3
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/may/13_validation_phase.ttl
@@ -0,0 +1,57 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: <https://github.com/crs4/rocrate-validator/profiles/ro-crate/> .
+@prefix five-safes-crate: <https://github.com/crs4/rocrate-validator/profiles/five-safes-crate/> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix schema: <http://schema.org/> .
+@prefix purl: <http://purl.org/dc/terms/> .
+@prefix sh: <http://www.w3.org/ns/shacl#> .
+@prefix validator: <https://github.com/crs4/rocrate-validator/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+
+five-safes-crate:DownloadActionMayHaveStartTimeIfBegun  # NOTE(review): this shape targets shp:ValidationCheck, yet its IRI says DownloadAction and equals the one in may/14_workflow_retrieval_phase.ttl - consider renaming
+    a sh:NodeShape ;
+    sh:name "ValidationCheck" ;
+    sh:description "" ;
+    sh:target [  # nodes with additionalType shp:ValidationCheck and status Active, Completed or Failed
+        a sh:SPARQLTarget ;
+        sh:select """
+            PREFIX schema: <http://schema.org/>
+            PREFIX shp: <https://w3id.org/shp#>
+            SELECT ?this
+            WHERE {
+                ?this schema:additionalType shp:ValidationCheck ;
+                    schema:actionStatus ?status .
+                FILTER(?status IN (
+                    "http://schema.org/CompletedActionStatus",
+                    "http://schema.org/FailedActionStatus",
+                    "http://schema.org/ActiveActionStatus"
+                ))
+            }
+        """ ;
+    ] ;
+
+    sh:property [  # exactly one startTime; reported at Info severity only
+        a sh:PropertyShape ;
+        sh:name "StartTime" ;
+        sh:path schema:startTime ;
+        sh:minCount 1 ;
+        sh:maxCount 1 ;
+        sh:severity sh:Info ;
+        sh:description "ValidationCheck MAY have the `startTime` property if `actionStatus` is either ActiveActionStatus, CompletedActionStatus or FailedActionStatus." ;
+        sh:message "ValidationCheck MAY have the `startTime` property if `actionStatus` is either ActiveActionStatus, CompletedActionStatus or FailedActionStatus." ;
+    ] .
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/may/14_workflow_retrieval_phase.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/may/14_workflow_retrieval_phase.ttl
new file mode 100644
index 0000000..4386682
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/may/14_workflow_retrieval_phase.ttl
@@ -0,0 +1,102 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+@prefix ro: <./> .
+@prefix ro-crate: <https://github.com/crs4/rocrate-validator/profiles/ro-crate/> .
+@prefix five-safes-crate: <https://github.com/crs4/rocrate-validator/profiles/five-safes-crate/> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix schema: <http://schema.org/> .
+@prefix purl: <http://purl.org/dc/terms/> .
+@prefix sh: <http://www.w3.org/ns/shacl#> .
+@prefix validator: <https://github.com/crs4/rocrate-validator/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+
+five-safes-crate:DownloadedWorkflowSHOULDExistAndBeReferencedByDownloadActionResult
+    a sh:NodeShape ;
+    sh:name "DownloadAction" ;
+    sh:description "Validates that DownloadAction result references an existing entity" ;
+    sh:targetClass schema:DownloadAction ;
+
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "Result" ;
+        sh:description "The result property must reference an existing entity in the RO-Crate" ;
+        sh:path schema:result ;
+        sh:minCount 1 ;
+        sh:nodeKind sh:IRI ;
+
+        sh:sparql [  # each row returned by the query is reported at Info severity
+            a sh:SPARQLConstraint ;
+            sh:select """
+                PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+                PREFIX schema: <http://schema.org/>
+
+                SELECT $this $value
+                WHERE {
+                    $this schema:result $value .
+
+                    # Report every result that is not typed as schema:Dataset
+                    FILTER NOT EXISTS {
+                        $value rdf:type schema:Dataset .
+                    }
+                }
+            """ ;
+            sh:severity sh:Info ;
+            sh:message "The entity representing the downloaded workflow is not defined, OR is not referenced by `DownloadAction` --> `result`, OR is not of type `schema:Dataset`." ;
+        ] ;
+    ] .
+
+
+five-safes-crate:DownloadActionMayHaveStartTimeIfBegun
+    a sh:NodeShape ;
+    sh:name "DownloadAction" ;
+    # A single literal: Turtle has no adjacent-string concatenation, so the former
+    # "( ... )" form produced an rdf:List node instead of a description string.
+    sh:description
+        "`DownloadAction` MAY have the `startTime` property if `actionStatus` is either ActiveActionStatus, CompletedActionStatus or FailedActionStatus." ;
+
+    sh:target [  # DownloadAction nodes whose actionStatus is Active, Completed or Failed
+        a sh:SPARQLTarget ;
+        sh:select """
+            PREFIX schema: <http://schema.org/>
+            PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+
+            SELECT ?this
+            WHERE {
+                ?this rdf:type schema:DownloadAction ;
+                    schema:actionStatus ?status .
+                FILTER(?status IN (
+                    "http://schema.org/CompletedActionStatus",
+                    "http://schema.org/FailedActionStatus",
+                    "http://schema.org/ActiveActionStatus"
+                ))
+            }
+        """ ;
+    ] ;
+
+    sh:property [  # exactly one startTime; reported at Info severity only
+        a sh:PropertyShape ;
+        sh:name "StartTime" ;
+        sh:path schema:startTime ;
+        sh:minCount 1 ;
+        sh:maxCount 1 ;
+        sh:severity sh:Info ;
+        # Single literal for the same reason as the node-level description;
+        # the text is the two former fragments joined unchanged.
+        sh:description
+            "`DownloadAction` MAY have the `startTime` property if `actionStatus` is either ActiveActionStatus, CompletedActionStatus or FailedActionStatus." ;
+        sh:message "`DownloadAction` MAY have the `startTime` property if `actionStatus` is either ActiveActionStatus, CompletedActionStatus or FailedActionStatus." ;
+    ] .
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/may/1_responsible_project.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/may/1_responsible_project.ttl
new file mode 100644
index 0000000..5dd46fd
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/may/1_responsible_project.ttl
@@ -0,0 +1,56 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: <https://github.com/crs4/rocrate-validator/profiles/ro-crate/> .
+@prefix five-safes-crate: <https://github.com/crs4/rocrate-validator/profiles/five-safes-crate/> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix schema: <http://schema.org/> .
+@prefix sh: <http://www.w3.org/ns/shacl#> .
+@prefix validator: <https://github.com/crs4/rocrate-validator/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+
+five-safes-crate:ResponsibleProject
+    a sh:NodeShape ;
+    sh:name "Responsible Project" ;
+    sh:target [  # ?this = whatever the agent of a CreateAction is a memberOf
+        a sh:SPARQLTarget ;
+        sh:prefixes ro-crate:sparqlPrefixes ;  # prefixes come from this declaration, not from the query text
+        sh:select """
+ SELECT DISTINCT ?this WHERE {
+ ?action a schema:CreateAction ;
+ schema:agent ?agent .
+ ?agent schema:memberOf ?this .
+ }
+ """
+    ] ;
+
+    sh:property [  # at least one funding value, Info severity
+        a sh:PropertyShape ;
+        sh:path schema:funding ;
+        sh:name "funding" ;
+        sh:severity sh:Info ;
+        sh:minCount 1 ;
+        sh:message """The Responsible Project does not have the property `funding`.""" ;
+    ] ;
+
+    sh:property [  # at least one member value, Info severity
+        a sh:PropertyShape ;
+        sh:path schema:member ;
+        sh:name "member" ;
+        sh:severity sh:Info ;
+        sh:minCount 1 ;
+        sh:message """The Responsible Project does not have the property `member`.""" ;
+    ] .
\ No newline at end of file
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/may/4_sign_off.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/may/4_sign_off.ttl
new file mode 100644
index 0000000..3890e2b
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/may/4_sign_off.ttl
@@ -0,0 +1,58 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: <https://github.com/crs4/rocrate-validator/profiles/ro-crate/> .
+@prefix five-safes-crate: <https://github.com/crs4/rocrate-validator/profiles/five-safes-crate/> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix schema: <http://schema.org/> .
+@prefix sh: <http://www.w3.org/ns/shacl#> .
+@prefix validator: <https://github.com/crs4/rocrate-validator/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+@prefix shp: <https://w3id.org/shp#> .
+
+
+five-safes-crate:SignOffPhaseStartTime
+    a sh:NodeShape ;
+    sh:name "SignOffPhaseStartTime" ;
+
+    sh:target [  # nodes with additionalType shp:SignOff and status Active, Completed or Failed
+        a sh:SPARQLTarget ;
+        sh:select """
+            PREFIX schema: <http://schema.org/>
+            PREFIX shp: <https://w3id.org/shp#>
+            SELECT ?this
+            WHERE {
+                ?this schema:additionalType shp:SignOff ;
+                    schema:actionStatus ?status .
+                FILTER(?status IN (
+                    "http://schema.org/ActiveActionStatus",
+                    "http://schema.org/CompletedActionStatus",
+                    "http://schema.org/FailedActionStatus"
+                ))
+            }
+        """ ;
+    ] ;
+
+    sh:property [  # exactly one startTime matching the date-time pattern; Info severity
+        a sh:PropertyShape ;
+        sh:name "StartTime" ;
+        sh:path schema:startTime ;
+        sh:minCount 1 ;
+        sh:maxCount 1 ;
+        sh:pattern "^[0-9]{4}-[0-9]{2}-[0-9]{2}[Tt][0-9]{2}:[0-9]{2}:[0-9]{2}([.,][0-9]+)?(Z|z|[+-][0-9]{2}:[0-9]{2})$" ;  # fraction separator is "." or ","; "|" inside a class is a literal bar, so it was removed
+        sh:severity sh:Info ;
+        sh:description "Sign Off object MAY have a startTime property if action is active, completed or failed." ;
+        sh:message "Sign Off object MAY have a startTime property if action is active, completed or failed." ;
+    ] .
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/may/8_disclosure_phase.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/may/8_disclosure_phase.ttl
new file mode 100644
index 0000000..4694319
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/may/8_disclosure_phase.ttl
@@ -0,0 +1,58 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: <https://github.com/crs4/rocrate-validator/profiles/ro-crate/> .
+@prefix five-safes-crate: <https://github.com/crs4/rocrate-validator/profiles/five-safes-crate/> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix schema: <http://schema.org/> .
+@prefix sh: <http://www.w3.org/ns/shacl#> .
+@prefix validator: <https://github.com/crs4/rocrate-validator/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+
+five-safes-crate:DisclosureObjectHasStartTimeIfBegun
+    a sh:NodeShape ;
+    sh:name "DisclosureCheck" ;
+    sh:description "DisclosureCheck" ;
+
+    sh:target [  # nodes with additionalType shp:DisclosureCheck and status Active, Completed or Failed
+        a sh:SPARQLTarget ;
+        sh:select """
+            PREFIX schema: <http://schema.org/>
+            PREFIX shp: <https://w3id.org/shp#>
+
+            SELECT ?this
+            WHERE {
+                ?this schema:additionalType shp:DisclosureCheck ;
+                    schema:actionStatus ?status .
+                FILTER(?status IN (
+                    "http://schema.org/CompletedActionStatus",
+                    "http://schema.org/FailedActionStatus",
+                    "http://schema.org/ActiveActionStatus"
+                ))
+            }
+        """ ;
+    ] ;
+
+    sh:property [  # exactly one startTime; reported at Info severity only
+        a sh:PropertyShape ;
+        sh:name "StartTime" ;
+        sh:path schema:startTime ;
+        sh:minCount 1 ;
+        sh:maxCount 1 ;
+        sh:severity sh:Info ;
+        sh:description "`DisclosureCheck` MAY have the `startTime` property if `actionStatus` is either ActiveActionStatus, CompletedActionStatus or FailedActionStatus." ;
+        sh:message "`DisclosureCheck` MAY have the `startTime` property if `actionStatus` is either ActiveActionStatus, CompletedActionStatus or FailedActionStatus." ;
+    ] .
\ No newline at end of file
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/must/11_workflow_execution_phase.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/must/11_workflow_execution_phase.ttl
new file mode 100644
index 0000000..2f723c9
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/must/11_workflow_execution_phase.ttl
@@ -0,0 +1,64 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: <https://github.com/crs4/rocrate-validator/profiles/ro-crate/> .
+@prefix five-safes-crate: <https://github.com/crs4/rocrate-validator/profiles/five-safes-crate/> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix schema: <http://schema.org/> .
+@prefix purl: <http://purl.org/dc/terms/> .
+@prefix sh: <http://www.w3.org/ns/shacl#> .
+@prefix validator: <https://github.com/crs4/rocrate-validator/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+
+five-safes-crate:WorkflowMustHaveDescriptiveName
+    a sh:NodeShape ;
+    sh:targetClass schema:CreateAction ;  # applies to every schema:CreateAction
+    sh:name "WorkflowExecution" ;
+
+    sh:property [  # name: required xsd:string of length >= 10
+        a sh:PropertyShape ;
+        sh:path schema:name ;
+        sh:name "name" ;
+        sh:description "Workflow (CreateAction) MUST have a name string of at least 10 characters." ;
+        sh:severity sh:Violation ;
+        sh:minCount 1 ;
+        sh:minLength 10 ;
+        sh:datatype xsd:string ;
+        sh:message "Workflow (CreateAction) MUST have a name string of at least 10 characters." ;
+    ] .
+
+
+
+
+five-safes-crate:WorkflowMustHaveActionStatusWithAllowedValues
+    a sh:NodeShape ;
+    sh:targetClass schema:CreateAction ;  # applies to every schema:CreateAction
+    sh:name "WorkflowExecution" ;
+    sh:property [  # actionStatus: required, and restricted to the four listed strings
+        a sh:PropertyShape ;
+        sh:path schema:actionStatus ;
+        sh:name "actionStatus" ;
+        sh:description "WorkflowExecution MUST have an actionStatus with an allowed value (see https://schema.org/ActionStatusType)." ;
+        sh:severity sh:Violation ;
+        sh:in (
+            "http://schema.org/PotentialActionStatus"
+            "http://schema.org/ActiveActionStatus"
+            "http://schema.org/CompletedActionStatus"
+            "http://schema.org/FailedActionStatus"
+        ) ;
+        sh:minCount 1 ;
+        sh:message "WorkflowExecution MUST have an actionStatus with an allowed value (see https://schema.org/ActionStatusType)." ;
+    ] .
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/must/12_check_phase.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/must/12_check_phase.ttl
new file mode 100644
index 0000000..35b5dd1
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/must/12_check_phase.ttl
@@ -0,0 +1,93 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: <https://github.com/crs4/rocrate-validator/profiles/ro-crate/> .
+@prefix five-safes-crate: <https://github.com/crs4/rocrate-validator/profiles/five-safes-crate/> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix schema: <http://schema.org/> .
+@prefix purl: <http://purl.org/dc/terms/> .
+@prefix sh: <http://www.w3.org/ns/shacl#> .
+@prefix validator: <https://github.com/crs4/rocrate-validator/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+
+five-safes-crate:CheckValueObjectHasDescriptiveNameAndIsAssessAction
+    a sh:NodeShape ;
+    sh:name "CheckValue" ;
+    sh:description "" ;
+
+    sh:target [  # every node with additionalType shp:CheckValue
+        a sh:SPARQLTarget ;
+        sh:select """
+            PREFIX schema: <http://schema.org/>
+            PREFIX shp: <https://w3id.org/shp#>
+
+            SELECT ?this
+            WHERE {
+                ?this schema:additionalType shp:CheckValue .
+            }
+        """ ;
+    ] ;
+
+    sh:property [  # rdf:type must include schema:AssessAction
+        sh:path rdf:type ;
+        sh:minCount 1 ;
+        sh:hasValue schema:AssessAction;
+        sh:severity sh:Violation ;
+        sh:message "CheckValue MUST be a `schema:AssessAction`." ;
+    ] ;
+
+    sh:property [  # any schema:name value must be an xsd:string
+        a sh:PropertyShape ;  # was "sh:a", which is not a SHACL term and left the node untyped
+        sh:name "name" ;
+        sh:description "CheckValue MUST have a human readable name string." ;
+        sh:path schema:name ;
+        sh:datatype xsd:string ;
+        sh:severity sh:Violation ;
+        sh:message "CheckValue MUST have a human readable name string." ;
+    ] .
+
+five-safes-crate:CheckValueActionStatusMustHaveAllowedValues
+    a sh:NodeShape ;
+    sh:name "CheckValue" ;
+    sh:description "" ;
+
+    sh:target [  # shp:CheckValue nodes that have an actionStatus
+        a sh:SPARQLTarget ;
+        sh:select """
+            PREFIX schema: <http://schema.org/>
+            PREFIX shp: <https://w3id.org/shp#>
+
+            SELECT ?this
+            WHERE {
+                ?this schema:additionalType shp:CheckValue ;
+                    schema:actionStatus ?status .
+            }
+        """ ;
+    ] ;
+
+    sh:property [  # actionStatus restricted to the four listed strings
+        a sh:PropertyShape ;
+        sh:name "ActionStatus" ;
+        sh:path schema:actionStatus ;
+        sh:in (
+            "http://schema.org/PotentialActionStatus"
+            "http://schema.org/ActiveActionStatus"
+            "http://schema.org/CompletedActionStatus"
+            "http://schema.org/FailedActionStatus"
+        ) ;
+        sh:severity sh:Violation ;
+        sh:message "`CheckValue` --> `actionStatus` MUST have one of the allowed values." ;
+    ] .
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/must/13_validation_phase.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/must/13_validation_phase.ttl
new file mode 100644
index 0000000..8ce6be4
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/must/13_validation_phase.ttl
@@ -0,0 +1,126 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: <https://github.com/crs4/rocrate-validator/profiles/ro-crate/> .
+@prefix five-safes-crate: <https://github.com/crs4/rocrate-validator/profiles/five-safes-crate/> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix schema: <http://schema.org/> .
+@prefix purl: <http://purl.org/dc/terms/> .
+@prefix sh: <http://www.w3.org/ns/shacl#> .
+@prefix validator: <https://github.com/crs4/rocrate-validator/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+
+five-safes-crate:ValidationCheckObjectHasDescriptiveNameAndIsAssessAction
+    a sh:NodeShape ;
+    sh:name "ValidationCheck" ;
+    sh:description "" ;
+
+    sh:target [  # every node with additionalType shp:ValidationCheck
+        a sh:SPARQLTarget ;
+        sh:select """
+            PREFIX schema: <http://schema.org/>
+            PREFIX shp: <https://w3id.org/shp#>
+
+            SELECT ?this
+            WHERE {
+                ?this schema:additionalType shp:ValidationCheck .
+            }
+        """ ;
+    ] ;
+
+    sh:property [  # rdf:type must include schema:AssessAction
+        sh:path rdf:type ;
+        sh:minCount 1 ;
+        sh:hasValue schema:AssessAction;
+        sh:severity sh:Violation ;
+        sh:message "ValidationCheck MUST be a `schema:AssessAction`." ;
+    ] ;
+
+    sh:property [  # any schema:name value must be an xsd:string
+        a sh:PropertyShape ;  # was "sh:a", which is not a SHACL term and left the node untyped
+        sh:name "name" ;
+        sh:description "ValidationCheck MUST have a human readable name string." ;
+        sh:path schema:name ;
+        sh:datatype xsd:string ;
+        sh:severity sh:Violation ;
+        sh:message "ValidationCheck MUST have a human readable name string." ;
+    ] .
+
+
+five-safes-crate:ValidationCheckActionStatusMustHaveAllowedValue
+    a sh:NodeShape ;
+    sh:name "ValidationCheck" ;
+    sh:target [  # shp:ValidationCheck nodes that have an actionStatus
+        a sh:SPARQLTarget ;
+        sh:select """
+            PREFIX schema: <http://schema.org/>
+            PREFIX shp: <https://w3id.org/shp#>
+
+            SELECT ?this
+            WHERE {
+                ?this schema:additionalType shp:ValidationCheck ;
+                    schema:actionStatus ?status .
+            }
+        """ ;
+    ] ;
+
+    sh:property [  # actionStatus restricted to the four listed strings
+        a sh:PropertyShape ;
+        sh:name "actionStatus" ;
+        sh:description "The `actionStatus` of ValidationCheck MUST have an allowed value (see https://schema.org/ActionStatusType)." ;
+        sh:path schema:actionStatus ;
+        sh:in (
+            "http://schema.org/PotentialActionStatus"
+            "http://schema.org/ActiveActionStatus"
+            "http://schema.org/CompletedActionStatus"
+            "http://schema.org/FailedActionStatus"
+        ) ;
+        sh:severity sh:Violation ;
+        sh:message "The `actionStatus` of ValidationCheck MUST have an allowed value (see https://schema.org/ActionStatusType)." ;
+    ] .
+
+
+five-safes-crate:ValidationCheckActionStatusMustHaveAllowedValue  # NOTE(review): same subject IRI as the shape defined earlier in this file, so the two merge into one node shape - consider a distinct name
+    a sh:NodeShape ;
+    sh:name "ValidationCheck" ;
+    sh:target [  # every shp:ValidationCheck node, with or without an actionStatus
+        a sh:SPARQLTarget ;
+        sh:select """
+            PREFIX schema: <http://schema.org/>
+            PREFIX shp: <https://w3id.org/shp#>
+
+            SELECT ?this
+            WHERE {
+                ?this schema:additionalType shp:ValidationCheck .
+            }
+        """ ;
+    ] ;
+
+    sh:property [  # actionStatus is required and restricted to the four listed strings
+        a sh:PropertyShape ;
+        sh:minCount 1 ;
+        sh:name "actionStatus" ;
+        sh:description "actionStatus MUST be either PotentialActionStatus, ActiveActionStatus, CompletedActionStatus, or FailedActionStatus." ;
+        sh:path schema:actionStatus ;
+        sh:in (
+            "http://schema.org/PotentialActionStatus"
+            "http://schema.org/ActiveActionStatus"
+            "http://schema.org/CompletedActionStatus"
+            "http://schema.org/FailedActionStatus"
+        ) ;
+        sh:severity sh:Violation ;
+        sh:message "actionStatus MUST be either PotentialActionStatus, ActiveActionStatus, CompletedActionStatus, or FailedActionStatus." ;
+    ] .
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/must/14_workflow_retrieval_phase.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/must/14_workflow_retrieval_phase.ttl
new file mode 100644
index 0000000..a1108ad
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/must/14_workflow_retrieval_phase.ttl
@@ -0,0 +1,157 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: <https://github.com/crs4/rocrate-validator/profiles/ro-crate/> .
+@prefix five-safes-crate: <https://github.com/crs4/rocrate-validator/profiles/five-safes-crate/> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix schema: <http://schema.org/> .
+@prefix purl: <http://purl.org/dc/terms/> .
+@prefix sh: <http://www.w3.org/ns/shacl#> .
+@prefix validator: <https://github.com/crs4/rocrate-validator/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+
+five-safes-crate:DownloadActionObjectMUSTHavesDescriptiveName
+    a sh:NodeShape ;
+    sh:targetClass schema:DownloadAction ;  # applies to every schema:DownloadAction
+    sh:name "DownloadAction" ;
+    sh:description "" ;
+
+    sh:property [  # name: exactly one value, typed xsd:string
+        a sh:PropertyShape ;
+        sh:path schema:name ;
+        sh:name "name" ;
+        sh:description "DownloadAction MUST have a human readable name string." ;
+        sh:severity sh:Violation ;
+        sh:datatype xsd:string ;
+        sh:minCount 1 ;
+        sh:maxCount 1 ;
+        sh:message "DownloadAction MUST have a human readable name string." ;
+    ] .
+
+
+
+five-safes-crate:WorkflowSameAsAndRootDataEntityMainEntityMUSTBeTheSame
+    a sh:NodeShape ;
+    sh:name "Downloaded Workflow" ;
+    sh:description "" ;
+    sh:target [  # Dataset nodes that are the result of some DownloadAction
+        a sh:SPARQLTarget ;
+        sh:select """
+            PREFIX schema: <http://schema.org/>
+            PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+
+            SELECT ?this
+            WHERE {
+                ?this rdf:type schema:Dataset .
+                ?s rdf:type schema:DownloadAction ;
+                    schema:result ?this .
+            }
+        """ ;
+    ];
+
+    sh:sparql [  # violation when no sameAs value of $this is also some node's mainEntity
+        a sh:SPARQLConstraint ;
+        sh:select """
+            PREFIX schema: <http://schema.org/>
+            PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+
+            SELECT $this
+            WHERE {
+                FILTER NOT EXISTS {
+                    $this schema:sameAs ?o .
+                    ?s schema:mainEntity ?o .
+                    # ?o rdf:type schema:Dataset .
+                }
+            }
+        """ ;
+        sh:severity sh:Violation ;
+        sh:description "The property `sameAs` of the entity representing the downloaded workflow MUST point to the same entity as `RootDataEntity` --> `mainEntity`." ;
+        sh:message "The property `sameAs` of the entity representing the downloaded workflow MUST point to the same entity as `RootDataEntity` --> `mainEntity`." ;
+    ] .
+
+
+five-safes-crate:DownloadedWorkflowDistributionAndDownloadActionObjectMUSTBeTheSame
+    a sh:NodeShape ;
+    sh:name "Downloaded Workflow" ;
+    sh:description "" ;
+    sh:target [  # Dataset nodes that are the result of some DownloadAction
+        a sh:SPARQLTarget ;
+        sh:select """
+            PREFIX schema: <http://schema.org/>
+            PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+
+            SELECT ?this
+            WHERE {
+                ?this rdf:type schema:Dataset .
+                ?s rdf:type schema:DownloadAction ;
+                    schema:result ?this .
+            }
+        """ ;
+    ];
+
+    sh:sparql [  # NOTE(review): ?action is not tied to $this via schema:result, so with several DownloadActions an unrelated action may trigger a violation - confirm intent
+        a sh:SPARQLConstraint ;
+        sh:name "" ;
+        sh:select """
+            PREFIX schema: <http://schema.org/>
+            PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+
+            SELECT $this
+            WHERE {
+                ?action rdf:type schema:DownloadAction .
+                FILTER NOT EXISTS {
+                    $this schema:distribution ?url .
+                    ?action schema:object ?url .
+                }
+            }
+        """ ;
+        sh:severity sh:Violation ;
+        sh:message "DownloadedWorkflow --> `distribution` MUST reference the same entity as `DownloadAction` --> `object`." ;
+    ] .
+
+
+five-safes-crate:DownloadActionActionStatusMUSTHaveAllowedValues
+    a sh:NodeShape ;
+    sh:name "DownloadAction" ;
+    sh:description "" ;
+
+    sh:target [  # DownloadAction nodes that have an actionStatus
+        a sh:SPARQLTarget ;
+        sh:select """
+            PREFIX schema: <http://schema.org/>
+            PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+
+            SELECT ?this
+            WHERE {
+                ?this rdf:type schema:DownloadAction ;
+                    schema:actionStatus ?status .
+            }
+        """ ;
+    ] ;
+
+    sh:property [  # actionStatus restricted to the four listed strings
+        a sh:PropertyShape ;
+        sh:name "ActionStatus" ;
+        sh:path schema:actionStatus ;
+        sh:in (
+            "http://schema.org/PotentialActionStatus"
+            "http://schema.org/ActiveActionStatus"
+            "http://schema.org/CompletedActionStatus"
+            "http://schema.org/FailedActionStatus"
+        ) ;
+        sh:severity sh:Violation ;
+        sh:message "The value of actionStatus MUST be one of the allowed values: PotentialActionStatus; ActiveActionStatus; CompletedActionStatus; FailedActionStatus." ;
+    ] .
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/must/15_metadata_file.py b/tests/data/rocrate_validator_profiles/five-safes-crate/must/15_metadata_file.py
new file mode 100644
index 0000000..b589d33
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/must/15_metadata_file.py
@@ -0,0 +1,64 @@
+# Copyright (c) 2024-2025 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+
+import rocrate_validator.utils.log as logging
+from rocrate_validator.models import Severity, ValidationContext
+from rocrate_validator.requirements.python import PyFunctionCheck, check, requirement
+
+# set up logging
+logger = logging.getLogger(__name__)
+
+
+@requirement(name="RO-Crate context version")
+class FileDescriptorContextVersion(PyFunctionCheck):
+    """The RO-Crate metadata file MUST include the RO-Crate context version 1.2
+    (or later minor version) in `@context`"""
+
+    @check(name="RO-Crate context version", severity=Severity.REQUIRED)
+    def test_existence(self, context: ValidationContext) -> bool:
+        """
+        The RO-Crate metadata file MUST include the RO-Crate context version 1.2
+        (or later minor version) in `@context`
+        """
+        try:
+            json_dict = context.ro_crate.metadata.as_dict()
+            context_value = json_dict["@context"]
+            pattern = re.compile(
+                r"https://w3id\.org/ro/crate/1\.[2-9](-DRAFT)?/context"
+            )
+            # `@context` may be a single value or a list; treat both as a list.
+            if not isinstance(context_value, list):
+                context_value = [context_value]
+            # Only string entries can be the context IRI; skipping the rest keeps
+            # an object-valued `@context` from raising TypeError in `pattern.match`.
+            passed = any(
+                pattern.match(item)
+                for item in context_value
+                if isinstance(item, str)
+            )
+            if not passed:
+                context.result.add_issue(
+                    "The RO-Crate metadata file MUST include the RO-Crate context "
+                    "version 1.2 (or later minor version) in `@context`",
+                    self,
+                )
+            return passed
+
+        except Exception as e:
+            # Deliberate best effort: an unexpected error never fails this check.
+            if logger.isEnabledFor(logging.DEBUG):
+                logger.exception(e)
+            return True
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/must/15_metadata_file.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/must/15_metadata_file.ttl
new file mode 100644
index 0000000..14d939f
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/must/15_metadata_file.ttl
@@ -0,0 +1,41 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix purl: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+@prefix dct: .
+
+five-safes-crate:MetadataFileDescriptorProperties a sh:NodeShape ;
+ sh:name "RO-Crate conforms to 1.2 or later minor version" ;
+ sh:description """The RO-Crate metadata file descriptor MUST have a `conformsTo` property with RO-Crate specification version 1.2 or later minor version""";
+ sh:targetClass ro-crate:ROCrateMetadataFileDescriptor ;
+ sh:property [
+ a sh:PropertyShape ;
+ sh:name "RO-Crate conforms to 1.2 or later minor version" ;
+ sh:description "The RO-Crate metadata file descriptor MUST have a `conformsTo` property with RO-Crate specification version 1.2 or later minor version" ;
+ sh:minCount 1 ;
+ sh:nodeKind sh:IRI ;
+ sh:path dct:conformsTo ;
+ sh:pattern "https://w3id\\.org/ro/crate/(1\\.[2-9](-DRAFT)?)" ;
+ sh:severity sh:Violation;
+ sh:message "The RO-Crate metadata file descriptor MUST have a `conformsTo` property with RO-Crate specification version 1.2 or later minor version" ;
+ ] .
+
+ro-crate:conformsToROCrateSpec sh:deactivated true .
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/must/16_publishing_phase.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/must/16_publishing_phase.ttl
new file mode 100644
index 0000000..5a591a4
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/must/16_publishing_phase.ttl
@@ -0,0 +1,41 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix purl: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+five-safes-crate:AllAssessActionsMentioned
+ a sh:NodeShape ;
+ sh:name "All AssessActions are mentioned from Root Data Entity" ;
+ sh:description "All AssessAction entities in the crate MUST be referenced from the Root Dataset via `mentions`." ;
+ sh:targetClass schema:AssessAction;
+ # The inverse path yields every entity that `mentions` this action: at least one is required, and each must conform to ro-crate:RootDataEntity.
+ sh:property [
+ a sh:PropertyShape ;
+ sh:name "AssessAction mentions from RDE" ;
+ sh:description "All AssessAction entities in the crate MUST be referenced from the Root Dataset via `mentions`." ;
+ sh:path [ sh:inversePath schema:mentions ] ;
+ sh:node ro-crate:RootDataEntity ;
+ sh:minCount 1 ;
+ sh:severity sh:Violation ;
+ sh:message "All AssessAction entities in the crate MUST be referenced from the Root Dataset via `mentions`." ;
+ ] .
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/must/1_requesting_agent.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/must/1_requesting_agent.ttl
new file mode 100644
index 0000000..2cc0d43
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/must/1_requesting_agent.ttl
@@ -0,0 +1,82 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+five-safes-crate:AgentIsMemberOf
+ a sh:NodeShape ;
+ sh:name "Requesting Agent" ;
+ sh:target [
+ a sh:SPARQLTarget ;
+ sh:prefixes ro-crate:sparqlPrefixes ;
+ sh:select """
+ SELECT DISTINCT ?this WHERE {
+ ?action a schema:CreateAction ;
+ schema:agent ?this .
+ }
+ """
+ ] ;
+
+ sh:property [
+ a sh:PropertyShape ;
+ sh:name "memberOf" ;
+ sh:path schema:memberOf;
+ sh:class schema:Project ;
+ sh:severity sh:Violation ;
+ sh:message """The 'memberOf' property of an agent MUST be of type Project.""" ;
+ ] .
+
+
+five-safes-crate:AgentProjectIntersection
+ a sh:NodeShape ;
+ sh:name "Agent Project Intersection" ;
+ sh:description """At least one Project referenced by Agent -> memberOf MUST be included in the set of Projects referenced by RootDataEntity -> sourceOrganization.""" ;
+ sh:target [
+ a sh:SPARQLTarget ;
+ sh:prefixes ro-crate:sparqlPrefixes ;
+ sh:select """
+ SELECT DISTINCT ?this WHERE {
+ ?action a schema:CreateAction ;
+ schema:agent ?this .
+ }
+ """
+ ] ;
+ sh:sparql [
+ a sh:SPARQLConstraint ;
+ sh:name "Agent Project Intersection" ;
+ sh:description """At least one Project referenced by Agent -> memberOf MUST be included in the set of Projects referenced by RootDataEntity -> sourceOrganization.""" ;
+ sh:prefixes ro-crate:sparqlPrefixes ;
+ sh:select """
+ SELECT $this WHERE {
+ FILTER EXISTS {
+ $this schema:memberOf ?anyProject .
+ }
+ FILTER NOT EXISTS {
+ $this schema:memberOf ?commonProject .
+ ?metadata schema:about ?root .
+ ?root schema:sourceOrganization ?commonProject .
+ }
+ }
+ """ ;
+ sh:severity sh:Violation ;
+ sh:message """At least one Project referenced by Agent -> memberOf MUST be included in the set of Projects referenced by RootDataEntity -> sourceOrganization.""" ;
+ ] .
\ No newline at end of file
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/must/1_responsible_project.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/must/1_responsible_project.ttl
new file mode 100644
index 0000000..c06c873
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/must/1_responsible_project.ttl
@@ -0,0 +1,59 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+five-safes-crate:ResponsibleProject
+ a sh:NodeShape ;
+ sh:name "Responsible Project" ;
+ sh:target [
+ a sh:SPARQLTarget ;
+ sh:prefixes ro-crate:sparqlPrefixes ;
+ sh:select """
+ SELECT DISTINCT ?this WHERE {
+ ?action a schema:CreateAction ;
+ schema:agent ?agent .
+ ?agent schema:memberOf ?this .
+ }
+ """
+ ] ;
+
+ sh:property [
+ a sh:PropertyShape ;
+ sh:name "funding" ;
+ sh:path schema:funding;
+ sh:class schema:Grant ;
+ sh:severity sh:Violation ;
+ sh:message """The property 'funding' of the Responsible Project MUST be of type schema:Grant.""" ;
+ ] ;
+
+ sh:property [
+ a sh:PropertyShape ;
+ sh:name "member" ;
+ sh:path schema:member;
+ sh:or (
+ [ sh:class schema:Organization ]
+ [ sh:class schema:Person ]
+ ) ;
+ sh:severity sh:Violation ;
+ sh:message """The property 'member' of the Responsible Project MUST be of type schema:Organization or schema:Person.""" ;
+ ] .
\ No newline at end of file
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/must/1_root_data_entity.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/must/1_root_data_entity.ttl
new file mode 100644
index 0000000..bb9a514
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/must/1_root_data_entity.ttl
@@ -0,0 +1,46 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+five-safes-crate:RootDataEntityRequiredProperties
+ a sh:NodeShape ;
+ sh:name "RootDataEntity" ;
+ sh:targetClass ro-crate:RootDataEntity ;
+
+ sh:property [
+ a sh:PropertyShape ;
+ sh:name "sourceOrganization" ;
+ sh:path schema:sourceOrganization;
+ sh:minCount 1 ;
+ sh:severity sh:Violation ;
+ sh:message """The Root Data Entity MUST have a `sourceOrganization` property.""" ;
+ ] ;
+
+ sh:property [
+ a sh:PropertyShape ;
+ sh:name "sourceOrganization" ;
+ sh:path schema:sourceOrganization ;
+ sh:class schema:Project ;
+ sh:severity sh:Violation ;
+ sh:message """The `sourceOrganization` property of the RootDataEntity MUST point to a Project entity.""" ;
+ ] .
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/must/2_requesting_agent.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/must/2_requesting_agent.ttl
new file mode 100644
index 0000000..890bf3a
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/must/2_requesting_agent.ttl
@@ -0,0 +1,63 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+
+five-safes-crate:CreateActionHasAgent
+ a sh:NodeShape ;
+ sh:name "CreateAction" ;
+ sh:targetClass schema:CreateAction ;
+ sh:description "Checks that a CreateAction has an agent and that each agent is a schema:Person." ;
+
+ # CreateAction entity MUST have an agent (IRI)
+ sh:property [
+ a sh:PropertyShape ;
+ sh:name "Has Agent" ;
+ sh:path schema:agent ;
+ sh:minCount 1 ;
+ sh:nodeKind sh:IRI ;
+ sh:severity sh:Violation ;
+ sh:message "CreateAction MUST have at least one schema:agent that is a contextual entity." ;
+ ] ;
+
+ # The agent of a CreateAction entity MUST be a Person
+ sh:property [
+ a sh:PropertyShape ;
+ sh:name "Agent is a Person" ;
+ sh:path schema:agent ;
+ sh:nodeKind sh:IRI ;
+ sh:class schema:Person ;
+ sh:severity sh:Violation ;
+ sh:message "Each CreateAction agent MUST be typed as schema:Person." ;
+ ] ;
+
+ # If any agent affiliation exists, it MUST be an Organization (IRI)
+ sh:property [
+ a sh:PropertyShape ;
+ sh:name "Affiliation is an Organization" ;
+ sh:path ( schema:agent schema:affiliation ) ;
+ sh:class schema:Organization ;
+ sh:nodeKind sh:IRI ;
+ sh:severity sh:Violation ;
+ sh:message "The affiliation of a CreateAction's agent MUST be a contextual entity with type schema:Organization." ;
+ ] .
\ No newline at end of file
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/must/3_timestamp_format.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/must/3_timestamp_format.ttl
new file mode 100644
index 0000000..c4f2ddb
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/must/3_timestamp_format.ttl
@@ -0,0 +1,54 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix purl: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+# Target every entity that has a startTime and/or endTime property (via
+# sh:targetSubjectsOf) so the entity id is included in any error message, and
+# validate each property in its own sh:property so the property id appears too.
+# The patterns accept either "." or "," before the fractional seconds.
+five-safes-crate:TimeStampFormat
+    a sh:NodeShape ;
+    sh:name "Timestamp Format" ;
+    sh:description "Timestamps MUST follow the RFC 3339 standard (YYYY-MM-DD'T'hh:mm:ss[.fraction](Z | ±hh:mm))." ;
+    sh:targetSubjectsOf schema:startTime, schema:endTime;
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "End TimeStamp" ;
+        sh:path schema:endTime ;
+        sh:minCount 0 ;
+        sh:pattern "^[0-9]{4}-[0-9]{2}-[0-9]{2}[Tt][0-9]{2}:[0-9]{2}:[0-9]{2}([.,][0-9]+)?(Z|z|[+-][0-9]{2}:[0-9]{2})$" ;
+        sh:severity sh:Violation ;
+        sh:message "All `startTime` and `endTime` values MUST follow the RFC 3339 standard (YYYY-MM-DD'T'hh:mm:ss[.fraction](Z | ±hh:mm))." ;
+        sh:description "End timestamps MUST follow the RFC 3339 standard." ;
+    ] ;
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "Start TimeStamp" ;
+        sh:path schema:startTime ;
+        sh:minCount 0 ;
+        sh:pattern "^[0-9]{4}-[0-9]{2}-[0-9]{2}[Tt][0-9]{2}:[0-9]{2}:[0-9]{2}([.,][0-9]+)?(Z|z|[+-][0-9]{2}:[0-9]{2})$" ;
+        sh:severity sh:Violation ;
+        sh:message "All `startTime` and `endTime` values MUST follow the RFC 3339 standard (YYYY-MM-DD'T'hh:mm:ss[.fraction](Z | ±hh:mm))." ;
+        sh:description "Start timestamps MUST follow the RFC 3339 standard." ;
+    ] .
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/must/4_sign_off.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/must/4_sign_off.ttl
new file mode 100644
index 0000000..2b61a25
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/must/4_sign_off.ttl
@@ -0,0 +1,89 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix purl: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+five-safes-crate:SignOffObjectActionAndName
+    a sh:NodeShape ;
+    sh:name "SignOff" ;
+    sh:description "Sign Off phase" ;
+    # Focus nodes: every entity whose additionalType is shp:SignOff.
+    sh:target [
+        a sh:SPARQLTarget ;
+        sh:select """
+            PREFIX schema: <http://schema.org/>
+            PREFIX shp: <https://w3id.org/shp#>
+            SELECT ?this
+            WHERE {
+                ?this schema:additionalType shp:SignOff .
+            }
+        """ ;
+    ] ;
+
+    sh:property [
+        sh:path schema:name ;
+        sh:datatype xsd:string ;
+        sh:minCount 1 ;
+        sh:severity sh:Violation ;
+        sh:message "Sign Off phase MUST have a human-readable name string." ;
+    ] ;
+
+    sh:property [
+        sh:path rdf:type ;
+        sh:minCount 1 ;
+        sh:hasValue schema:AssessAction;
+        sh:severity sh:Violation ;
+        sh:message "Sign Off phase MUST be a `schema:AssessAction`." ;
+    ] .
+
+five-safes-crate:SignOffObjectHasActionStatus
+    a sh:NodeShape ;
+    sh:name "SignOffStatus" ;
+    sh:description "Sign Off Phase Action Status" ;
+    # Focus nodes: shp:SignOff entities that declare an actionStatus (the property itself is not required here).
+    sh:target [
+        a sh:SPARQLTarget ;
+        sh:select """
+            PREFIX schema: <http://schema.org/>
+            PREFIX shp: <https://w3id.org/shp#>
+            SELECT ?this
+            WHERE {
+                ?this schema:additionalType shp:SignOff ;
+                    schema:actionStatus ?status .
+            }
+        """ ;
+    ] ;
+
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "actionStatus" ;
+        sh:description "The value of actionStatus MUST be one of the allowed values." ;
+        sh:path schema:actionStatus ;
+        sh:in (
+            "http://schema.org/PotentialActionStatus"
+            "http://schema.org/ActiveActionStatus"
+            "http://schema.org/CompletedActionStatus"
+            "http://schema.org/FailedActionStatus"
+        ) ;
+        sh:severity sh:Violation ;
+        sh:message "The value of actionStatus MUST be one of the allowed values: PotentialActionStatus; ActiveActionStatus; CompletedActionStatus; FailedActionStatus." ;
+    ] .
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/must/6_workflow_reference.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/must/6_workflow_reference.ttl
new file mode 100644
index 0000000..6477742
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/must/6_workflow_reference.ttl
@@ -0,0 +1,80 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix purl: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+
+five-safes-crate:ReferenceToWorkflowCrate
+ a sh:NodeShape ;
+ sh:name "RootDataEntity" ;
+ sh:targetClass ro-crate:RootDataEntity ;
+
+ # RootDataEntity MUST have a mainEntity property
+ sh:property [
+ a sh:PropertyShape ;
+ sh:name "mainEntity" ;
+ sh:path schema:mainEntity ;
+ sh:minCount 1 ;
+ sh:maxCount 1 ;
+ sh:nodeKind sh:IRI ;
+ sh:severity sh:Violation ;
+ sh:message "The RootDataEntity MUST have exactly one schema:mainEntity property that is an IRI." ;
+ ] ;
+
+ # The mainEntity of a RootDataEntity MUST be a Dataset
+ sh:property [
+ a sh:PropertyShape ;
+ sh:name "mainEntity" ;
+ sh:path schema:mainEntity ;
+ sh:class schema:Dataset ;
+ sh:severity sh:Violation ;
+ sh:message "The mainEntity pointed to by the RootDataEntity MUST be of type schema:Dataset" ;
+ ] .
+
+five-safes-crate:mainEntityHasProperConformsTo
+    a sh:NodeShape ;
+    sh:name "mainEntity" ;
+    sh:description "The mainEntity of the RootDataEntity MUST have a conformsTo property with an IRI starting with https://w3id.org/workflowhub/workflow-ro-crate" ;
+    sh:targetObjectsOf schema:mainEntity ;  # every object of a schema:mainEntity triple is a focus node
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "conformsTo" ;
+        sh:path purl:conformsTo ;
+        sh:minCount 1 ;
+        sh:maxCount 1 ;
+        sh:severity sh:Violation ;
+        sh:message "mainEntity MUST have one and only one `purl:conformsTo` property." ;
+    ] ;
+    sh:sparql [
+        a sh:SPARQLConstraint ;
+        sh:name "conformsTo" ;
+        sh:message "conformsTo IRI must start with https://w3id.org/workflowhub/workflow-ro-crate" ;
+        sh:select """
+            PREFIX schema: <http://schema.org/>
+            PREFIX purl: <http://purl.org/dc/terms/>
+            SELECT $this WHERE {
+                $this purl:conformsTo ?iri .
+                FILTER(!STRSTARTS(STR(?iri), "https://w3id.org/workflowhub/workflow-ro-crate"))
+            }
+        """ ;
+    ] .
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/must/7_requested_workflow_run.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/must/7_requested_workflow_run.ttl
new file mode 100644
index 0000000..5679437
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/must/7_requested_workflow_run.ttl
@@ -0,0 +1,86 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+five-safes-crate:RootDataEntityMentionsCreateAction
+ a sh:NodeShape ;
+ sh:name "RootDataEntity" ;
+ sh:targetClass ro-crate:RootDataEntity ;
+ sh:description "" ;
+
+ sh:property [
+ a sh:PropertyShape ;
+ sh:name "mentions" ;
+ sh:path schema:mentions;
+ sh:qualifiedValueShape [
+ sh:class schema:CreateAction ;
+ ] ;
+ sh:qualifiedMinCount 1 ;
+ sh:severity sh:Violation ;
+ sh:message "`RootDataEntity` MUST reference at least one `CreateAction` through `mentions`" ;
+ ] .
+
+
+five-safes-crate:CreateActionInstrumentAndStatus
+ a sh:NodeShape ;
+ sh:name "CreateAction" ;
+ sh:targetClass schema:CreateAction ;
+ sh:description "" ;
+ # NOTE(review): despite "AndStatus" in the shape name, nothing here constrains actionStatus - confirm it is checked elsewhere.
+ sh:property [
+ a sh:PropertyShape ;
+ sh:name "instrument" ;
+ sh:path schema:instrument;
+ sh:minCount 1 ;
+ sh:severity sh:Violation ;
+ sh:message "`CreateAction` MUST have the `schema:instrument` property" ;
+ ] ;
+ sh:sparql [
+ a sh:SPARQLConstraint ;
+ sh:name "instrument" ;
+ sh:prefixes ro-crate:sparqlPrefixes ;
+ sh:select """
+ SELECT $this ?main ?instrument
+ WHERE {
+ ?root schema:mainEntity ?main .
+ $this schema:instrument ?instrument .
+ FILTER (?instrument != ?main)
+ }
+ """ ;
+ sh:severity sh:Violation ;
+ sh:message "`CreateAction` --> `instrument` MUST reference the same entity as `Root Data Entity` --> `mainEntity`" ;
+ ] ;
+ sh:sparql [
+ a sh:SPARQLConstraint ;
+ sh:prefixes ro-crate:sparqlPrefixes ;
+ sh:name "object" ;
+ sh:select """
+ SELECT $this ?object
+ WHERE {
+ $this schema:object ?object .
+ FILTER NOT EXISTS { ?object a ?type . }
+ }
+ """ ;
+ sh:severity sh:Violation ;
+ sh:message "Each `object` in `CreateAction` MUST reference an existing entity." ;
+ ] .
\ No newline at end of file
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/must/8_disclosure_phase.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/must/8_disclosure_phase.ttl
new file mode 100644
index 0000000..3c8ceca
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/must/8_disclosure_phase.ttl
@@ -0,0 +1,100 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix purl: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+five-safes-crate:DisclosureObjectHasDescriptiveNameAndIsAssessAction
+    a sh:NodeShape ;
+    sh:name "DisclosureCheck" ;
+    sh:description "DisclosureCheck" ;
+    # Focus nodes: every entity whose additionalType is shp:DisclosureCheck.
+    sh:target [
+        a sh:SPARQLTarget ;
+        sh:select """
+            PREFIX schema: <http://schema.org/>
+            PREFIX shp: <https://w3id.org/shp#>
+
+            SELECT ?this
+            WHERE {
+                ?this schema:additionalType shp:DisclosureCheck .
+            }
+        """ ;
+    ] ;
+
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "AssessAction" ;
+        sh:description "`DisclosureCheck` MUST be a `schema:AssessAction`." ;
+        sh:path rdf:type ;
+        sh:minCount 1 ;
+        sh:hasValue schema:AssessAction;
+        sh:severity sh:Violation ;
+        sh:message "`DisclosureCheck` MUST be a `schema:AssessAction`." ;
+    ] ;
+
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "name" ;
+        sh:description "`DisclosureCheck` MUST have a name string of at least 10 characters." ;
+        sh:minCount 1 ;
+        sh:path schema:name ;
+        sh:datatype xsd:string ;
+        sh:minLength 10 ;
+        sh:severity sh:Violation ;
+        sh:message "`DisclosureCheck` MUST have a name string of at least 10 characters." ;
+    ] .
+
+
+five-safes-crate:DisclosureObjectHasActionStatusWithAcceptedValue
+    a sh:NodeShape ;
+    sh:name "DisclosureCheck" ;
+    sh:description "`DisclosureCheck` MUST have an actionStatus with an allowed value (see https://schema.org/ActionStatusType)." ;
+    # Focus nodes: shp:DisclosureCheck entities that declare an actionStatus; entities without one are not selected.
+    sh:target [
+        a sh:SPARQLTarget ;
+        sh:select """
+            PREFIX schema: <http://schema.org/>
+            PREFIX shp: <https://w3id.org/shp#>
+
+            SELECT ?this
+            WHERE {
+                ?this schema:additionalType shp:DisclosureCheck ;
+                    schema:actionStatus ?status .
+            }
+        """ ;
+    ] ;
+
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "actionStatus" ;
+        sh:description "`DisclosureCheck` MUST have an actionStatus with an allowed value (see https://schema.org/ActionStatusType)." ;
+        sh:path schema:actionStatus ;
+        sh:in (
+            "http://schema.org/PotentialActionStatus"
+            "http://schema.org/ActiveActionStatus"
+            "http://schema.org/CompletedActionStatus"
+            "http://schema.org/FailedActionStatus"
+        ) ;
+        sh:severity sh:Violation ;
+        sh:message "`DisclosureCheck` MUST have an actionStatus with an allowed value (see https://schema.org/ActionStatusType)." ;
+    ] .
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/profile.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/profile.ttl
new file mode 100644
index 0000000..f144e01
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/profile.ttl
@@ -0,0 +1,83 @@
+# Copyright (c) 2024-2025 CRS4, University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix dct: .
+@prefix prof: .
+@prefix role: .
+@prefix rdfs: .
+
+
+ a prof:Profile ;
+
+ # the Profile's label
+ rdfs:label "Five Safes RO-Crate 0.4" ;
+
+ # regular metadata, a basic description of the Profile
+ rdfs:comment """Five Safes RO-Crate Metadata Specification 0.4"""@en ;
+
+ # URI of the publisher of the Metadata Specification
+ dct:publisher ;
+
+ # TODO: resolve failures when these profiles are applied
+ # This profile is an extension of Workflow Run Crate for use in Trusted Research Environments (TRE)
+ # prof:isProfileOf ;
+
+ # This profile is a transitive profile of the RO-Crate Metadata Specification
+ prof:isTransitiveProfileOf ;
+ # TODO: resolve failures when these profiles are applied
+ # ,
+ # ;
+
+ # this profile has a JSON-LD context resource
+ prof:hasResource [
+ a prof:ResourceDescriptor ;
+
+ # it's in JSON-LD format
+ dct:format ;
+
+ # it conforms to JSON-LD, here referred to by its namespace URI as a Profile
+ dct:conformsTo ;
+
+ # this profile resource plays the role of "Vocabulary"
+ # described in this ontology's accompanying Roles vocabulary
+ prof:hasRole role:Vocabulary ;
+
+ # this profile resource's actual file
+ prof:hasArtifact ;
+ ] ;
+
+ # this profile has a human-readable documentation resource
+ prof:hasResource [
+ a prof:ResourceDescriptor ;
+
+ # it's in HTML format
+ dct:format ;
+
+ # it conforms to HTML, here referred to by its namespace URI as a Profile
+ dct:conformsTo ;
+
+ # this profile resource plays the role of "Specification"
+ # described in this ontology's accompanying Roles vocabulary
+ prof:hasRole role:Specification ;
+
+ # this profile resource's actual file
+ prof:hasArtifact ;
+
+ # this profile is inherited from Workflow Run profile
+ prof:isInheritedFrom ;
+ ] ;
+
+ # a short code to refer to the Profile with when a URI can't be used
+ prof:hasToken "five-safes-crate" ;
+.
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/should/10_outputs.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/should/10_outputs.ttl
new file mode 100644
index 0000000..25f623a
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/should/10_outputs.ttl
@@ -0,0 +1,89 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+
+five-safes-crate:CreateActionHasResultIfActionCompleted
+    # SHOULD-level check: a completed CreateAction is expected to carry schema:result.
+    a sh:NodeShape ;
+    sh:description "`CreateAction` with CompletedActionStatus SHOULD have the `schema:result` property." ;
+    sh:name "CreateAction" ;
+    # Focus nodes: CreateAction entities whose actionStatus is the CompletedActionStatus string.
+    sh:target [
+        a sh:SPARQLTarget ;
+        sh:prefixes ro-crate:sparqlPrefixes ;
+        sh:description "`CreateAction` with CompletedActionStatus SHOULD have the `schema:result` property." ;
+        sh:name "Result" ;
+        sh:select """
+ SELECT ?this WHERE {
+ ?this a schema:CreateAction ;
+ schema:actionStatus "http://schema.org/CompletedActionStatus" .
+ }
+ """
+    ] ;
+    # Each focus node needs at least one schema:result; a miss is reported as a warning.
+    sh:property [
+        a sh:PropertyShape ;
+        sh:path schema:result ;
+        sh:minCount 1 ;
+        sh:severity sh:Warning ;
+        sh:name "Result" ;
+        sh:message "`CreateAction` with CompletedActionStatus SHOULD have the `schema:result` property." ;
+    ] .
+
+
+five-safes-crate:CreateActionResultOutputsHaveAllowedTypes
+    a sh:NodeShape ;
+    sh:description "Result SHOULD have a `@type` among an allowed set of values." ;
+    sh:name "Output" ;
+    sh:severity sh:Warning ;
+    sh:message "Result SHOULD have a `@type` among an allowed set of values." ;
+    sh:target [  # focus nodes: every value of schema:result on a CreateAction
+        a sh:SPARQLTarget ;
+        sh:select """
+ PREFIX schema:
+ SELECT ?this
+ WHERE {
+ ?createAction a schema:CreateAction .
+ ?createAction schema:result ?this .
+ }
+ """ ;
+    ] ;
+    sh:or (  # the focus node must be an instance of at least one of these classes
+        [
+            sh:class schema:MediaObject ;
+        ]
+        [
+            sh:class schema:Dataset ;
+        ]
+        [
+            sh:class schema:Collection ;
+        ]
+        [
+            sh:class schema:DigitalDocument ;
+        ]
+        [
+            sh:class schema:PropertyValue ;
+        ]
+    ) .
+
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/should/11_workflow_execution_phase.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/should/11_workflow_execution_phase.ttl
new file mode 100644
index 0000000..0c15231
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/should/11_workflow_execution_phase.ttl
@@ -0,0 +1,85 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix purl: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+
+five-safes-crate:RootDataEntityShouldMentionWorkflow
+    a sh:NodeShape ;
+    sh:targetClass ro-crate:RootDataEntity ;
+    sh:description "RootDataEntity SHOULD mention workflow execution object (typed CreateAction)." ;
+    sh:name "RootDataEntity" ;
+    sh:sparql [  # returns the root entity when nothing it mentions is typed CreateAction
+        a sh:SPARQLConstraint ;
+        sh:severity sh:Warning ;
+        sh:message "RootDataEntity SHOULD mention workflow execution object (typed CreateAction)." ;
+        sh:name "mentions" ;
+        sh:select """
+ PREFIX schema:
+ PREFIX rdf:
+ SELECT $this
+ WHERE {
+
+ FILTER NOT EXISTS {
+ $this schema:mentions ?workflowExecution .
+ ?workflowExecution rdf:type schema:CreateAction .
+ }
+ }
+ """ ;
+    ] .
+
+
+
+five-safes-crate:WorkflowexecutionObjectHasEndTimeIfEnded
+    a sh:NodeShape ;
+    sh:description "The workflow execution object SHOULD have an endTime property if it has ended." ;
+    sh:name "WorkflowExecution" ;
+    # Focus nodes: CreateAction entities whose actionStatus is the Completed or Failed status string.
+    sh:target [
+        a sh:SPARQLTarget ;
+        sh:select """
+ PREFIX schema:
+ PREFIX rdf:
+
+ SELECT ?this
+ WHERE {
+ ?this rdf:type schema:CreateAction ;
+ schema:actionStatus ?status .
+ FILTER(?status IN (
+ "http://schema.org/CompletedActionStatus",
+ "http://schema.org/FailedActionStatus"
+ ))
+ }
+ """ ;
+    ] ;
+    # Exactly one schema:endTime is expected on each focus node; otherwise a warning is raised.
+    sh:property [
+        a sh:PropertyShape ;
+        sh:path schema:endTime ;
+        sh:minCount 1 ;
+        sh:maxCount 1 ;
+        sh:severity sh:Warning ;
+        sh:name "EndTime" ;
+        sh:message "The workflow execution object SHOULD have an endTime property if it has ended." ;
+        sh:description "The workflow execution object SHOULD have an endTime property if it has ended." ;
+    ] .
\ No newline at end of file
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/should/12_check_phase.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/should/12_check_phase.ttl
new file mode 100644
index 0000000..9944d8d
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/should/12_check_phase.ttl
@@ -0,0 +1,190 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix purl: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+five-safes-crate:RootDataEntityShouldMentionCheckValueObject
+    a sh:NodeShape ;
+    sh:name "RootDataEntity" ;
+    sh:targetClass ro-crate:RootDataEntity ;
+    sh:description "RootDataEntity SHOULD mention a check value object." ;
+    # The query returns the root entity when nothing it mentions has additionalType shp:CheckValue.
+    sh:sparql [
+        a sh:SPARQLConstraint ;
+        sh:name "mentions" ;
+        sh:description "RootDataEntity SHOULD mention a check value object." ;
+        sh:select """
+ PREFIX schema:
+ PREFIX shp:
+ SELECT $this
+ WHERE {
+ FILTER NOT EXISTS{
+ $this schema:mentions ?action .
+ ?action schema:additionalType shp:CheckValue .
+ }
+ }
+ """ ;
+        sh:severity sh:Warning ;
+        sh:message "RootDataEntity SHOULD mention a check value object." ;
+    ] .
+
+
+five-safes-crate:CheckValueObjectShouldPointToRootDataEntity
+    a sh:NodeShape ;
+    sh:name "CheckValue" ;
+    sh:description "`CheckValue` --> `object` SHOULD point to the root of the RO-Crate" ;
+    sh:target [  # focus nodes: entities with additionalType shp:CheckValue
+        a sh:SPARQLTarget ;
+        sh:select """
+ PREFIX schema:
+ PREFIX shp:
+ SELECT ?this
+ WHERE {
+ ?this schema:additionalType shp:CheckValue .
+ }
+ """ ;
+    ] ;
+    # At least one schema:object is expected, and every value must be a RootDataEntity.
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "object" ;
+        sh:path schema:object ;
+        sh:minCount 1 ;
+        sh:class ro-crate:RootDataEntity ;
+        sh:severity sh:Warning ;
+        sh:message "`CheckValue` --> `object` SHOULD point to the root of the RO-Crate" ;
+    ] .
+
+
+five-safes-crate:CheckValueInstrumentShouldPointToEntityTypedDefinedTerm
+    a sh:NodeShape ;
+    sh:name "CheckValue" ;
+    sh:description "`CheckValue` --> `instrument` SHOULD point to an entity typed `schema:DefinedTerm`" ;
+    sh:target [  # focus nodes: entities with additionalType shp:CheckValue
+        a sh:SPARQLTarget ;
+        sh:select """
+ PREFIX schema:
+ PREFIX shp:
+ SELECT ?this
+ WHERE {
+ ?this schema:additionalType shp:CheckValue .
+ }
+ """ ;
+    ] ;
+    # At least one schema:instrument is expected, and every value must be a schema:DefinedTerm.
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "instrument" ;
+        sh:path schema:instrument ;
+        sh:minCount 1 ;
+        sh:class schema:DefinedTerm ;
+        sh:severity sh:Warning ;
+        sh:message "`CheckValue` --> `instrument` SHOULD point to an entity typed `schema:DefinedTerm`" ;
+    ] .
+
+
+five-safes-crate:CheckValueAgentShouldIdentifyTheAgentWhoPerformnedTheCheck
+    a sh:NodeShape ;
+    sh:name "CheckValue" ;
+    sh:description "`CheckValue` --> `agent` SHOULD reference the agent who initiated the check" ;
+    sh:target [  # focus nodes: entities with additionalType shp:CheckValue
+        a sh:SPARQLTarget ;
+        sh:select """
+ PREFIX schema:
+ PREFIX shp:
+ SELECT ?this
+ WHERE {
+ ?this schema:additionalType shp:CheckValue .
+ }
+ """ ;
+    ] ;
+    # At least one schema:agent is expected, and every value must be an IRI node.
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "agent" ;
+        sh:path schema:agent ;
+        sh:minCount 1 ;
+        sh:nodeKind sh:IRI ;
+        sh:severity sh:Warning ;
+        sh:message "`CheckValue` --> `agent` SHOULD reference the agent who initiated the check" ;
+    ] .
+
+
+five-safes-crate:CheckValueShouldHaveEndTime
+    a sh:NodeShape ;
+    sh:name "CheckValue" ;
+    sh:description "`CheckValue` SHOULD have the `endTime` property if `actionStatus` is either CompletedActionStatus or FailedActionStatus." ;
+    sh:target [  # focus nodes: CheckValue entities whose actionStatus is the Completed or Failed string
+        a sh:SPARQLTarget ;
+        sh:select """
+ PREFIX schema:
+ PREFIX shp:
+ SELECT ?this
+ WHERE {
+ ?this schema:additionalType shp:CheckValue ;
+ schema:actionStatus ?status .
+ FILTER(?status IN (
+ "http://schema.org/CompletedActionStatus",
+ "http://schema.org/FailedActionStatus"
+ ))
+ }
+ """ ;
+    ] ;
+    # Exactly one schema:endTime is expected on each focus node; otherwise a warning is raised.
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "EndTime" ;
+        sh:path schema:endTime ;
+        sh:minCount 1 ;
+        sh:maxCount 1 ;
+        sh:severity sh:Warning ;
+        sh:message "`CheckValue` SHOULD have the `endTime` property." ;
+    ] .
+
+
+five-safes-crate:CheckValueShouldHaveActionStatus
+    a sh:NodeShape ;
+    sh:name "CheckValue" ;
+    sh:description "CheckValue SHOULD have actionStatus property." ;
+    # Focus nodes: entities with additionalType shp:CheckValue.
+    sh:target [
+        a sh:SPARQLTarget ;
+        sh:select """
+ PREFIX schema:
+ PREFIX shp:
+
+ SELECT ?this
+ WHERE {
+ ?this schema:additionalType shp:CheckValue .
+ }
+ """ ;
+    ] ;
+    # At least one schema:actionStatus is expected; a miss is reported as a warning.
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "ActionStatus" ;
+        sh:path schema:actionStatus ;
+        sh:minCount 1 ;
+        sh:severity sh:Warning ;
+        sh:message "CheckValue SHOULD have actionStatus property." ;
+    ] .
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/should/13_validation_phase.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/should/13_validation_phase.ttl
new file mode 100644
index 0000000..cb2b181
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/should/13_validation_phase.ttl
@@ -0,0 +1,165 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix purl: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+five-safes-crate:RootDataEntityShouldMentionValidationCheckObject
+    a sh:NodeShape ;
+    sh:name "RootDataEntity" ;
+    sh:targetClass ro-crate:RootDataEntity ;
+    sh:description "RootDataEntity SHOULD mention a ValidationCheck object." ;
+    # The query returns the root entity when nothing it mentions has additionalType shp:ValidationCheck.
+    sh:sparql [
+        a sh:SPARQLConstraint ;
+        sh:name "mentions" ;
+        sh:description "RootDataEntity SHOULD mention a ValidationCheck object." ;
+        sh:select """
+ PREFIX schema:
+ PREFIX shp:
+ SELECT $this
+ WHERE {
+ FILTER NOT EXISTS{
+ $this schema:mentions ?action .
+ ?action schema:additionalType shp:ValidationCheck .
+ }
+ }
+ """ ;
+        sh:severity sh:Warning ;
+        sh:message "RootDataEntity SHOULD mention a ValidationCheck object." ;
+    ] .
+
+
+five-safes-crate:ValidationCheckObjectShouldPointToRootDataEntity
+    a sh:NodeShape ;
+    sh:name "ValidationCheck" ;
+    sh:description "`ValidationCheck` --> `object` SHOULD point to the root of the RO-Crate" ;
+    sh:target [  # focus nodes: entities with additionalType shp:ValidationCheck
+        a sh:SPARQLTarget ;
+        sh:select """
+ PREFIX schema:
+ PREFIX shp:
+ SELECT ?this
+ WHERE {
+ ?this schema:additionalType shp:ValidationCheck .
+ }
+ """ ;
+    ] ;
+    # At least one schema:object is expected, and every value must be a RootDataEntity.
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "object" ;
+        sh:path schema:object ;
+        sh:minCount 1 ;
+        sh:class ro-crate:RootDataEntity ;
+        sh:severity sh:Warning ;
+        sh:message "`ValidationCheck` --> `object` SHOULD point to the root of the RO-Crate" ;
+    ] .
+
+five-safes-crate:ValidationCheckInstrumentShouldPointToEntityWithSpecificId
+    a sh:NodeShape ;
+    sh:name "ValidationCheck" ;
+    sh:description "`ValidationCheck` --> `instrument` SHOULD point to an entity with @id https://w3id.org/5s-crate/0.4" ;
+    sh:target [  # focus nodes: entities with additionalType shp:ValidationCheck
+        a sh:SPARQLTarget ;
+        sh:select """
+ PREFIX schema:
+ PREFIX shp:
+ SELECT ?this
+ WHERE {
+ ?this schema:additionalType shp:ValidationCheck .
+ }
+ """ ;
+    ] ;
+    # schema:instrument must be present, hold only IRIs, and include the profile IRI named in the message.
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "instrument" ;
+        sh:description "`ValidationCheck` --> `instrument` SHOULD point to an entity with @id https://w3id.org/5s-crate/0.4" ;
+        sh:path schema:instrument ;
+        sh:minCount 1 ;
+        sh:nodeKind sh:IRI ;
+        sh:hasValue <https://w3id.org/5s-crate/0.4> ;
+        sh:severity sh:Warning ;
+        sh:message "`ValidationCheck` --> `instrument` SHOULD point to an entity with @id https://w3id.org/5s-crate/0.4" ;
+    ] .
+
+
+five-safes-crate:ValidationCheckShouldHaveActionStatus
+    a sh:NodeShape ;
+    sh:name "ValidationCheck" ;
+    sh:description "ValidationCheck SHOULD have actionStatus property." ;
+    # Focus nodes: entities with additionalType shp:ValidationCheck.
+    sh:target [
+        a sh:SPARQLTarget ;
+        sh:select """
+ PREFIX schema:
+ PREFIX shp:
+
+ SELECT ?this
+ WHERE {
+ ?this schema:additionalType shp:ValidationCheck .
+ }
+ """ ;
+    ] ;
+    # At least one schema:actionStatus is expected; a miss is reported as a warning.
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "ActionStatus" ;
+        sh:path schema:actionStatus ;
+        sh:minCount 1 ;
+        sh:severity sh:Warning ;
+        sh:message "ValidationCheck SHOULD have actionStatus property." ;
+    ] .
+
+
+five-safes-crate:DownloadActionShouldHaveEndTimeIfBegun  # NOTE(review): targets ValidationCheck, not DownloadAction; IRI kept as is
+    a sh:NodeShape ;
+    sh:name "ValidationCheck" ;
+    sh:description "ValidationCheck SHOULD have the `endTime` property if `actionStatus` is either CompletedActionStatus or FailedActionStatus." ;
+    sh:target [  # focus nodes: ValidationCheck entities whose actionStatus is the Completed or Failed string
+        a sh:SPARQLTarget ;
+        sh:select """
+ PREFIX schema:
+ PREFIX shp:
+ SELECT ?this
+ WHERE {
+ ?this schema:additionalType shp:ValidationCheck ;
+ schema:actionStatus ?status .
+ FILTER(?status IN (
+ "http://schema.org/CompletedActionStatus",
+ "http://schema.org/FailedActionStatus"
+ ))
+ }
+ """ ;
+    ] ;
+    # Exactly one schema:endTime is expected on each focus node; otherwise a warning is raised.
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "EndTime" ;
+        sh:path schema:endTime ;
+        sh:minCount 1 ;
+        sh:maxCount 1 ;
+        sh:severity sh:Warning ;
+        sh:description "ValidationCheck SHOULD have the `endTime` property if `actionStatus` is either CompletedActionStatus or FailedActionStatus." ;
+        sh:message "ValidationCheck SHOULD have the `endTime` property if `actionStatus` is either CompletedActionStatus or FailedActionStatus." ;
+    ] .
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/should/14_workflow_retrieval_phase.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/should/14_workflow_retrieval_phase.ttl
new file mode 100644
index 0000000..f2bd9dd
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/should/14_workflow_retrieval_phase.ttl
@@ -0,0 +1,125 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix purl: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+five-safes-crate:DownloadActionEntitySHOULDExist
+    a sh:NodeShape ;
+    sh:name "RootDataEntity" ;
+    sh:description "An entity typed DownloadAction SHOULD exist." ;
+    sh:targetClass ro-crate:RootDataEntity ;
+    # The query returns the root entity when no node in the data graph is typed schema:DownloadAction.
+    sh:sparql [
+        a sh:SPARQLConstraint ;
+        sh:name "DownloadAction" ;
+        sh:select """
+ PREFIX schema:
+ PREFIX rdf:
+
+ SELECT $this
+ WHERE {
+ FILTER NOT EXISTS {
+ ?s rdf:type schema:DownloadAction .
+ }
+ }
+ """ ;
+        sh:severity sh:Warning ;
+        sh:message "An entity typed DownloadAction SHOULD exist." ;
+    ] .
+
+
+
+five-safes-crate:RootDataEntitySHOULDMentionDownloadActionIfPresent
+    a sh:NodeShape ;
+    sh:name "RootDataEntity" ;
+    sh:description "RootDataEntity SHOULD mention DownloadAction if this exists." ;
+    sh:targetClass ro-crate:RootDataEntity ;
+    # One result row per DownloadAction that the root entity does not mention.
+    sh:sparql [
+        a sh:SPARQLConstraint ;
+        sh:name "mentions" ;
+        sh:select """
+ PREFIX schema:
+ PREFIX rdf:
+
+ SELECT $this ?da
+ WHERE {
+ ?da rdf:type schema:DownloadAction .
+ FILTER NOT EXISTS {
+ $this schema:mentions ?da .
+ }
+ }
+ """ ;
+        sh:severity sh:Warning ;
+        sh:message "RootDataEntity SHOULD mention DownloadAction if this exists." ;
+    ] .
+
+
+five-safes-crate:DownloadActionShouldHaveEndTimeIfEnded
+    a sh:NodeShape ;
+    sh:name "DownloadAction" ;
+    sh:description "`DownloadAction` SHOULD have the `endTime` property if `actionStatus` is either CompletedActionStatus or FailedActionStatus." ;
+    sh:target [  # focus nodes: DownloadAction entities whose actionStatus is the Completed or Failed string
+        a sh:SPARQLTarget ;
+        sh:select """
+ PREFIX schema:
+ PREFIX rdf:
+
+ SELECT ?this
+ WHERE {
+ ?this rdf:type schema:DownloadAction ;
+ schema:actionStatus ?status .
+ FILTER(?status IN (
+ "http://schema.org/CompletedActionStatus",
+ "http://schema.org/FailedActionStatus"
+ ))
+ }
+ """ ;
+    ] ;
+    # Exactly one schema:endTime is expected on each focus node; otherwise a warning is raised.
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "EndTime" ;
+        sh:path schema:endTime ;
+        sh:minCount 1 ;
+        sh:maxCount 1 ;
+        sh:severity sh:Warning ;
+        sh:message "`DownloadAction` SHOULD have the `endTime` property if `actionStatus` is either CompletedActionStatus or FailedActionStatus." ;
+    ] .
+
+
+five-safes-crate:DownloadActionShouldHaveActionStatus
+    a sh:NodeShape ;
+    sh:name "DownloadAction" ;
+    sh:targetClass schema:DownloadAction ;
+    sh:description "`DownloadAction` SHOULD have `actionStatus` property." ;
+    # At least one schema:actionStatus is expected on every DownloadAction; a miss is a warning.
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "ActionStatus" ;
+        sh:path schema:actionStatus ;
+        sh:minCount 1 ;
+        sh:severity sh:Warning ;
+        sh:message "`DownloadAction` SHOULD have `actionStatus` property." ;
+    ] .
+
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/should/1_requesting_agent.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/should/1_requesting_agent.ttl
new file mode 100644
index 0000000..332c67d
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/should/1_requesting_agent.ttl
@@ -0,0 +1,46 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+five-safes-crate:AgentIsMemberOf
+    a sh:NodeShape ;
+    sh:name "Requesting Agent" ;
+    sh:target [  # focus nodes: every distinct agent of a CreateAction
+        a sh:SPARQLTarget ;
+        sh:prefixes ro-crate:sparqlPrefixes ;
+        sh:select """
+ SELECT DISTINCT ?this WHERE {
+ ?action a schema:CreateAction ;
+ schema:agent ?this .
+ }
+ """
+    ] ;
+    # At least one schema:memberOf is expected on each agent; a miss is reported as a warning.
+    sh:property [
+        a sh:PropertyShape ;
+        sh:path schema:memberOf ;
+        sh:minCount 1 ;
+        sh:severity sh:Warning ;
+        sh:name "memberOf" ;
+        sh:message """The Requesting Agent SHOULD have a `memberOf` property.""" ;
+    ] .
\ No newline at end of file
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/should/1_responsible_project.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/should/1_responsible_project.ttl
new file mode 100644
index 0000000..28d1c1e
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/should/1_responsible_project.ttl
@@ -0,0 +1,60 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+five-safes-crate:ResponsibleProjectMemberAndSourceOrganizationIntersection
+    a sh:NodeShape ;
+    sh:description """At least one of the organisations that are members of the responsible project SHOULD be included in the Requesting Agent's affiliations, if such properties exist.""" ;
+    sh:name "Organizations (members of Responsible Project)" ;
+    sh:target [  # focus nodes: Person agents that have an affiliation and a memberOf project with members
+        a sh:SPARQLTarget ;
+        sh:prefixes ro-crate:sparqlPrefixes ;
+        sh:select """
+ SELECT DISTINCT ?this WHERE {
+ ?action a schema:CreateAction ;
+ schema:agent ?this .
+ ?this a schema:Person ;
+ schema:memberOf ?project ;
+ schema:affiliation ?someAffiliation .
+ ?project schema:member ?org2 .
+ }
+ """
+    ] ;
+    sh:sparql [
+        a sh:SPARQLConstraint ;
+        sh:severity sh:Warning ;
+        sh:name "Intersection with agent affiliations" ;
+        # Returns the agent when none of its affiliations is a member of a project it belongs to.
+        sh:description """At least one of the organisations that are members of the responsible project SHOULD be included in the Requesting Agent's affiliations, if such properties exist.""" ;
+        sh:message """At least one of the organisations that are members of the responsible project SHOULD be included in the Requesting Agent's affiliations, if such properties exist.""" ;
+        sh:prefixes ro-crate:sparqlPrefixes ;
+        sh:select """
+ SELECT $this WHERE {
+ FILTER NOT EXISTS {
+ $this schema:affiliation ?org .
+ $this schema:memberOf ?project .
+ ?project schema:member ?org .
+ }
+ }
+ """ ;
+    ] .
\ No newline at end of file
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/should/2_requesting_agent.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/should/2_requesting_agent.ttl
new file mode 100644
index 0000000..c21b3f9
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/should/2_requesting_agent.ttl
@@ -0,0 +1,49 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+# SHOULD-level check: the agent of a CreateAction is expected to declare an affiliation
+five-safes-crate:PersonAgentHasAffiliation
+    a sh:NodeShape ;
+    sh:description "The agent of a CreateAction entity" ;
+    sh:name "Agent of CreateAction" ;
+    sh:target [
+        a sh:SPARQLTarget ;
+        sh:prefixes ro-crate:sparqlPrefixes ;
+        sh:select """
+ SELECT DISTINCT ?this WHERE {
+ ?action a schema:CreateAction ;
+ schema:agent ?this .
+ }
+ """
+    ] ;
+    # Focus nodes are the distinct values of schema:agent on CreateAction entities.
+    # Each of them needs at least one schema:affiliation; a miss is reported as a warning.
+    sh:property [
+        a sh:PropertyShape ;
+        sh:path schema:affiliation ;
+        sh:minCount 1 ;
+        sh:severity sh:Warning ;
+        sh:name "Presence of affiliations" ;
+        sh:message "The agent of a CreateAction entity SHOULD have an affiliation" ;
+    ] .
\ No newline at end of file
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/should/4_sign_off.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/should/4_sign_off.ttl
new file mode 100644
index 0000000..129c02d
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/should/4_sign_off.ttl
@@ -0,0 +1,177 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+@prefix shp: .
+
+
+# Sign-Off Phase presence: two warning-level SPARQL constraints evaluated on the root entity
+five-safes-crate:SignOffPhase
+    a sh:NodeShape ;
+    sh:description "Check the Sign-Off Phase" ;
+    sh:targetClass ro-crate:RootDataEntity ;
+    sh:sparql [  # returns the root entity when no node has additionalType shp:SignOff
+        sh:severity sh:Warning ;
+        sh:message "There SHOULD be a Sign-Off Phase in the Final RO-Crate" ;
+        sh:select """
+ PREFIX schema:
+ PREFIX shp:
+ SELECT $this
+ WHERE {
+ FILTER NOT EXISTS {
+ ?action schema:additionalType shp:SignOff .
+ }
+ }
+ """ ;
+    ] ;
+    sh:sparql [  # one row per SignOff entity that the root entity does not mention
+        sh:severity sh:Warning ;
+        sh:message "The Root Data Entity SHOULD mention a Sign-Off Phase Object" ;
+        sh:select """
+ PREFIX schema:
+ PREFIX shp:
+ SELECT $this
+ WHERE {
+ ?action schema:additionalType shp:SignOff .
+ FILTER NOT EXISTS {
+ $this schema:mentions ?action .
+ }
+ }
+ """ ;
+    ] .
+
+
+five-safes-crate:SignOffPhaseProperties
+    a sh:NodeShape ;
+    sh:description "Check Sign-Off Phase Properties" ;
+    sh:target [  # focus nodes: entities with additionalType shp:SignOff
+        a sh:SPARQLTarget ;
+        sh:select """
+ PREFIX schema:
+ PREFIX shp:
+ SELECT ?this
+ WHERE {
+ ?this schema:additionalType shp:SignOff .
+ }
+ """
+    ] ;
+    sh:property [
+        sh:description "Check if the Sign Off phase has an actionStatus" ;
+        sh:path schema:actionStatus ;
+        sh:minCount 1 ;
+        sh:severity sh:Warning ;
+        sh:message "The Sign-Off Phase SHOULD have an actionStatus" ;
+    ] ;
+    sh:property [
+        sh:description "Check if the Sign Off phase has an agent" ;
+        sh:path schema:agent ;
+        sh:minCount 1 ;
+        sh:severity sh:Warning ;
+        sh:message "The Sign-Off Phase SHOULD have an agent" ;
+    ] ;
+    sh:property [
+        sh:description "Check if the Sign Off phase has an instrument (TRE Policy)" ;
+        sh:path schema:instrument ;
+        sh:class schema:CreativeWork ;
+        sh:nodeKind sh:IRI ;
+        sh:minCount 1 ;
+        sh:severity sh:Warning ;
+        sh:message "The Sign-Off Phase SHOULD have an TRE policy (instrument) with type CreativeWork" ;
+    ] ;
+    sh:property [  # sequence path: the schema:name of the instrument
+        sh:description "Check if the Sign Off phase instrument (TRE Policy) has a human-readable name" ;
+        sh:path ( schema:instrument schema:name ) ;
+        sh:datatype xsd:string ;
+        sh:minCount 1 ;
+        sh:severity sh:Warning ;
+        sh:message "The Sign-Off Phase SHOULD have an TRE policy (instrument) with a human-readable name" ;
+    ] ;
+    sh:sparql [  # the query uses the `a` keyword so it needs no `rdf:` prefix declaration
+        a sh:SPARQLConstraint ;
+        sh:description "Check if the Sign Off phase lists the workflow as an object" ;
+        sh:select """
+ PREFIX schema:
+ PREFIX rocrate:
+ SELECT $this
+ WHERE {
+ ?root a schema:Dataset ;
+ schema:mainEntity ?mainEntity ;
+ a rocrate:RootDataEntity .
+ FILTER NOT EXISTS {
+ $this schema:object ?mainEntity .
+ }
+ }
+ """ ;
+        sh:severity sh:Warning ;
+        sh:message "The Sign-Off Phase SHOULD list the workflow (mainEntity) as an object" ;
+    ] ;
+    sh:sparql [  # the query uses the `a` keyword so it needs no `rdf:` prefix declaration
+        a sh:SPARQLConstraint ;
+        sh:description "Check if the Sign Off phase lists the Responsible Project as an object" ;
+        sh:select """
+ PREFIX schema:
+ PREFIX rocrate:
+ SELECT $this
+ WHERE {
+ ?root a schema:Dataset ;
+ a rocrate:RootDataEntity ;
+ schema:sourceOrganization ?sourceOrg .
+ FILTER NOT EXISTS {
+ $this schema:object ?sourceOrg .
+ }
+ }
+ """ ;
+        sh:severity sh:Warning ;
+        sh:message "The Sign-Off Phase SHOULD list the Responsible Project (sourceOrganization) as an object" ;
+    ] .
+
+
+five-safes-crate:SignOffPhaseEndTime
+    a sh:NodeShape ;
+    sh:description "Sign Off end time check" ;
+    # Focus nodes: SignOff entities whose actionStatus is the Completed or Failed status string.
+    sh:target [
+        a sh:SPARQLTarget ;
+        sh:select """
+ PREFIX schema:
+ PREFIX shp:
+ SELECT ?this
+ WHERE {
+ ?this schema:additionalType shp:SignOff ;
+ schema:actionStatus ?status .
+ FILTER(?status IN (
+ "http://schema.org/CompletedActionStatus",
+ "http://schema.org/FailedActionStatus"
+ ))
+ }
+ """ ;
+    ] ;
+    # Exactly one schema:endTime is expected on each focus node; otherwise a warning is raised.
+    sh:property [
+        a sh:PropertyShape ;
+        sh:path schema:endTime ;
+        sh:minCount 1 ;
+        sh:maxCount 1 ;
+        sh:severity sh:Warning ;
+        sh:name "EndTime" ;
+        sh:message "Sign Off object SHOULD have endTime property if action completed or failed." ;
+        sh:description "Sign Off object SHOULD have endTime property if action completed or failed." ;
+    ] .
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/should/6_workflow_reference.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/should/6_workflow_reference.ttl
new file mode 100644
index 0000000..98c2856
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/should/6_workflow_reference.ttl
@@ -0,0 +1,49 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix purl: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+five-safes-crate:DatasetMustHaveDistributionIfURI
+    a sh:NodeShape ;
+    sh:description "If mainEntity has an HTTP(S) @id, it SHOULD have a distribution that is an HTTP(S) URL." ;
+    sh:name "mainEntity" ;
+    sh:targetObjectsOf schema:mainEntity ;
+    # Returns the focus node when its IRI is http(s) but none of its distributions is an http(s) IRI.
+    sh:sparql [
+        a sh:SPARQLConstraint ;
+        sh:prefixes ro-crate:sparqlPrefixes ;
+        sh:severity sh:Warning ;
+        sh:name "distribution" ;
+        sh:description "If mainEntity has an HTTP(S) @id, it SHOULD have a distribution that is an HTTP(S) URL." ;
+        sh:message "If mainEntity has an HTTP(S) @id SHOULD have at least one distribution with an HTTP(S) URL." ;
+        sh:select """
+ SELECT $this
+ WHERE {
+ FILTER (STRSTARTS(STR($this), "http://") || STRSTARTS(STR($this), "https://")) .
+ FILTER NOT EXISTS {
+ $this schema:distribution ?dist .
+ FILTER (STRSTARTS(STR(?dist), "http://") || STRSTARTS(STR(?dist), "https://")) .
+ }
+ }
+ """ ;
+    ] .
\ No newline at end of file
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/should/7_requested_workflow_run.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/should/7_requested_workflow_run.ttl
new file mode 100644
index 0000000..41af739
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/should/7_requested_workflow_run.ttl
@@ -0,0 +1,36 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+# SHOULD-level check: every CreateAction carries at least one `object`, and all values are IRIs
+five-safes-crate:CreateActionShouldHaveObjectProperty
+    a sh:NodeShape ;
+    sh:name "CreateAction" ;
+    sh:targetClass schema:CreateAction ;
+    sh:property [
+        sh:severity sh:Warning ;
+        sh:message "`CreateAction` SHOULD have the property `object` with IRI values." ;
+        sh:path schema:object ;
+        sh:nodeKind sh:IRI ;
+        sh:minCount 1 ;
+    ] .
\ No newline at end of file
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/should/8_disclosure_phase.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/should/8_disclosure_phase.ttl
new file mode 100644
index 0000000..1d46625
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/should/8_disclosure_phase.ttl
@@ -0,0 +1,114 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
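+@prefix ro-crate: <https://github.com/crs4/rocrate-validator/profiles/ro-crate/> .
+@prefix five-safes-crate: <https://github.com/eScienceLab/rocrate-validator/profiles/five-safes-crate/> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix schema: <http://schema.org/> .
+@prefix purl: <http://purl.org/dc/terms/> .
+@prefix sh: <http://www.w3.org/ns/shacl#> .
+@prefix validator: <https://github.com/crs4/rocrate-validator/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .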
+
+
+five-safes-crate:RootDataEntityShouldMentionDisclosureObject
+ a sh:NodeShape ;
+ sh:name "RootDataEntity" ;
+ sh:targetClass ro-crate:RootDataEntity ;
+ sh:description "RootDataEntity SHOULD mention a disclosure object." ;
+
+ sh:sparql [
+ a sh:SPARQLConstraint ;
+ sh:name "mentions" ;
+ sh:description "`RootDataEntity` SHOULD mention a disclosure object." ;
+ sh:select """
+            PREFIX schema: <http://schema.org/>
+            PREFIX shp: <https://w3id.org/shp#>
+ SELECT $this
+ WHERE {
+ FILTER NOT EXISTS{
+ $this schema:mentions ?action .
+ ?action a schema:AssessAction ;
+ schema:additionalType shp:DisclosureCheck .
+ }
+ }
+ """ ;
+ sh:severity sh:Warning ;
+ sh:message "`RootDataEntity` SHOULD mention a disclosure object." ;
+ ] .
+
+
+five-safes-crate:DisclosureObjectHasActionStatus
+ a sh:NodeShape ;
+ sh:name "DisclosureCheck" ;
+ sh:description "The `DisclosureCheck` SHOULD have `actionStatus` property." ;
+
+ sh:target [
+ a sh:SPARQLTarget ;
+ sh:select """
+            PREFIX schema: <http://schema.org/>
+            PREFIX shp: <https://w3id.org/shp#>
+
+ SELECT ?this
+ WHERE {
+ ?this schema:additionalType shp:DisclosureCheck .
+ }
+ """ ;
+ ] ;
+
+ sh:property [
+ a sh:PropertyShape ;
+ sh:name "ActionStatus" ;
+ sh:description "The `DisclosureCheck` SHOULD have `actionStatus` property." ;
+ sh:path schema:actionStatus ;
+ sh:minCount 1 ;
+ sh:severity sh:Warning ;
+ sh:message "The `DisclosureCheck` SHOULD have `actionStatus` property." ;
+ ] .
+
+
+five-safes-crate:DisclosureObjectHasEndTimeIfcompletedOrFailed
+ a sh:NodeShape ;
+ sh:name "DisclosureCheck" ;
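+    sh:description "`DisclosureCheck` SHOULD have the `endTime` property if `actionStatus` is either CompletedActionStatus or FailedActionStatus." ;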
+
+ sh:target [
+ a sh:SPARQLTarget ;
+ sh:select """
+            PREFIX schema: <http://schema.org/>
+            PREFIX shp: <https://w3id.org/shp#>
+
+ SELECT ?this
+ WHERE {
+ ?this schema:additionalType shp:DisclosureCheck ;
+ schema:actionStatus ?status .
+ FILTER(?status IN (
+ "http://schema.org/CompletedActionStatus",
+ "http://schema.org/FailedActionStatus"
+ ))
+ }
+ """ ;
+ ] ;
+
+ sh:property [
+ a sh:PropertyShape ;
+ sh:name "EndTime" ;
+ sh:path schema:endTime ;
+ sh:minCount 1 ;
+ sh:maxCount 1 ;
+ sh:severity sh:Warning ;
+ sh:description "`DisclosureCheck` SHOULD have the `endTime` property if `actionStatus` is either CompletedActionStatus or FailedActionStatus." ;
+ sh:message "`DisclosureCheck` SHOULD have the `endTime` property if `actionStatus` is either CompletedActionStatus or FailedActionStatus." ;
+ ] .
\ No newline at end of file
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/should/9_inputs.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/should/9_inputs.ttl
new file mode 100644
index 0000000..5726109
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/should/9_inputs.ttl
@@ -0,0 +1,58 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
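+@prefix ro-crate: <https://github.com/crs4/rocrate-validator/profiles/ro-crate/> .
+@prefix five-safes-crate: <https://github.com/eScienceLab/rocrate-validator/profiles/five-safes-crate/> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix schema: <http://schema.org/> .
+@prefix bioschemas: <https://bioschemas.org/> .
+@prefix purl: <http://purl.org/dc/terms/> .
+@prefix sh: <http://www.w3.org/ns/shacl#> .
+@prefix validator: <https://github.com/crs4/rocrate-validator/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .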
+
+
+
+five-safes-crate:InputEntityReferencesFormalParameterViaExampleOfWork
+ a sh:NodeShape ;
+ sh:name "Input" ;
+    sh:description "Input SHOULD reference a FormalParameter using exampleOfWork" ;
+ sh:target [
+ a sh:SPARQLTarget ;
+ sh:prefixes ro-crate:sparqlPrefixes ;
+ sh:select """
+ SELECT ?this WHERE {
+ ?action a schema:CreateAction ;
+ schema:object ?this .
+ }
+ """
+ ] ;
+ sh:sparql [
+ a sh:SPARQLConstraint ;
+ sh:name "exampleOfWork" ;
+ sh:description "Input SHOULD reference a FormalParameter using exampleOfWork" ;
+
+ sh:prefixes ro-crate:sparqlPrefixes ;
+ sh:select """
+ SELECT $this WHERE {
+ FILTER NOT EXISTS {
+ $this schema:exampleOfWork ?par .
+ ?par a bioschemas:FormalParameter .
+ }
+ }
+ """ ;
+ sh:severity sh:Warning ;
+ sh:message "Input SHOULD reference a FormalParameter using exampleOfWork" ;
+ ] .
\ No newline at end of file
diff --git a/tests/data/rocrate_validator_profiles/ro-crate/may/4_data_entity_metadata.ttl b/tests/data/rocrate_validator_profiles/ro-crate/may/4_data_entity_metadata.ttl
new file mode 100644
index 0000000..4dfd242
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/ro-crate/may/4_data_entity_metadata.ttl
@@ -0,0 +1,89 @@
+# Copyright (c) 2024-2026 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
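+@prefix ro-crate: <https://github.com/crs4/rocrate-validator/profiles/ro-crate/> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix sh: <http://www.w3.org/ns/shacl#> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+@prefix owl: <http://www.w3.org/2002/07/owl#> .
+@prefix schema: <http://schema.org/> .
+@prefix validator: <https://github.com/crs4/rocrate-validator/> .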
+
+ro-crate:FileDataEntityWebOptionalProperties a sh:NodeShape ;
+ sh:name "File Data Entity with web presence: OPTIONAL properties" ;
+    sh:description """A File Data Entity may have a corresponding web presence,
+        for instance a landing page that describes the file, including persistent identifiers (e.g. DOI),
+        resolving to an intermediate HTML page instead of the downloadable file directly.
+        These can be included for File Data Entities as additional metadata by using the properties:
+        `identifier`, `url`, `subjectOf` and `mainEntityOfPage`""" ;
+ sh:targetClass ro-crate:File ;
+    # Check the optional web-presence properties: identifier, url, subjectOf and mainEntityOfPage
+ sh:property [
+ a sh:PropertyShape ;
+ sh:minCount 1 ;
+ sh:name "File Data Entity: optional formal `identifier` (e.g. DOI)" ;
+ sh:description """Check if the File Data Entity has a formal identifier string such as a DOI""" ;
+ sh:path schema:identifier ;
+ sh:datatype xsd:anyURI ;
+ sh:severity sh:Info ;
+ sh:message """The File Data Entity MAY have a formal identifier specified through an `identifier` property""" ;
+ ] ;
+ sh:property [
+ a sh:PropertyShape ;
+ sh:minCount 1 ;
+ sh:name "File Data Entity: optional `url` property" ;
+ sh:description """Check if the File Data Entity has an optional `download` link""" ;
+ sh:path schema:url ;
+ sh:datatype xsd:anyURI ;
+ sh:severity sh:Info ;
+ sh:message """The File Data Entity MAY use a `url` property to denote a `download` link""" ;
+ ] ;
+ sh:property [
+ a sh:PropertyShape ;
+ sh:minCount 1 ;
+ sh:name "File Data Entity: optional `subjectOf` property" ;
+ sh:description """Check if the File Data Entity includes a `subjectOf` property to link `CreativeWork` instances that mention it.""" ;
+ sh:path schema:subjectOf ;
+ sh:class schema:WebPage, schema:CreativeWork ;
+ sh:severity sh:Info ;
+ sh:message """The File Data Entity MAY include a `subjectOf` property to link `CreativeWork` instances that mention it.""" ;
+ ] ;
+ sh:property [
+ a sh:PropertyShape ;
+ sh:minCount 1 ;
+ sh:name "File Data Entity: optional `mainEntityOfPage` property" ;
+ sh:description """Check if the File Data Entity has a `mainEntityOfPage` property""" ;
+ sh:path schema:mainEntityOfPage ;
+ sh:class schema:WebPage, schema:CreativeWork ;
+ sh:severity sh:Info ;
+ sh:message """The File Data Entity MAY have a `mainEntityOfPage` property""" ;
+ ] .
+
+
+ro-crate:DirectoryDataEntityWebOptionalDistribution a sh:NodeShape ;
+ sh:name "Directory Data Entity: OPTIONAL `distribution` property" ;
+ sh:description """A Directory Data Entity MAY have a `distribution` property to denote the distribution of the files within the directory""" ;
+ sh:targetClass ro-crate:File ;
+    # Check if the Directory Data Entity has a distribution property (NOTE(review): this shape targets ro-crate:File - confirm that is intended)
+ sh:property [
+ a sh:PropertyShape ;
+ sh:minCount 1 ;
+ sh:name "Directory Data Entity: optional `distribution` property" ;
+ sh:description """Check if the Directory Data Entity has a `distribution` property""" ;
+ sh:path schema:distribution ;
+ sh:datatype xsd:anyURI ;
+ sh:severity sh:Info ;
+ sh:message """The Directory Data Entity MAY have a `distribution` property to denote the distribution of the files within the directory""" ;
+ ] .
diff --git a/tests/data/rocrate_validator_profiles/ro-crate/may/61_license_entity.ttl b/tests/data/rocrate_validator_profiles/ro-crate/may/61_license_entity.ttl
new file mode 100644
index 0000000..202af89
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/ro-crate/may/61_license_entity.ttl
@@ -0,0 +1,66 @@
+# Copyright (c) 2024-2026 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
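+@prefix ro-crate: <https://github.com/crs4/rocrate-validator/profiles/ro-crate/> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix schema_org: <http://schema.org/> .
+@prefix sh: <http://www.w3.org/ns/shacl#> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .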
+
+
+ro-crate:LicenseOptionalAllowedValues a sh:NodeShape ;
+ sh:name "Root Data Entity: optional properties" ;
+ sh:description """Define the optional properties for the Root Data Entity (e.g., license)""" ;
+ sh:targetClass ro-crate:RootDataEntity ;
+ sh:property [
+ a sh:PropertyShape ;
+ sh:name "License" ;
+ sh:description """Check if the RO-Crate has a license property with a URI or a textual description""" ;
+ sh:message """MAY have a URI (eg for Creative Commons or Open Source licenses).
+ MAY, if necessary be a textual description of how the RO-Crate may be used.""" ;
+ sh:minCount 1 ;
+ sh:nodeKind sh:IRIOrLiteral ;
+ sh:path schema_org:license ;
+ sh:or (
+ [ sh:dataType xsd:string ]
+ [ sh:dataType xsd:anyURI ]
+ ) ;
+ ].
+
+ro-crate:LicenseDefinition a sh:NodeShape ;
+ sh:name "License definition" ;
+ sh:description """Contextual entity representing a license with a name and description.""";
+ sh:targetClass schema_org:license ;
+ sh:property [
+ a sh:PropertyShape ;
+ sh:name "License name" ;
+ sh:description "The license MAY have a name" ;
+ sh:minCount 1 ;
+ sh:maxCount 1 ;
+ sh:nodeKind sh:Literal ;
+ sh:path schema_org:name ;
+ sh:message "Missing license name" ;
+ ] ;
+ sh:property [
+ a sh:PropertyShape ;
+ sh:name "License description" ;
+ sh:description """The license MAY have a description""" ;
+ sh:maxCount 1;
+ sh:minCount 1 ;
+ sh:nodeKind sh:Literal ;
+ sh:path schema_org:description ;
+ sh:message "Missing license description" ;
+ ] .
+
diff --git a/tests/data/rocrate_validator_profiles/ro-crate/must/0_file_descriptor_format.py b/tests/data/rocrate_validator_profiles/ro-crate/must/0_file_descriptor_format.py
new file mode 100644
index 0000000..fa02f64
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/ro-crate/must/0_file_descriptor_format.py
@@ -0,0 +1,401 @@
+# Copyright (c) 2024-2026 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any
+
+from rocrate_validator.utils import log as logging
+from rocrate_validator.models import ValidationContext
+from rocrate_validator.requirements.python import (PyFunctionCheck, check,
+ requirement)
+from rocrate_validator.utils.http import HttpRequester
+
+# set up logging
+logger = logging.getLogger(__name__)
+
+
+@requirement(name="File Descriptor existence")
+class FileDescriptorExistence(PyFunctionCheck):
+    """The file descriptor MUST be present in the RO-Crate and MUST not be empty."""
+
+    @check(name="File Descriptor Existence")
+    def test_existence(self, context: ValidationContext) -> bool:
+        """
+        Check that the file descriptor is present; always passes in metadata-only mode.
+        """
+        if context.settings.metadata_only:
+            logger.debug("Skipping file descriptor existence check in metadata-only mode")
+            return True
+        if not context.ro_crate.has_descriptor():
+            message = f'file descriptor "{context.rel_fd_path}" is not present'
+            context.result.add_issue(message, self)
+            return False
+        return True
+
+    @check(name="File Descriptor size check")
+    def test_size(self, context: ValidationContext) -> bool:
+        """
+        Check that the file descriptor is not empty; always passes in metadata-only mode.
+        """
+        if context.settings.metadata_only:
+            logger.debug("Skipping file descriptor size check in metadata-only mode")
+            return True
+        if not context.ro_crate.has_descriptor():  # a missing descriptor is reported as empty
+            message = f'file descriptor {context.rel_fd_path} is empty'
+            context.result.add_issue(message, self)
+            return False
+        if context.ro_crate.metadata.size == 0:
+            context.result.add_issue(f'RO-Crate "{context.rel_fd_path}" file descriptor is empty', self)
+            return False
+        return True
+
+
+@requirement(name="File Descriptor JSON format")
+class FileDescriptorJsonFormat(PyFunctionCheck):
+    """
+    The file descriptor MUST be a valid JSON file
+    """
+    @check(name="File Descriptor JSON format")
+    def check(self, context: ValidationContext) -> bool:
+        """ Check that the file descriptor loads via `metadata.as_dict()`; any exception is reported as an issue """
+        try:
+            logger.debug("Checking validity of JSON file at %s", context.ro_crate.metadata)
+            context.ro_crate.metadata.as_dict()
+            return True
+        except Exception as e:
+            context.result.add_issue(
+                f'RO-Crate file descriptor "{context.rel_fd_path}" is not in the correct format', self)
+            if logger.isEnabledFor(logging.DEBUG):
+                logger.exception(e)
+            return False
+
+
+@requirement(name="File Descriptor JSON-LD format")
+class FileDescriptorJsonLdFormat(PyFunctionCheck):
+    """
+    The file descriptor MUST be a valid JSON-LD file
+    """
+
+    def __check_remote_context__(self, context_uri: str) -> bool:
+        # Return True only if the context can be fetched and exposes a JSON object under "@context"
+        try:
+            raw_data = HttpRequester().get(context_uri, headers={"Accept": "application/ld+json"})
+            if raw_data.status_code != 200:
+                raise RuntimeError(f"Unable to retrieve the JSON-LD context '{context_uri}'", self)
+            logger.debug(f"Retrieved context from {context_uri}")
+
+            # Try to parse the JSON-LD and access the context
+            jsonLD = raw_data.json()["@context"]
+            # Explicit type test: an `assert` would be stripped when running under `python -O`
+            return isinstance(jsonLD, dict)
+        except Exception as e:
+            if logger.isEnabledFor(logging.DEBUG):
+                logger.exception(e)
+            return False
+
+    def __check_contexts__(self, context: ValidationContext, jsonld_context: object) -> bool:
+        """ Recursively check that every remote (string) context in `jsonld_context` can be retrieved """
+        is_valid = True
+        # if the context is a string, check if it is a valid URI
+        if isinstance(jsonld_context, str):
+            if not self.__check_remote_context__(jsonld_context):
+                context.result.add_issue(
+                    f'Unable to retrieve the JSON-LD context "{jsonld_context}"', self)
+                is_valid = False
+
+        # an inline (dictionary) context needs no retrieval: it is only logged
+        if isinstance(jsonld_context, dict):
+            logger.debug(f"Detected dictionary context: {jsonld_context}")
+
+        # if the context is a list of contexts, check each of them in turn
+        if isinstance(jsonld_context, list):
+            for ctx in jsonld_context:
+                if not self.__check_contexts__(context, ctx):
+                    is_valid = False
+        # return if the context is valid
+        return is_valid
+
+    @check(name="File Descriptor @context property validation")
+    def check_context(self, context: ValidationContext) -> bool:
+        """ Check if the file descriptor contains
+        the @context property and it is a valid JSON-LD context
+        """
+        try:
+            json_dict = context.ro_crate.metadata.as_dict()
+            if "@context" not in json_dict:
+                context.result.add_issue(
+                    f'RO-Crate file descriptor "{context.rel_fd_path}" '
+                    "does not contain a context", self)
+                return False
+
+            # Check if the context is valid
+            return self.__check_contexts__(context, json_dict["@context"])
+        except Exception as e:
+            if logger.isEnabledFor(logging.DEBUG):
+                logger.exception(e)
+            return False
+
+    @check(name="File Descriptor JSON-LD must be flattened")
+    def check_flattened(self, context: ValidationContext) -> bool:
+        """ Check if the file descriptor is flattened """
+
+        def is_entity_flat_recursive(entity: Any, is_first: bool = True, fail_fast: bool = False) -> bool:
+            """ Recursively check if the given data corresponds to a flattened JSON-LD object
+            and returns False if it does not and is not a root element
+            """
+            result = True
+            if isinstance(entity, dict):
+                if is_first:
+                    for _, elem in entity.items():
+                        if not is_entity_flat_recursive(elem, is_first=False, fail_fast=fail_fast):
+                            result = False
+                            if fail_fast:
+                                return False
+                # if this is not the root element, it must not contain more properties than @id
+                else:
+                    if "@id" in entity and "@value" in entity:
+                        # add issue if both @id and @value are present
+                        context.result.add_issue(
+                            (
+                                f'entity "{entity.get("@id", entity)}" contains both @id and @value: '
+                                'an object with an @value represents a value object, which is a literal value such as '
+                                'a string, number, date, or language-tagged string. This object is not an identifiable '
+                                'resource, but a simple literal value.'
+                            ),
+                            self
+                        )
+                        result = False
+                        if fail_fast:
+                            return False
+
+                    # Handle value objects
+                    if "@value" in entity:
+                        # Inline the checks from is_value_object and add issues for each violation
+                        if not isinstance(entity, dict):
+                            context.result.add_issue(
+                                f'entity "{entity.get("@id", entity)}" is not a valid value object: '
+                                'it MUST be a dictionary.',
+                                self
+                            )
+                            result = False
+                            if fail_fast:
+                                return False
+
+                        has_language = "@language" in entity
+                        has_type = "@type" in entity
+
+                        if has_language and has_type:
+                            context.result.add_issue(
+                                f'entity "{entity.get("@id", entity)}" is not a valid value object: '
+                                '@language and @type cannot coexist.',
+                                self
+                            )
+                            result = False
+                            if fail_fast:
+                                return False
+
+                        if has_language and not isinstance(entity["@value"], str):
+                            context.result.add_issue(
+                                f'entity "{entity.get("@id", entity)}" is not a valid value object: '
+                                'if @language is present, @value must be a string.',
+                                self
+                            )
+                            result = False
+                            if fail_fast:
+                                return False
+                    # Handle node objects:
+                    # every remaining entity with len(entity) > 1 must be a node object
+                    elif "@id" not in entity or len(entity) > 1:
+                        context.result.add_issue(
+                            f'entity "{entity.get("@id", entity)}" is not a valid node object reference: '
+                            'it MUST have only @id, but no other properties.',
+                            self
+                        )
+                        result = False
+                        if fail_fast:
+                            return False
+            if isinstance(entity, list):
+                for element in entity:
+                    if not is_entity_flat_recursive(element, is_first=False, fail_fast=fail_fast):
+                        result = False
+                        if fail_fast:
+                            return False
+            return result
+
+        try:
+            fail_fast = context.settings.abort_on_first
+            json_dict = context.ro_crate.metadata.as_dict()
+            result = True
+            for entity in json_dict["@graph"]:
+                if not is_entity_flat_recursive(entity, fail_fast=fail_fast):
+                    context.result.add_issue(
+                        f'RO-Crate file descriptor "{context.rel_fd_path}" '
+                        f'is not fully flattened at entity "{entity.get("@id", entity)}"', self)
+                    result = False
+                    if fail_fast:
+                        return False
+            return result
+        except Exception as e:
+            if logger.isEnabledFor(logging.DEBUG):
+                logger.exception(e)
+            return False
+
+    @check(name="Validation of the @id property of the file descriptor entities")
+    def check_identifiers(self, context: ValidationContext) -> bool:
+        """ Check if the file descriptor entities have the @id property """
+        try:
+            json_dict = context.ro_crate.metadata.as_dict()
+            for entity in json_dict["@graph"]:
+                if "@id" not in entity:
+                    context.result.add_issue(
+                        f"Entity \"{entity.get('name', None) or entity}\" "
+                        f"of RO-Crate \"{context.rel_fd_path}\" "
+                        "file descriptor does not contain the @id attribute", self)
+                    return False
+            return True
+        except Exception as e:
+            if logger.isEnabledFor(logging.DEBUG):
+                logger.exception(e)
+            return False
+
+    @check(name="Validation of the @type property of the file descriptor entities")
+    def check_types(self, context: ValidationContext) -> bool:
+        """ Check if the file descriptor entities have the @type property """
+        try:
+            json_dict = context.ro_crate.metadata.as_dict()
+            for entity in json_dict["@graph"]:
+                if "@type" not in entity:
+                    context.result.add_issue(
+                        f"Entity \"{entity.get('name', None) or entity}\" "
+                        f"of RO-Crate \"{context.rel_fd_path}\" "
+                        "file descriptor does not contain the @type attribute", self)
+                    return False
+            return True
+        except Exception as e:
+            if logger.isEnabledFor(logging.DEBUG):
+                logger.exception(e)
+            return False
+
+    def __get_context_keys__(self, context: object) -> set:
+        """ Get the keys defined by a context given as a URI string, a dictionary, or a list of those """
+        if isinstance(context, str):
+            return self.__get_remote_context_keys__(context)
+
+        # if the context is a dictionary, get the keys of the dictionary
+        if isinstance(context, dict):
+            return set(context.keys())
+
+        # merge the keys of a list of contexts; any other value (e.g. no context) yields no keys, never None
+        keys = set()
+        if isinstance(context, list):
+            for ctx in context:
+                keys.update(self.__get_context_keys__(ctx))
+        return keys
+
+    def __get_remote_context_keys__(self, context_uri: str) -> set:
+        """ Fetch the context at `context_uri` and return the keys of its "@context" object """
+
+        logger.debug(f"Retrieving context from {context_uri}...")
+        # Try to retrieve the context
+        raw_data = HttpRequester().get(context_uri, headers={"Accept": "application/ld+json"})
+        if raw_data.status_code != 200:
+            raise RuntimeError(f"Unable to retrieve the JSON-LD context '{context_uri}'")
+
+        logger.debug(f"Retrieved context from {context_uri}")
+
+        # Get the keys of the context
+        jsonLD = raw_data.json()
+        jsonLD_ctx = jsonLD["@context"]
+        if not isinstance(jsonLD_ctx, dict):
+            raise RuntimeError("The context is not a dictionary", self)
+        return set(jsonLD_ctx.keys())
+
+    def __check_entity_keys__(self, entity: dict,
+                              context_keys: set,
+                              unexpected_keys: dict[str, int] = None) -> dict[str, int]:
+        """ Recursively count the entity keys that are neither in `context_keys` nor in the skipped keywords """
+
+        def add_unexpected_key(k: str, u_keys: dict) -> None:
+            """ Add a key to the unexpected keys dictionary """
+            u_keys[k] = u_keys.get(k, 0) + 1
+
+        # Keys that should be skipped
+        SKIP_KEYS = {"@id", "@type", "@context", "@value", "@language"}
+
+        # Ensure unexpected_keys is initialized
+        if unexpected_keys is None:
+            unexpected_keys = {}
+
+        # If the entity is a dictionary, check each key
+        if isinstance(entity, dict):
+            for k, v in entity.items():
+                if k not in context_keys and k not in SKIP_KEYS:
+                    logger.debug(f"Key {k} not in context keys")
+                    add_unexpected_key(k, unexpected_keys)
+                if isinstance(v, (dict, list)):
+                    self.__check_entity_keys__(v, context_keys, unexpected_keys)
+
+        # If the entity is a list, check each element
+        elif isinstance(entity, list):
+            for elem in entity:
+                self.__check_entity_keys__(elem, context_keys, unexpected_keys)
+
+        return unexpected_keys
+
+    @check(name="Validation of the compaction format of the file descriptor")
+    def check_compaction(self, context: ValidationContext) -> bool:
+        """ Check if the file descriptor is in the **compacted** JSON-LD format """
+        try:
+            logger.debug("Checking compaction format of JSON-LD file at %s", context.ro_crate.metadata)
+            json_dict = context.ro_crate.metadata.as_dict()
+            logger.debug(f"JSONLD keys:{json_dict.keys()}")
+
+            jsonld_context = json_dict.get("@context", None)
+            logger.debug(f"Context: {jsonld_context}")
+
+            try:
+                context_keys = self.__get_context_keys__(jsonld_context)
+                logger.debug(f"{context_keys}")
+            except Exception as e:
+                if logger.isEnabledFor(logging.DEBUG):
+                    logger.exception(e)
+                context.result.add_issue(str(e), self)
+                return False
+
+            unexpected_keys = self.__check_entity_keys__(json_dict.get("@graph"), context_keys)
+            logger.debug(f"Unexpected keys: {unexpected_keys}")
+            if len(unexpected_keys) > 0:
+                for k, v in unexpected_keys.items():
+                    logger.debug(f"Key {k} appears {v} times")
+                    # Add the correct suffix to the message
+                    suffix = "s" if v > 1 else ""
+                    # Check if k is a term or a URI
+                    if k.startswith("http"):
+                        context.result.add_issue(
+                            f'The {v} occurrence{suffix} of the "{k}" URI cannot be used as a key{suffix} '
+                            'because the compacted format requires simple terms as keys '
+                            '(see https://www.w3.org/TR/json-ld-api/#compaction for more details).', self)
+                    else:
+                        context.result.add_issue(
+                            f'The {v} occurrence{suffix} of the JSON-LD key "{k}" '
+                            f'{"is" if v == 1 else "are"} not allowed in the compacted format '
+                            'because it is not present in the @context of the document', self)
+                return False
+
+            return True
+        except Exception as e:
+            if logger.isEnabledFor(logging.DEBUG):
+                logger.exception(e)
+            context.result.add_issue(
+                f'Unexpected error: {e}', self)
+            return False
diff --git a/tests/data/rocrate_validator_profiles/ro-crate/must/1_file-descriptor_metadata.ttl b/tests/data/rocrate_validator_profiles/ro-crate/must/1_file-descriptor_metadata.ttl
new file mode 100644
index 0000000..32d2a63
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/ro-crate/must/1_file-descriptor_metadata.ttl
@@ -0,0 +1,100 @@
+# Copyright (c) 2024-2026 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
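+@prefix ro-crate: <https://github.com/crs4/rocrate-validator/profiles/ro-crate/> .
+@prefix dct: <http://purl.org/dc/terms/> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix schema_org: <http://schema.org/> .
+@prefix sh: <http://www.w3.org/ns/shacl#> .
+@prefix validator: <https://github.com/crs4/rocrate-validator/> .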
+
+
+ro-crate:FindROCrateMetadataFileDescriptorEntity a sh:NodeShape, validator:HiddenShape;
+ sh:name "Identify the RO-Crate Metadata File Descriptor" ;
+ sh:description """The RO-Crate Metadata File Descriptor entity describes the RO-Crate itself, and it is named as `ro-crate-metadata.json`.
+ It can be identified by name according to the RO-Crate specification
+ available at [Finding RO-Crate Root in RDF triple stores](https://www.researchobject.org/ro-crate/1.1/appendix/relative-uris.html#finding-ro-crate-root-in-rdf-triple-stores).""" ;
+ sh:target [
+ a sh:SPARQLTarget ;
+ sh:prefixes ro-crate:sparqlPrefixes ;
+ sh:select """
+ SELECT ?this
+ WHERE {
+ ?this a schema:CreativeWork ;
+ FILTER(contains(str(?this), "ro-crate-metadata.json"))
+ }
+ """
+ ] ;
+
+ # Expand data graph with triples from the file data entity
+ sh:rule [
+ a sh:TripleRule ;
+ sh:subject sh:this ;
+ sh:predicate rdf:type ;
+ sh:object ro-crate:ROCrateMetadataFileDescriptor ;
+ ] .
+
+ro-crate:ROCrateMetadataFileDescriptorExistence
+ a sh:NodeShape ;
+ sh:name "RO-Crate Metadata File Descriptor entity existence" ;
+ sh:description "The RO-Crate JSON-LD MUST contain a Metadata File Descriptor entity named `ro-crate-metadata.json` and typed as `schema:CreativeWork`" ;
+ sh:targetNode ro:ro-crate-metadata.json ;
+ sh:property [
+ a sh:PropertyShape ;
+ sh:name "RO-Crate Metadata File Descriptor entity existence" ;
+ sh:description """Check if the RO-Crate Metadata File Descriptor entity exists,
+ i.e., if there exists an entity with @id `ro-crate-metadata.json` and type `schema:CreativeWork`""" ;
+ sh:path rdf:type ;
+ sh:hasValue ro-crate:ROCrateMetadataFileDescriptor ;
+ sh:minCount 1 ;
+ sh:message "The root of the document MUST have an entity with @id `ro-crate-metadata.json`" ;
+ ] .
+
+ro-crate:ROCrateMetadataFileDescriptorRecommendedProperties a sh:NodeShape ;
+ sh:name "RO-Crate Metadata File Descriptor REQUIRED properties" ;
+ sh:description """RO-Crate Metadata Descriptor MUST be defined
+        according to the requirement details defined in
+ [RO-Crate Metadata File Descriptor](https://www.researchobject.org/ro-crate/1.1/root-data-entity.html#ro-crate-metadata-file-descriptor)""";
+ sh:targetNode ro:ro-crate-metadata.json ;
+ sh:property [
+ a sh:PropertyShape ;
+ sh:name "Metadata File Descriptor entity type" ;
+ sh:description "Check if the RO-Crate Metadata File Descriptor has `@type` CreativeWork, as per schema.org" ;
+ sh:minCount 1 ;
+ sh:nodeKind sh:IRI ;
+ sh:path rdf:type ;
+ sh:hasValue schema_org:CreativeWork ;
+ sh:message "The RO-Crate metadata file MUST be a CreativeWork, as per schema.org" ;
+ ] ;
+ sh:property [
+ a sh:PropertyShape ;
+ sh:name "Metadata File Descriptor entity: `about` property" ;
+ sh:description """Check if the RO-Crate Metadata File Descriptor has an `about` property referencing the Root Data Entity""" ;
+ sh:maxCount 1;
+ sh:minCount 1 ;
+ sh:nodeKind sh:IRI ;
+ sh:path schema_org:about ;
+ sh:class schema_org:Dataset ;
+ sh:message "The RO-Crate metadata file descriptor MUST have an `about` property referencing the Root Data Entity" ;
+ ] ;
+ sh:property ro-crate:conformsToROCrateSpec .
+
+ro-crate:conformsToROCrateSpec sh:name "Metadata File Descriptor entity: `conformsTo` property" ;
+ sh:description """Check if the RO-Crate Metadata File Descriptor has a `conformsTo` property which points to the RO-Crate specification version""" ;
+ sh:minCount 1 ;
+ sh:nodeKind sh:IRI ;
+ sh:path dct:conformsTo ;
+    sh:hasValue <https://w3id.org/ro/crate/1.1> ;
+ sh:message "The RO-Crate metadata file descriptor MUST have a `conformsTo` property with the RO-Crate specification version" .
diff --git a/tests/data/rocrate_validator_profiles/ro-crate/must/2_root_data_entity_metadata.ttl b/tests/data/rocrate_validator_profiles/ro-crate/must/2_root_data_entity_metadata.ttl
new file mode 100644
index 0000000..2c2efcf
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/ro-crate/must/2_root_data_entity_metadata.ttl
@@ -0,0 +1,175 @@
+# Copyright (c) 2024-2026 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
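+@prefix ro-crate: <https://github.com/crs4/rocrate-validator/profiles/ro-crate/> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix schema_org: <http://schema.org/> .
+@prefix sh: <http://www.w3.org/ns/shacl#> .
+@prefix validator: <https://github.com/crs4/rocrate-validator/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .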
+
+
+ro-crate:RootDataEntityType
+    a sh:NodeShape ;
+    sh:description "The Root Data Entity MUST be a `Dataset` (as per `schema.org`)" ;
+    sh:name "RO-Crate Root Data Entity type" ;
+    sh:target [
+        sh:prefixes ro-crate:sparqlPrefixes ;
+        a sh:SPARQLTarget ;
+        sh:select """
+ SELECT ?this
+ WHERE {
+ ?metadatafile schema:about ?this .
+ FILTER(contains(str(?metadatafile), "ro-crate-metadata.json"))
+ }
+ """
+    ] ;
+    sh:property [
+        a sh:PropertyShape ;
+        sh:path rdf:type ;
+        sh:minCount 1 ;
+        sh:hasValue schema_org:Dataset ;
+        sh:name "Root Data Entity type" ;
+        sh:description "Check if the Root Data Entity is a `Dataset` (as per `schema.org`)" ;
+        sh:message """The Root Data Entity MUST be a `Dataset` (as per `schema.org`)""" ;
+    ] ;
+    # A `publisher`, when present, has to be an Organization or a Person
+    sh:property [
+        sh:name "Root Data Entity: `publisher` property" ;
+        sh:description """Check if the Root Data Entity has a `publisher` property of type `Organization` or `Person`.""" ;
+        sh:path schema_org:publisher ;
+        sh:or (
+            [ sh:class schema_org:Organization ]
+            [ sh:class schema_org:Person ]
+        ) ;
+        sh:severity sh:Violation ;
+        sh:message """The Root Data Entity MUST have a `publisher` property of type `Organization` or `Person`.""" ;
+    ] .
+
+
+ro-crate:FindRootDataEntity a sh:NodeShape, validator:HiddenShape;
+ sh:name "Identify the Root Data Entity of the RO-Crate" ;
+ sh:description """The Root Data Entity is the top-level Data Entity in the RO-Crate and serves as the starting point for the description of the RO-Crate.
+ It is a schema:Dataset and is indirectly identified by the about property of the resource ro-crate-metadata.json in the RO-Crate
+ (see the definition at [Finding RO-Crate Root in RDF triple stores](https://www.researchobject.org/ro-crate/1.1/appendix/relative-uris.html#finding-ro-crate-root-in-rdf-triple-stores)).
+ """ ;
+ sh:target [ # focus nodes: every schema:Dataset that is the `about` of a node whose IRI contains "ro-crate-metadata.json"
+ a sh:SPARQLTarget ;
+ sh:prefixes ro-crate:sparqlPrefixes ;
+ sh:select """
+ SELECT ?this
+ WHERE {
+ ?this a schema:Dataset .
+ ?metadatafile schema:about ?this .
+ FILTER(contains(str(?metadatafile), "ro-crate-metadata.json"))
+ }
+ """
+ ] ;
+
+ # Expand the data graph: type every node selected above as ro-crate:RootDataEntity
+ sh:rule [
+ a sh:TripleRule ;
+ sh:subject sh:this ;
+ sh:predicate rdf:type ;
+ sh:object ro-crate:RootDataEntity ;
+ ] .
+
+
+ro-crate:RootDataEntityValueRestriction
+    a sh:NodeShape ;
+    sh:targetNode ro-crate:RootDataEntity ; # the focus node is the class IRI itself
+    sh:description "The Root Data Entity MUST end with `/`" ;
+    sh:name "RO-Crate Root Data Entity value restriction" ;
+    sh:property [
+        a sh:PropertyShape ;
+        sh:path [ sh:inversePath rdf:type ] ; # value nodes: every node typed as ro-crate:RootDataEntity
+        sh:pattern "/$" ;
+        sh:minCount 1 ;
+        sh:name "Root Data Entity URI value" ;
+        sh:description "Check if the Root Data Entity URI ends with `/`" ;
+        sh:message """The Root Data Entity URI MUST end with `/`""" ;
+    ] .
+
+ro-crate:RootDataEntityRequiredProperties
+    a sh:NodeShape ;
+    sh:name "RO-Crate Root Data Entity REQUIRED properties" ;
+    sh:description "The Root Data Entity MUST have a `name`, `description`, `license` and `datePublished`" ;
+    sh:targetClass ro-crate:RootDataEntity ; # applies to every node typed as ro-crate:RootDataEntity
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "Root Data Entity: `name` property" ;
+        sh:description """Check if the Root Data Entity includes a `name` (as specified by schema.org)
+ to clearly identify the dataset and distinguish it from other datasets.""" ;
+        sh:minCount 1 ;
+        sh:nodeKind sh:Literal ;
+        sh:path schema_org:name;
+        sh:message "The Root Data Entity MUST have a `name` property (as specified by schema.org)" ;
+    ] ;
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "Root Data Entity: `description` property" ;
+        sh:description """Check if the Root Data Entity includes a `description` (as specified by schema.org)
+ to provide a human-readable description of the dataset.""" ;
+        sh:minCount 1 ;
+        sh:nodeKind sh:Literal ;
+        sh:path schema_org:description;
+        sh:message "The Root Data Entity MUST have a `description` property (as specified by schema.org)" ;
+    ] ;
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "Root Data Entity: `license` property" ;
+        sh:description """Check if the Root Data Entity includes a `license` property (as specified by schema.org)
+ to provide information about the license of the dataset.""" ;
+        sh:path schema_org:license;
+        sh:minCount 1 ;
+        sh:or ( # a license is a Contextual Entity, a URI literal or a plain string
+            [ sh:class ro-crate:ContextualEntity ]
+            [ sh:datatype xsd:anyURI ]
+            [ sh:datatype xsd:string ]
+        ) ;
+        sh:message """The Root Data Entity MUST have a `license` property (as specified by schema.org).
+ SHOULD link to a Contextual Entity in the RO-Crate Metadata File with a name and description.
+ MAY have a URI (eg for Creative Commons or Open Source licenses).
+ MAY, if necessary be a textual description of how the RO-Crate may be used.""" ;
+    ] ;
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "Root Data Entity: `datePublished` property" ;
+        sh:description """Check if the Root Data Entity includes a `datePublished` (as specified by schema.org)
+ to provide the date when the dataset was published. The datePublished MUST be a valid ISO 8601 date.""" ;
+        sh:minCount 1 ;
+        sh:nodeKind sh:Literal ;
+        sh:path schema_org:datePublished ;
+        sh:pattern "^([\\+-]?\\d{4})((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)?[0-5]\\d)?|24:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" ;
+        sh:message "The Root Data Entity MUST have a `datePublished` property (as specified by schema.org) with a valid ISO 8601 date" ;
+    ] .
+
+ro-crate:RootDataEntityHasPartValueRestriction
+ a sh:NodeShape ;
+ sh:name "RO-Crate Root Data Entity: `hasPart` value restriction" ;
+ sh:description "The Root Data Entity MUST be linked to the declared `File`, `Directory` and other types of instances through the `hasPart` property" ;
+ sh:targetClass ro-crate:RootDataEntity ;
+ sh:property [
+ a sh:PropertyShape ;
+ sh:name "RO-Crate Root Data Entity: `hasPart` value restriction" ;
+ sh:description "Check if the Root Data Entity is linked to the declared `File`, `Directory` and other types of instances through the `hasPart` property" ;
+ sh:path schema_org:hasPart ;
+ sh:or ( # each `hasPart` value must be an instance of one of the three classes below
+ [ sh:class ro-crate:File ]
+ [ sh:class ro-crate:Directory ]
+ [ sh:class ro-crate:GenericDataEntity ]
+ ) ;
+ sh:message """The Root Data Entity MUST be linked to either File or Directory instances, nothing else""" ; # NOTE(review): message omits GenericDataEntity, which the sh:or above also accepts - confirm wording
+ ] .
diff --git a/tests/data/rocrate_validator_profiles/ro-crate/must/4_data_entity_metadata.py b/tests/data/rocrate_validator_profiles/ro-crate/must/4_data_entity_metadata.py
new file mode 100644
index 0000000..1abcbcf
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/ro-crate/must/4_data_entity_metadata.py
@@ -0,0 +1,75 @@
+# Copyright (c) 2024-2026 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from rocrate_validator.utils import log as logging
+from rocrate_validator.models import ValidationContext
+from rocrate_validator.requirements.python import (PyFunctionCheck, check,
+ requirement)
+
+# set up logging
+logger = logging.getLogger(__name__)
+
+
+@requirement(name="Data Entity: REQUIRED resource availability")
+class DataEntityRequiredChecker(PyFunctionCheck):
+ """
+ Resources corresponding to local Data Entities MUST be present in the RO-Crate payload
+ """
+
+ @check(name="Data Entity: REQUIRED resource availability")
+ def check_availability(self, context: ValidationContext) -> bool:
+ """
+ Check the presence of the Data Entity in the RO-Crate
+ """
+ # Skip the check in metadata-only mode
+ if context.settings.metadata_only:
+ logger.debug("Skipping file descriptor existence check in metadata-only mode")
+ return True
+ # Perform the check
+ result = True
+ for entity in context.ro_crate.metadata.get_data_entities(exclude_web_data_entities=True):
+ assert entity.id is not None, "Entity has no @id"
+ logger.debug("Ensure the presence of the Data Entity '%s' within the RO-Crate", entity.id)
+ try:
+ logger.debug("Ensure the presence of the Data Entity '%s' within the RO-Crate", entity.id)
+ if entity.has_local_identifier():
+ logger.debug(
+ "Ignoring the Data Entity '%s' as it is a local entity with a local identifier. "
+ "According to the RO-Crate specification, local entities with local identifiers "
+ "are not required to be included in the RO-Crate payload"
+ "(see https://github.com/ResearchObject/ro-crate/issues/400#issuecomment-2779152885 and "
+ "https://github.com/ResearchObject/ro-crate/pull/426 for more details)",
+ entity.id)
+ continue
+ if not entity.has_relative_path():
+ logger.debug(
+ "Ignoring the Data Entity '%s' as it is a local entity with an absolute path. "
+ "According to the RO-Crate specification, local entities with absolute paths "
+ "are not required to be included in the RO-Crate payload. "
+ "It is only recommended that they exist at the time of RO-Crate creation.",
+ entity.id)
+ continue
+ if not entity.is_available():
+ context.result.add_issue(
+ f"The RO-Crate does not include the Data Entity '{entity.id}' as part of its payload", self)
+ result = False
+ except Exception as e:
+ context.result.add_issue(
+ f"Unable to check the the presence of the Data Entity '{entity.id}' within the RO-Crate", self)
+ if logger.isEnabledFor(logging.DEBUG):
+ logger.debug(e, exc_info=True)
+ result = False
+ if not result and context.fail_fast:
+ return result
+ return result
diff --git a/tests/data/rocrate_validator_profiles/ro-crate/must/4_data_entity_metadata.ttl b/tests/data/rocrate_validator_profiles/ro-crate/must/4_data_entity_metadata.ttl
new file mode 100644
index 0000000..a172e1f
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/ro-crate/must/4_data_entity_metadata.ttl
@@ -0,0 +1,216 @@
+# Copyright (c) 2024-2026 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
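+@prefix ro-crate: <https://github.com/crs4/rocrate-validator/profiles/ro-crate/> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix schema_org: <http://schema.org/> .
+@prefix sh: <http://www.w3.org/ns/shacl#> .
+@prefix owl: <http://www.w3.org/2002/07/owl#> .
+@prefix validator: <https://github.com/crs4/rocrate-validator/> .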
+
+ro-crate:DataEntityRequiredProperties a sh:NodeShape ;
+ sh:name "Data Entity: REQUIRED properties" ;
+ sh:description """A Data Entity MUST be a `URI Path` relative to the ROCrate root,
+ or an absolute URI""" ;
+ sh:targetClass ro-crate:DataEntity ; # focus nodes: every node typed as ro-crate:DataEntity
+
+ sh:property [
+ sh:name "Data Entity: @id value restriction" ;
+ sh:description """Check if the Data Entity has an absolute or relative URI as `@id`""" ;
+ sh:path [sh:inversePath rdf:type ] ; # NOTE(review): from an instance focus node this path yields nodes typed *as that instance* - confirm it really constrains the entity @id
+ sh:nodeKind sh:IRI ;
+ sh:severity sh:Violation ;
+ sh:message """Data Entities MUST have an absolute or relative URI as @id.""" ;
+ ] .
+
+ro-crate:FileDataEntity a sh:NodeShape ;
+    sh:description """A File Data Entity MUST be a `File`.
+ `File` is an RO-Crate alias for the schema.org `MediaObject`.
+ The term `File` here is liberal, and includes "downloadable" resources where `@id` is an absolute URI.
+ """ ;
+    sh:name "File Data Entity: REQUIRED properties" ;
+    sh:target [
+        sh:prefixes ro-crate:sparqlPrefixes ;
+        a sh:SPARQLTarget ;
+        sh:select """
+ SELECT ?this
+ WHERE {
+ ?this a schema:MediaObject .
+ ?metadatafile schema:about ?root .
+ FILTER(contains(str(?metadatafile), "ro-crate-metadata.json"))
+ FILTER(!STRSTARTS(STR(?this), CONCAT(STR(?root), "#")))
+ }
+ """
+    ] ;
+
+    # Tag each selected node as a ro-crate:DataEntity in the data graph
+    sh:rule [
+        a sh:TripleRule ;
+        sh:predicate rdf:type ;
+        sh:object ro-crate:DataEntity ;
+        sh:subject sh:this ;
+    ] ;
+
+    sh:property [
+        sh:path rdf:type ;
+        sh:hasValue ro-crate:File ;
+        sh:severity sh:Violation ;
+        sh:name "File Data Entity: REQUIRED type" ;
+        sh:description """Check if the File Data Entity has `File` as `@type`.
+ `File` is an RO-Crate alias for the schema.org `MediaObject`.
+ """ ;
+        sh:message """File Data Entities MUST have "File" as a value for @type.""" ;
+    ] .
+
+
+ro-crate:DirectoryDataEntity a sh:NodeShape ;
+ sh:name "Directory Data Entity: REQUIRED properties" ;
+ sh:description """A Directory Data Entity MUST be of @type `Dataset`.
+ The term `directory` here includes HTTP file listings where `@id` is an absolute URI.
+ """ ;
+ sh:target [ # focus nodes: every schema:Dataset other than the root whose IRI does not start with "<root>#"
+ a sh:SPARQLTarget ;
+ sh:prefixes ro-crate:sparqlPrefixes ;
+ sh:select """
+ SELECT ?this
+ WHERE {
+ ?this a schema:Dataset .
+ ?metadatafile schema:about ?root .
+ # Exclude all dataset entities that ends with `./#`
+ FILTER(contains(str(?metadatafile), "ro-crate-metadata.json"))
+ FILTER(?this != ?root)
+ FILTER(!STRSTARTS(STR(?this), CONCAT(STR(?root), "#")))
+ }
+ """
+ ] ;
+
+ # Uncomment for debugging
+ # sh:property [
+ # sh:name "Test Directory" ;
+ # sh:description """Data Entities representing directories MUST have "Directory" as a value for @type.""" ;
+ # sh:path rdf:type ;
+ # sh:hasValue ro-crate:File ;
+ # sh:severity sh:Violation ;
+ # ] ;
+
+ # Expand the data graph: type every selected node as ro-crate:Directory
+ sh:rule [
+ a sh:TripleRule ;
+ sh:subject sh:this ;
+ sh:predicate rdf:type ;
+ sh:object ro-crate:Directory ;
+ ] ;
+
+ # Expand the data graph: also type every selected node as ro-crate:DataEntity
+ sh:rule [
+ a sh:TripleRule ;
+ sh:subject sh:this ;
+ sh:predicate rdf:type ;
+ sh:object ro-crate:DataEntity ;
+ ] ;
+
+ # Ensure that the directory data entity is a dataset
+ sh:property [
+ sh:name "Directory Data Entity: REQUIRED type" ;
+ sh:description """Check if the Directory Data Entity has `Dataset` as `@type`.""" ;
+ sh:path rdf:type ;
+ sh:hasValue schema_org:Dataset ;
+ sh:severity sh:Violation ;
+ ] .
+
+ro-crate:DataEntityRequiredPropertiesShape a sh:NodeShape ;
+    sh:name "Data Entity: REQUIRED properties" ;
+    sh:description """A `DataEntity` MUST be linked, either directly or indirectly, from the Root Data Entity""" ;
+    sh:targetClass ro-crate:DataEntity ;
+    sh:property
+    [
+        a sh:PropertyShape ;
+        sh:path [ sh:inversePath schema_org:hasPart ] ; # value nodes: every subject that lists this entity in `hasPart`
+        sh:node schema_org:Dataset ; # NOTE(review): sh:node expects a shape while schema_org:Dataset looks like a class - confirm whether sh:class was intended
+        sh:minCount 1 ;
+        sh:name "Data Entity MUST be directly referenced" ;
+        sh:description """Check if the Data Entity is linked, either directly or indirectly, to the `Root Data Entity` using the `hasPart` (as defined in `schema.org`) property""" ;
+        # sh:message "A Data Entity MUST be directly or indirectly linked to the `Root Data Entity` through the `hasPart` property" ;
+    ] .
+
+ro-crate:GenericDataEntityRequiredProperties a sh:NodeShape ;
+ sh:name "Generic Data Entity: REQUIRED properties" ;
+ sh:description """A Data Entity other than a File or a Directory MUST be a `DataEntity`""" ;
+ sh:target [ # NOTE(review): the single FILTER NOT EXISTS group below only excludes nodes typed as BOTH MediaObject and Dataset; the description suggests EITHER should be excluded - confirm
+ a sh:SPARQLTarget ;
+ sh:prefixes ro-crate:sparqlPrefixes ;
+ sh:select """
+ SELECT ?this
+ WHERE {
+ ?root schema:hasPart ?this .
+ ?metadatafile schema:about ?root .
+ FILTER(contains(str(?metadatafile), "ro-crate-metadata.json"))
+ FILTER(?this != ?root)
+ FILTER(?this != ?metadatafile)
+ FILTER NOT EXISTS {
+ ?this a schema:MediaObject .
+ ?this a schema:Dataset .
+ }
+ }
+ """
+ ] ;
+
+ # Expand data graph with triples to mark the matching entities as GenericDataEntity instances
+ sh:rule [
+ a sh:TripleRule ;
+ sh:subject sh:this ;
+ sh:predicate rdf:type ;
+ sh:object ro-crate:GenericDataEntity ;
+ ] ;
+
+ # Expand data graph with triples to mark the matching entities as DataEntity instances
+ sh:rule [
+ a sh:TripleRule ;
+ sh:subject sh:this ;
+ sh:predicate rdf:type ;
+ sh:object ro-crate:DataEntity ;
+ ] .
+
+
+# Uncomment for debugging
+# ro-crate:TestGenericDataEntity a sh:NodeShape ;
+# sh:disabled true ;
+# sh:targetClass ro-crate:GenericDataEntity ;
+# sh:name "Generic Data Entity: test invalid property";
+# sh:description """Check if the GenericDataEntity has the invalidProperty property""" ;
+# sh:property [
+# sh:minCount 1 ;
+# sh:maxCount 1 ;
+# sh:path ro-crate:invalidProperty ;
+# sh:severity sh:Violation ;
+# sh:message "Testing the generic data entity";
+# sh:datatype xsd:string ;
+# sh:message "Testing for the invalidProperty of the generic data entity";
+# ] .
+
+
+# Uncomment for debugging
+# ro:testDirectory a sh:NodeShape ;
+# sh:name "Definition of Test Directory" ;
+# sh:description """A Test Directory is a digital object that is stored in a file format""" ;
+# sh:targetClass ro-crate:Directory ;
+
+# sh:property [
+# sh:name "Test Directory instance" ;
+# sh:description """Check if the Directory DataEntity instance has the fake property ro-crate:foo""" ;
+# sh:path rdf:type ;
+# sh:hasValue ro-crate:foo ;
+# sh:severity sh:Violation ;
+# ] .
diff --git a/tests/data/rocrate_validator_profiles/ro-crate/must/5_web_data_entity_metadata.ttl b/tests/data/rocrate_validator_profiles/ro-crate/must/5_web_data_entity_metadata.ttl
new file mode 100644
index 0000000..c547d60
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/ro-crate/must/5_web_data_entity_metadata.ttl
@@ -0,0 +1,50 @@
+# Copyright (c) 2024-2026 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
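+@prefix ro-crate: <https://github.com/crs4/rocrate-validator/profiles/ro-crate/> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix dct: <http://purl.org/dc/terms/> .
+@prefix schema_org: <http://schema.org/> .
+@prefix sh: <http://www.w3.org/ns/shacl#> .
+@prefix owl: <http://www.w3.org/2002/07/owl#> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+@prefix validator: <https://github.com/crs4/rocrate-validator/> .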
+
+
+ro-crate:WebBasedDataEntity a sh:NodeShape, validator:HiddenShape ;
+    sh:description """A Web-based Data Entity is a `File` identified by an absolute URL""" ;
+    sh:name "Web-based Data Entity: REQUIRED properties" ;
+
+    sh:target [
+        sh:prefixes ro-crate:sparqlPrefixes ;
+        a sh:SPARQLTarget ;
+        sh:select """
+ SELECT ?this
+ WHERE {
+ ?this a schema:MediaObject .
+ FILTER(?this != ro:ro-crate-metadata.json)
+ FILTER regex(str(?this), "^(https?|ftps?)://", "i")
+ }
+ """
+    ] ;
+
+    # Tag each selected node as a ro-crate:WebDataEntity in the data graph
+    sh:rule [
+        a sh:TripleRule ;
+        sh:predicate rdf:type ;
+        sh:object ro-crate:WebDataEntity ;
+        sh:subject sh:this ;
+    ] .
+
diff --git a/tests/data/rocrate_validator_profiles/ro-crate/must/6_contextual_entity.ttl b/tests/data/rocrate_validator_profiles/ro-crate/must/6_contextual_entity.ttl
new file mode 100644
index 0000000..9ecf87a
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/ro-crate/must/6_contextual_entity.ttl
@@ -0,0 +1,81 @@
+# Copyright (c) 2024-2026 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
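+@prefix ro-crate: <https://github.com/crs4/rocrate-validator/profiles/ro-crate/> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix schema: <http://schema.org/> .
+@prefix sh: <http://www.w3.org/ns/shacl#> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+@prefix owl: <http://www.w3.org/2002/07/owl#> .
+@prefix validator: <https://github.com/crs4/rocrate-validator/> .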
+
+
+ro-crate:FindLicenseEntity a sh:NodeShape, validator:HiddenShape ;
+ sh:name "Identify License Entity" ;
+ sh:description """Mark a license entity any Data Entity referenced by the `schema:license` property.""" ;
+ sh:target [ # focus nodes: every object of a schema:license triple, whatever its subject
+ a sh:SPARQLTarget ;
+ sh:prefixes ro-crate:sparqlPrefixes ;
+ sh:select """
+ SELECT ?this
+ WHERE {
+ ?subject schema:license ?this .
+ }
+ """
+ ] ;
+
+ # Expand the data graph: type every license target as ro-crate:ContextualEntity
+ sh:rule [
+ a sh:TripleRule ;
+ sh:subject sh:this ;
+ sh:predicate rdf:type ;
+ sh:object ro-crate:ContextualEntity ;
+ ] .
+
+
+ro-crate:WebSiteRecommendedProperties a sh:NodeShape ;
+    sh:name "WebSite RECOMMENDED Properties" ;
+    sh:description """A `WebSite` MUST be identified by a valid IRI and MUST have a `name` property.""" ;
+    sh:targetClass schema:WebSite ;
+    sh:property [
+        sh:path [sh:inversePath rdf:type] ; # NOTE(review): from an instance focus node this path yields nodes typed *as that instance* - confirm it really constrains the WebSite @id
+        sh:nodeKind sh:IRI ; # was the unknown term `sh:datType`, which SHACL engines ignore
+        sh:name "WebSite: value restriction of its identifier" ;
+        sh:description "Check if the WebSite has a valid IRI" ;
+        sh:message "A WebSite MUST have a valid IRI" ;
+    ] ;
+    sh:property [
+        sh:path schema:name ;
+        sh:minCount 1 ;
+        sh:datatype xsd:string ; # was the unknown term `sh:dataType`, which SHACL engines ignore
+        sh:name "WebSite: REQUIRED `name` property" ;
+        sh:description "Check if the WebSite has a `name` property" ;
+        sh:message "A WebSite MUST have a `name` property" ;
+    ] .
+
+
+ro-crate:CreativeWorkAuthorDefinition a sh:NodeShape, validator:HiddenShape ;
+    sh:targetObjectsOf schema:author ;
+    sh:description """Define the `CreativeWorkAuthor` as the `Person` object of the `schema:author` predicate.""" ;
+    sh:name "CreativeWork Author Definition" ;
+    sh:rule [
+        a sh:TripleRule ;
+        sh:condition [ # the rule only fires for author nodes typed as schema:Person
+            sh:property [ sh:path rdf:type ; sh:hasValue schema:Person ; sh:minCount 1 ] ;
+        ] ;
+        sh:predicate rdf:type ;
+        sh:object ro-crate:CreativeWorkAuthor ;
+        sh:subject sh:this ;
+    ] .
diff --git a/tests/data/rocrate_validator_profiles/ro-crate/ontology.ttl b/tests/data/rocrate_validator_profiles/ro-crate/ontology.ttl
new file mode 100644
index 0000000..9af3a09
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/ro-crate/ontology.ttl
@@ -0,0 +1,67 @@
+# Copyright (c) 2024-2026 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
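+@prefix owl: <http://www.w3.org/2002/07/owl#> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix xml: <http://www.w3.org/XML/1998/namespace> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix schema: <http://schema.org/> .
+@prefix rocrate: <https://w3id.org/ro/crate/1.1/> .
+@prefix bioschemas: <https://bioschemas.org/> .
+@prefix ro-crate: <https://github.com/crs4/rocrate-validator/profiles/ro-crate/> .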
+# @base <./.> .
+
+ rdf:type owl:Ontology ; # NOTE(review): the subject IRI of this ontology declaration is missing - restore it from upstream
+ owl:versionIRI . # NOTE(review): the version IRI object is missing - restore it from upstream
+
+# # #################################################################
+# # # Classes
+# # #################################################################
+
+# RootDataEntity: a specialisation of schema:Dataset
+ro-crate:RootDataEntity a owl:Class ;
+    rdfs:label "RootDataEntity"@en ;
+    rdfs:subClassOf schema:Dataset .
+
+# http://schema.org/CreativeWork
+schema:CreativeWork a owl:Class ;
+    rdfs:label "CreativeWork"@en .
+
+# http://schema.org/MediaObject (declared equivalent to ro-crate:File)
+schema:MediaObject a owl:Class ;
+    rdfs:label "MediaObject"@en ;
+    owl:equivalentClass ro-crate:File .
+
+
+# http://schema.org/SoftwareSourceCode
+schema:SoftwareSourceCode a owl:Class ;
+    rdfs:subClassOf schema:CreativeWork .
+
+
+# https://bioschemas.org/ComputationalWorkflow
+bioschemas:ComputationalWorkflow a owl:Class .
+
+
+# DataEntity: a specialisation of schema:CreativeWork
+ro-crate:DataEntity a owl:Class ;
+    rdfs:label "DataEntity"@en ;
+    rdfs:subClassOf schema:CreativeWork .
+
+
+# Directory: a specialisation of schema:Dataset
+ro-crate:Directory a owl:Class ;
+    rdfs:label "Directory"@en ;
+    rdfs:subClassOf schema:Dataset .
diff --git a/tests/data/rocrate_validator_profiles/ro-crate/prefixes.ttl b/tests/data/rocrate_validator_profiles/ro-crate/prefixes.ttl
new file mode 100644
index 0000000..79006d5
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/ro-crate/prefixes.ttl
@@ -0,0 +1,49 @@
+# Copyright (c) 2024-2026 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
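+@prefix sh: <http://www.w3.org/ns/shacl#> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+@prefix ro-crate: <https://github.com/crs4/rocrate-validator/profiles/ro-crate/> .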
+
+# Define the prefixes used in the SPARQL queries
+ro-crate:sparqlPrefixes
+    sh:declare [
+        sh:namespace "http://schema.org/"^^xsd:anyURI ;
+        sh:prefix "schema" ;
+    ] ;
+    sh:declare [
+        sh:namespace "https://bioschemas.org/"^^xsd:anyURI ;
+        sh:prefix "bioschemas" ;
+    ] ;
+    sh:declare [
+        sh:namespace "https://bioschemas.org/ComputationalWorkflow#"^^xsd:anyURI ;
+        sh:prefix "bioschemas-cw" ;
+    ] ;
+    sh:declare [
+        sh:namespace "https://w3id.org/ro/crate/1.1/"^^xsd:anyURI ;
+        sh:prefix "rocrate" ;
+    ] ;
+    sh:declare [
+        sh:namespace "https://w3id.org/ro/terms/workflow-run#"^^xsd:anyURI ;
+        sh:prefix "wfrun" ;
+    ] ;
+    sh:declare [
+        sh:namespace "https://codemeta.github.io/terms/"^^xsd:anyURI ;
+        sh:prefix "codemeta" ;
+    ] ;
+    sh:declare [
+        sh:namespace "./"^^xsd:anyURI ;
+        sh:prefix "ro" ;
+    ] .
diff --git a/tests/data/rocrate_validator_profiles/ro-crate/profile.ttl b/tests/data/rocrate_validator_profiles/ro-crate/profile.ttl
new file mode 100644
index 0000000..47872a9
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/ro-crate/profile.ttl
@@ -0,0 +1,74 @@
+# Copyright (c) 2024-2026 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
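+@prefix dct: <http://purl.org/dc/terms/> .
+@prefix prof: <http://www.w3.org/ns/dx/prof/> .
+@prefix role: <http://www.w3.org/ns/dx/prof/role/> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .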
+
+
+
+ # a Profile; its identifying URI (NOTE(review): the subject IRI line above appears to be missing - restore it)
+ a prof:Profile ;
+
+ # common metadata for the Profile
+
+ # the Profile's label
+ rdfs:label "RO-Crate Metadata Specification 1.1" ;
+
+ # regular metadata, a basic description of the Profile
+ rdfs:comment """RO-Crate Metadata Specification."""@en ;
+
+ # regular metadata, URI of publisher
+ dct:publisher ; # NOTE(review): object IRI missing - restore the publisher URI
+
+ # this profile has a JSON-LD context resource
+ prof:hasResource [
+ a prof:ResourceDescriptor ;
+
+ # it's in JSON-LD format
+ dct:format ; # NOTE(review): object IRI missing - restore the JSON-LD media type URI
+
+ # it conforms to JSON-LD, here referred to by its namespace URI as a Profile
+ dct:conformsTo ; # NOTE(review): object IRI missing - restore the JSON-LD specification URI
+
+ # this profile resource plays the role of "Vocabulary"
+ # described in this ontology's accompanying Roles vocabulary
+ prof:hasRole role:Vocabulary ;
+
+ # this profile resource's actual file
+ prof:hasArtifact ; # NOTE(review): object IRI missing - restore the context artifact URI
+ ] ;
+
+ # this profile has a human-readable documentation resource
+ prof:hasResource [
+ a prof:ResourceDescriptor ;
+
+ # it's in HTML format
+ dct:format ; # NOTE(review): object IRI missing - restore the HTML media type URI
+
+ # it conforms to HTML, here referred to by its namespace URI as a Profile
+ dct:conformsTo ; # NOTE(review): object IRI missing - restore the HTML specification URI
+
+ # this profile resource plays the role of "Specification"
+ # described in this ontology's accompanying Roles vocabulary
+ prof:hasRole role:Specification ;
+
+ # this profile resource's actual file
+ prof:hasArtifact ; # NOTE(review): object IRI missing - restore the specification artifact URI
+ ] ;
+
+ # a short code to refer to the Profile with when a URI can't be used
+ prof:hasToken "ro-crate" ;
+.
diff --git a/tests/data/rocrate_validator_profiles/ro-crate/should/2_root_data_entity_metadata.ttl b/tests/data/rocrate_validator_profiles/ro-crate/should/2_root_data_entity_metadata.ttl
new file mode 100644
index 0000000..7cb53f0
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/ro-crate/should/2_root_data_entity_metadata.ttl
@@ -0,0 +1,74 @@
+# Copyright (c) 2024-2026 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
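+@prefix ro-crate: <https://github.com/crs4/rocrate-validator/profiles/ro-crate/> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix schema_org: <http://schema.org/> .
+@prefix sh: <http://www.w3.org/ns/shacl#> .
+@prefix validator: <https://github.com/crs4/rocrate-validator/> .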
+
+ro-crate:RootDataEntityDirectRecommendedProperties a sh:NodeShape ;
+    sh:targetClass ro-crate:RootDataEntity ;
+    sh:description """The Root Data Entity SHOULD have
+ the properties `name`, `description` and `license` defined as described
+ in the RO-Crate specification """;
+    sh:name "RO-Crate Root Data Entity RECOMMENDED properties" ;
+    sh:property [
+        a sh:PropertyShape ;
+        sh:path schema_org:license;
+        sh:minCount 1 ;
+        sh:class schema_org:CreativeWork ;
+        sh:nodeKind sh:BlankNodeOrIRI ;
+        sh:name "Root Data Entity: `license` SHOULD link to a Contextual Entity" ;
+        sh:description """Check if the Root Data Entity includes a `license` property
+ that links to a Contextual Entity with type `schema_org:CreativeWork` to describe the license.""" ;
+        sh:message """The Root Data Entity SHOULD have a link to a Contextual Entity representing the schema_org:license type""" ;
+    ] ;
+    sh:property [
+        a sh:PropertyShape ;
+        sh:path schema_org:author;
+        sh:minCount 1 ;
+        sh:or (
+            [ sh:class schema_org:Person ;]
+            [ sh:class schema_org:Organization ;]
+        ) ;
+        sh:name "Root Data Entity: `author` property" ;
+        sh:description """Check if the Root Data Entity includes a `author` property (as specified by schema.org)
+ to provide information about its author.""" ;
+        sh:message """The Root Data Entity SHOULD have a link to a Contextual Entity representing the `author` of the RO-Crate""" ;
+    ] ;
+    sh:property [
+        sh:path schema_org:publisher ;
+        sh:minCount 1 ;
+        sh:maxCount 1 ;
+        sh:class schema_org:Organization ;
+        sh:nodeKind sh:IRI ;
+        sh:severity sh:Warning ;
+        sh:name "Root Data Entity: `publisher` property" ;
+        sh:description """Check if the Root Data Entity has a `publisher` property of type `Organization`.""" ;
+        sh:message "The `publisher` property of a `Root Data Entity` SHOULD be an `Organization`";
+    ] ;
+    sh:property [
+        a sh:PropertyShape ;
+        sh:path schema_org:datePublished ;
+        sh:minCount 1 ;
+        sh:nodeKind sh:Literal ;
+        sh:pattern "^([\\+-]?\\d{4})((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))|W([0-4]\\d|5[0-2])(-?[1-7])|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)?[0-5]\\d)?|24:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)$" ;
+        sh:name "Root Data Entity: RECOMMENDED `datePublished` property" ;
+        sh:description """Check if the Root Data Entity includes a `datePublished` (as specified by schema.org)
+ to provide the date when the dataset was published. The datePublished MUST be a valid ISO 8601 date.
+ It SHOULD be specified to at least the day level, but MAY include a time component.""" ;
+        sh:message "The Root Data Entity MUST have a `datePublished` property (as specified by schema.org) with a valid ISO 8601 date and the precision of at least the day level" ;
+    ] .
diff --git a/tests/data/rocrate_validator_profiles/ro-crate/should/2_root_data_entity_relative_uri.py b/tests/data/rocrate_validator_profiles/ro-crate/should/2_root_data_entity_relative_uri.py
new file mode 100644
index 0000000..dd7d8fc
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/ro-crate/should/2_root_data_entity_relative_uri.py
@@ -0,0 +1,42 @@
+# Copyright (c) 2024-2026 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from rocrate_validator.utils import log as logging
+from rocrate_validator.models import ValidationContext
+from rocrate_validator.requirements.python import (PyFunctionCheck, check,
+ requirement)
+
+# set up logging
+logger = logging.getLogger(__name__)
+
+
+@requirement(name="RO-Crate Root Data Entity RECOMMENDED value")
+class RootDataEntityRelativeURI(PyFunctionCheck):
+    """
+    The Root Data Entity SHOULD be denoted by the string `./`
+    """
+
+    @check(name="Root Data Entity: RECOMMENDED value")
+    def check_relative_uris(self, context: ValidationContext) -> bool:
+        """Check if the Root Data Entity is denoted by the string `./` in the file descriptor JSON-LD"""
+        try:
+            if context.ro_crate.metadata.get_root_data_entity().id != './':
+                context.result.add_issue(
+                    'Root Data Entity URI is not denoted by the string `./`', self)
+                return False
+            return True
+        except Exception as e:  # best effort: a failed lookup is reported as an issue, not raised
+            context.result.add_issue(
+                f'Error checking Root Data Entity URI: {str(e)}', self)
+            return False
diff --git a/tests/data/rocrate_validator_profiles/ro-crate/should/4_data_entity_existence.py b/tests/data/rocrate_validator_profiles/ro-crate/should/4_data_entity_existence.py
new file mode 100644
index 0000000..f5742ac
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/ro-crate/should/4_data_entity_existence.py
@@ -0,0 +1,58 @@
+# Copyright (c) 2024-2026 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from rocrate_validator.utils import log as logging
+from rocrate_validator.models import ValidationContext
+from rocrate_validator.requirements.python import (PyFunctionCheck, check,
+ requirement)
+
+# set up logging
+logger = logging.getLogger(__name__)
+
+
+@requirement(name="Data Entity: RECOMMENDED resource availability")
+class DataEntityRecommendedChecker(PyFunctionCheck):
+    """
+    Data Entities with absolute URI paths SHOULD be available
+    at the time of RO-Crate creation
+    """
+
+    @check(name="Data Entity: RECOMMENDED resource availability")
+    def check_availability(self, context: ValidationContext) -> bool:
+        """
+        Check if the Data Entities with absolute URI paths
+        are available at the time of RO-Crate creation
+        """
+        # Skip the check in metadata-only mode
+        if context.settings.metadata_only:
+            logger.debug("Skipping data entity availability check in metadata-only mode")
+            return True
+        # Only non-web Data Entities with an absolute path are probed
+        result = True
+        for entity in [
+                item for item in context.ro_crate.metadata.get_data_entities(exclude_web_data_entities=True)
+                if item.has_absolute_path()]:
+            assert entity.id is not None, "Entity has no @id"
+            try:
+                if not entity.is_available():
+                    context.result.add_issue(
+                        f'Data Entity {entity.id} is not available', self)
+                    result = False
+            except Exception as e:  # a failed probe counts as "not available"
+                context.result.add_issue(
+                    f'Data Entity {entity.id} is not available: {e}', self)
+                result = False
+            if not result and context.fail_fast:
+                return result
+        return result
diff --git a/tests/data/rocrate_validator_profiles/ro-crate/should/4_data_entity_metadata.ttl b/tests/data/rocrate_validator_profiles/ro-crate/should/4_data_entity_metadata.ttl
new file mode 100644
index 0000000..188e3a8
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/ro-crate/should/4_data_entity_metadata.ttl
@@ -0,0 +1,69 @@
+# Copyright (c) 2024-2026 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
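+@prefix ro-crate: <https://github.com/crs4/rocrate-validator/profiles/ro-crate/> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix schema_org: <http://schema.org/> .
+@prefix sh: <http://www.w3.org/ns/shacl#> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .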
+
+ro-crate:FileRecommendedProperties a sh:NodeShape ; # Shape applied to every node typed ro-crate:File
+ sh:targetClass ro-crate:File ;
+ sh:name "File Data Entity: RECOMMENDED properties";
+ sh:description """A `File` Data Entity SHOULD have detailed descriptions encodings through the `encodingFormat` property""" ;
+ sh:property [
+ sh:minCount 1 ;
+ sh:maxCount 2 ; # presumably one media-type string plus one WebSite reference — TODO confirm
+ sh:path schema_org:encodingFormat ;
+ sh:severity sh:Warning ;
+ sh:name "File Data Entity: RECOMMENDED `encodingFormat` property" ;
+ sh:description """Check if the File Data Entity has a detailed description of encodings through the `encodingFormat` property.
+ The `encodingFormat` property SHOULD be a PRONOM identifier (e.g., application/pdf) or,
+ to add more detail, SHOULD be linked using a `PRONOM` to a `Contextual Entity` of type `WebSite`
+ (see [Adding detailed descriptions of encodings](https://www.researchobject.org/ro-crate/1.1/data-entities.html#adding-detailed-descriptions-of-encodings)).
+ """ ;
+ sh:message "Missing or invalid `encodingFormat` linked to the `File Data Entity`";
+ sh:or ( # every encodingFormat value must satisfy at least one of the two alternatives below
+ [
+ sh:datatype xsd:string ; # NOTE(review): the texts say "PRONOM identifier", but the pattern below matches media types — confirm wording
+ sh:pattern "^(\\w*)\\/(\\w[\\w\\.-]*)(?:\\+(\\w[\\w\\.-]*))?(?:;(\\w+=[^;]+))*$" ; # type/subtype[+suffix][;param=value]*
+ sh:name "File Data Entity: RECOMMENDED `PRONOM` for the `encodingFormat` property" ;
+ sh:description """Check if the File Data Entity is linked to its `encodingFormat` through a PRONOM identifier
+ (e.g., application/pdf, application/text, image/svg+xml, image/svg;q=0.9,/;q=0.8,image/svg+xml;q=0.9,/;q=0.8, application/vnd.uplanet.listcmd-wbxml;charset=utf-8).
+ """ ;
+ sh:message "The `encodingFormat` SHOULD be linked using a PRONOM identifier (e.g., application/pdf).";
+ ]
+ [
+ sh:nodeKind sh:IRI ; # alternative 2: a reference to an entity typed schema_org:WebSite
+ sh:class schema_org:WebSite ;
+ sh:name "File Data Entity: RECOMMENDED `Contextual Entity` linked to the `encodingFormat` property";
+ sh:description "Check if the File Data Entity `encodingFormat` is linked to a `Contextual Entity of type `WebSite`." ;
+ sh:message "The `encodingFormat` SHOULD be linked to a `Contextual Entity` of type `Web Site`." ;
+ ]
+ )
+ ] .
+
+ro-crate:DirectoryDataEntityRequiredValueRestriction a sh:NodeShape ; # NOTE(review): identifier says "Required" while name/description say RECOMMENDED
+ sh:name "Directory Data Entity: RECOMMENDED value restriction" ;
+ sh:description """A Directory Data Entity SHOULD end with `/`""" ;
+ sh:targetNode ro-crate:Directory ; # the focus node is the Directory class itself, not its instances
+ sh:property [
+ a sh:PropertyShape ;
+ sh:name "Directory Data Entity: RECOMMENDED value restriction" ;
+ sh:description """Check if the Directory Data Entity ends with `/`""" ;
+ sh:path [ sh:inversePath rdf:type ] ; # from the class, follow rdf:type backwards: value nodes are the entities typed Directory
+ sh:message """Every Data Entity Directory URI SHOULD end with `/`""" ;
+ sh:pattern "/$" ; # matched against the identifier of each value node
+ ] .
diff --git a/tests/data/rocrate_validator_profiles/ro-crate/should/5_web_data_entity_metadata.py b/tests/data/rocrate_validator_profiles/ro-crate/should/5_web_data_entity_metadata.py
new file mode 100644
index 0000000..13ef914
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/ro-crate/should/5_web_data_entity_metadata.py
@@ -0,0 +1,73 @@
+# Copyright (c) 2024-2026 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from rocrate_validator.utils import log as logging
+from rocrate_validator.models import ValidationContext
+from rocrate_validator.requirements.python import (PyFunctionCheck, check,
+ requirement)
+
+# set up logging
+logger = logging.getLogger(__name__)
+
+
+@requirement(name="Web-based Data Entity: RECOMMENDED resource availability")
+class WebDataEntityRecommendedChecker(PyFunctionCheck):
+    """
+    Web-based Data Entity instances SHOULD be available
+    at the URIs specified in the `@id` property of the Web-based Data Entity.
+    """
+
+    @check(name="Web-based Data Entity: resource availability")
+    def check_availability(self, context: ValidationContext) -> bool:
+        """
+        Check if the Web-based Data Entity is directly downloadable
+        by a simple retrieval (e.g. HTTP GET) permitting redirection and HTTP/HTTPS URIs
+        """
+        result = True
+        for entity in context.ro_crate.metadata.get_web_data_entities():
+            assert entity.id is not None, "Entity has no @id"
+            try:
+                if not entity.is_available():
+                    context.result.add_issue(
+                        f'Web-based Data Entity {entity.id} is not available', self)
+                    result = False
+            except Exception as e:  # a failed probe counts as "not available"
+                context.result.add_issue(
+                    f'Web-based Data Entity {entity.id} is not available: {e}', self)
+                result = False
+            if not result and context.fail_fast:
+                return result
+        return result
+
+    @check(name="Web-based Data Entity: `contentSize` property")
+    def check_content_size(self, context: ValidationContext) -> bool:
+        """
+        Check if the Web-based Data Entity has a `contentSize` property
+        and if it is set to the actual size of the downloadable content
+        """
+        result = True
+        for entity in context.ro_crate.metadata.get_web_data_entities():
+            assert entity.id is not None, "Entity has no @id"
+            content_size = entity.get_property("contentSize")
+            if content_size and entity.is_available():
+                actual_size = context.ro_crate.get_external_file_size(entity.id)  # reused in the message below
+                if int(content_size) != actual_size:
+                    context.result.add_issue(
+                        f'The property contentSize={content_size} of the Web-based Data Entity {entity.id} '
+                        f'does not match the actual size of the downloadable content, i.e., {actual_size} (bytes)', self,
+                        violatingEntity=entity.id, violatingProperty='contentSize', violatingPropertyValue=content_size)
+                    result = False
+            if not result and context.fail_fast:
+                return result
+        return result
diff --git a/tests/data/rocrate_validator_profiles/ro-crate/should/5_web_data_entity_metadata.ttl b/tests/data/rocrate_validator_profiles/ro-crate/should/5_web_data_entity_metadata.ttl
new file mode 100644
index 0000000..e4f3cc5
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/ro-crate/should/5_web_data_entity_metadata.ttl
@@ -0,0 +1,63 @@
+# Copyright (c) 2024-2026 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
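+@prefix ro-crate: <https://github.com/crs4/rocrate-validator/profiles/ro-crate/> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix dct: <http://purl.org/dc/terms/> .
+@prefix schema_org: <http://schema.org/> .
+@prefix sh: <http://www.w3.org/ns/shacl#> .
+@prefix owl: <http://www.w3.org/2002/07/owl#> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+@prefix validator: <https://github.com/crs4/rocrate-validator/> .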
+
+
+ro-crate:WebBasedDataEntityRequiredValueRestriction a sh:NodeShape ; # Shape applied to every node typed ro-crate:WebDataEntity
+ sh:name "Web-based Data Entity: RECOMMENDED properties" ;
+ sh:description """A Web-based Data Entity MUST be identified by an absolute URL and
+ SHOULD have a `contentSize` and `sdDatePublished` property""" ;
+ sh:targetClass ro-crate:WebDataEntity ;
+ # Check if the Web-based Data Entity has a contentSize property
+ sh:property [
+ a sh:PropertyShape ;
+ sh:minCount 1 ;
+ sh:name "Web-based Data Entity: `contentSize` property" ;
+ sh:description """Check if the Web-based Data Entity has a `contentSize` property""" ;
+ sh:path schema_org:contentSize ;
+ sh:datatype xsd:string ; # values are required to be string literals; the SPARQL constraint below concerns their integer form
+ sh:severity sh:Warning ;
+ sh:message """Web-based Data Entities SHOULD have a `contentSize` property""" ;
+ sh:sparql [ # NOTE(review): the query uses the `schema:` prefix, but this file declares `schema_org:` only — confirm how it is resolved
+ sh:message "If the value is a string it must be a string representing an integer." ;
+ sh:select """
+ SELECT ?this ?value
+ WHERE {
+ ?this schema:contentSize ?value .
+ FILTER NOT EXISTS {
+ FILTER (xsd:integer(?value) = ?value)
+ }
+ }
+ """ ;
+ ] ;
+ ] ;
+ # Check if the Web-based Data Entity has a sdDatePublished property
+ sh:property [
+ a sh:PropertyShape ;
+ sh:minCount 1 ;
+ sh:name "Web-based Data Entity: `sdDatePublished` property" ;
+ sh:description """Check if the Web-based Data Entity has a `sdDatePublished` property""" ;
+ sh:path schema_org:sdDatePublished ;
+ sh:pattern "^([\\+-]?\\d{4})((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))|W([0-4]\\d|5[0-2])(-?[1-7])|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)?[0-5]\\d)?|24:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)$" ; # ISO 8601 date, optionally followed by a time and zone
+ sh:message """Web-based Data Entities SHOULD have a `sdDatePublished` property to indicate when the absolute URL was accessed""" ;
+ ] .
diff --git a/tests/data/rocrate_validator_profiles/ro-crate/should/6_contextual_entity_metadata.ttl b/tests/data/rocrate_validator_profiles/ro-crate/should/6_contextual_entity_metadata.ttl
new file mode 100644
index 0000000..a90dbc2
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/ro-crate/should/6_contextual_entity_metadata.ttl
@@ -0,0 +1,75 @@
+# Copyright (c) 2024-2026 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
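+@prefix ro-crate: <https://github.com/crs4/rocrate-validator/profiles/ro-crate/> .
+@prefix schema: <http://schema.org/> .
+@prefix sh: <http://www.w3.org/ns/shacl#> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .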
+
+ro-crate:CreativeWorkAuthorMinimumRecommendedProperties a sh:NodeShape ; # Shape applied to every node typed ro-crate:CreativeWorkAuthor
+ sh:name "CreativeWork Author: minimum RECOMMENDED properties" ;
+ sh:description """The minimum recommended properties for a `CreativeWork Author` are `name` and `affiliation`.""" ;
+ sh:targetClass ro-crate:CreativeWorkAuthor ;
+ sh:property [
+ sh:path schema:name ;
+ sh:minCount 1 ;
+ sh:dataType xsd:string ; # NOTE(review): SHACL spells this `sh:datatype`; as written the constraint is presumably ignored — confirm
+ sh:name "CreativeWork Author: RECOMMENDED name property" ;
+ sh:description "Check if the author has a name." ;
+ sh:message "The author SHOULD have a name." ;
+ ] ;
+ sh:property [
+ sh:path schema:affiliation ;
+ sh:minCount 1 ;
+ sh:or ( # intended alternatives: a plain string or a node typed schema:Organization
+ [ sh:dataType xsd:string ; ] # NOTE(review): `sh:dataType` vs `sh:datatype`; if ignored, this alternative presumably accepts any value — confirm
+ [ sh:class schema:Organization ;]
+ ) ;
+ sh:severity sh:Warning ;
+ sh:name "CreativeWork Author: RECOMMENDED affiliation property" ;
+ sh:description "Check if the author has an organizational affiliation." ;
+ sh:message "The author SHOULD have an organizational affiliation." ;
+ ] ;
+ sh:property [ # stricter companion of the shape above: the affiliation must be a node typed schema:Organization
+ sh:path schema:affiliation ;
+ sh:minCount 1 ;
+ sh:class schema:Organization ;
+ sh:severity sh:Warning ;
+ sh:name "CreativeWork Author: RECOMMENDED Contextual Entity linked for the organizational `affiliation` property" ;
+ sh:description "Check if the author has a Contextual Entity for the organizational `affiliation` property." ;
+ sh:message "The author SHOULD have a Contextual Entity which specifies the organizational `affiliation`." ;
+ ] .
+
+
+ro-crate:OrganizationRecommendedProperties a sh:NodeShape ; # Shape applied to every node typed schema:Organization
+ sh:name "Organization: RECOMMENDED properties" ;
+ sh:description """The recommended properties for an `Organization` are `name` and `url`.""" ;
+ sh:targetClass schema:Organization ;
+ sh:property [
+ sh:path schema:name ;
+ sh:minCount 1 ;
+ sh:dataType xsd:string ; # NOTE(review): SHACL spells this `sh:datatype`; as written the constraint is presumably ignored — confirm
+ sh:name "Organization: RECOMMENDED name property" ;
+ sh:description "Check if the `organization` has a name." ;
+ sh:message "The organization SHOULD have a name." ;
+ ] ;
+ sh:property [
+ sh:path schema:url ;
+ sh:minCount 1 ;
+ sh:dataType xsd:anyURI ; # NOTE(review): same spelling issue; verify how `url` values are typed in the data graph before enabling `sh:datatype xsd:anyURI`
+ sh:name "Organization: RECOMMENDED url property" ;
+ sh:description "Check if the `organization` has a URL." ;
+ sh:message "The organization SHOULD have a URL." ;
+ ] .
From 29887a2a67d60201cab40331996373341cd97390 Mon Sep 17 00:00:00 2001
From: Douglas Lowe <10961945+douglowe@users.noreply.github.com>
Date: Mon, 9 Feb 2026 13:59:48 +0000
Subject: [PATCH 13/15] add profile_name to API description in readme
---
README.md | 1 +
1 file changed, 1 insertion(+)
diff --git a/README.md b/README.md
index 98d4322..d1d8ae8 100644
--- a/README.md
+++ b/README.md
@@ -21,6 +21,7 @@ This project presents a Flask-based API for validating RO-Crates.
|------------|-----------|-------------------------|-----------------------------------------------------------------------|
| root_path | optional | string | Root path which contains the RO-Crate |
| webhook_url | optional | string | Webhook to send validation result to |
+| profile_name | optional | string | RO-Crate profile to validate against |
| minio_config | required | dictionary | MinIO Configuration Details |
`minio_config`
From 95ea1dc382ded38e0bb15348795f4ca24eda8ab2 Mon Sep 17 00:00:00 2001
From: Douglas Lowe <10961945+douglowe@users.noreply.github.com>
Date: Mon, 9 Feb 2026 14:08:17 +0000
Subject: [PATCH 14/15] API and docker updates in readme
---
README.md | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index d1d8ae8..a82204e 100644
--- a/README.md
+++ b/README.md
@@ -168,12 +168,24 @@ curl -X 'POST' \
2. Create the `.env` file for shared environment information. An example environment file is included (`example.env`), which can be copied for this purpose. But make sure to change any security settings (username and passwords).
-3. Build and start the services using Docker Compose:
+3. A directory containing RO-Crate profiles to replace the default RO-Crate profiles for validation may be provided. Note that this will need to contain all profile files, as the default profile data will not be used. An example of this is given in the `docker-compose-develop.yml` file, and described here:
+ 1. Store the profiles in a convenient directory, e.g.: `./local/rocrate_validator_profiles`
+ 2. Add a volume to the celery worker container for these, e.g.:
+```
+ volumes:
+ - ./local/rocrate_validator_profiles:/app/profiles:ro
+```
+ 3. Provide the `PROFILES_PATH` environment to the flask container (not the celery worker container) to match the internal path, e.g.:
+```
+ - PROFILES_PATH=/app/profiles
+```
+
+4. Build and start the services using Docker Compose:
```bash
docker compose up --build
```
-4. Set up the MinIO bucket
+5. Set up the MinIO bucket
1. Open the MinIO web interface at `http://localhost:9000`.
2. Log in with your MinIO credentials.
3. Create a new bucket named `ro-crates`.
From 53cbe0988df9b9c9db2458d1e6fa3a3a6ebe96c1 Mon Sep 17 00:00:00 2001
From: Douglas Lowe <10961945+douglowe@users.noreply.github.com>
Date: Mon, 9 Feb 2026 14:09:32 +0000
Subject: [PATCH 15/15] readme cleanup
---
README.md | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/README.md b/README.md
index a82204e..8eaad38 100644
--- a/README.md
+++ b/README.md
@@ -171,14 +171,14 @@ curl -X 'POST' \
3. A directory containing RO-Crate profiles to replace the default RO-Crate profiles for validation may be provided. Note that this will need to contain all profile files, as the default profile data will not be used. An example of this is given in the `docker-compose-develop.yml` file, and described here:
1. Store the profiles in a convenient directory, e.g.: `./local/rocrate_validator_profiles`
2. Add a volume to the celery worker container for these, e.g.:
-```
+ ```
volumes:
- ./local/rocrate_validator_profiles:/app/profiles:ro
-```
+ ```
3. Provide the `PROFILES_PATH` environment to the flask container (not the celery worker container) to match the internal path, e.g.:
-```
+ ```
- PROFILES_PATH=/app/profiles
-```
+ ```
4. Build and start the services using Docker Compose:
```bash