Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 15 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ This project presents a Flask-based API for validating RO-Crates.
|------------|-----------|-------------------------|-----------------------------------------------------------------------|
| root_path | optional | string | Root path which contains the RO-Crate |
| webhook_url | optional | string | Webhook to send validation result to |
| profile_name | optional | string | RO-Crate profile to validate against |
| minio_config | required | dictionary | MinIO Configuration Details |

`minio_config`
Expand Down Expand Up @@ -167,12 +168,24 @@ curl -X 'POST' \

2. Create the `.env` file for shared environment information. An example environment file is included (`example.env`), which can be copied for this purpose. Make sure to change any security settings (usernames and passwords) before deploying.

3. Build and start the services using Docker Compose:
3. Optionally, provide a directory of RO-Crate profiles to replace the default profiles used for validation. Note that this directory must contain all of the profile files you need, because the default profile data will not be used. An example is given in the `docker-compose-develop.yml` file, and the steps are described here:
1. Store the profiles in a convenient directory, e.g.: `./local/rocrate_validator_profiles`
2. Add a volume to the celery worker container for these, e.g.:
```
volumes:
- ./local/rocrate_validator_profiles:/app/profiles:ro
```
3. Provide the `PROFILES_PATH` environment variable to the Flask container (not the Celery worker container), set to the path at which the profiles are mounted inside the worker container, e.g.:
```
- PROFILES_PATH=/app/profiles

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is the default is it not? so no need to set unless they don't obey step 2?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's in the testing dockerfile, but not in the default dockerfile (that deploys our pre-built docker container)

```

4. Build and start the services using Docker Compose:
```bash
docker compose up --build
```

4. Set up the MinIO bucket
5. Set up the MinIO bucket
1. Open the MinIO web interface at `http://localhost:9000`.
2. Log in with your MinIO credentials.
3. Create a new bucket named `ro-crates`.
Expand Down
7 changes: 5 additions & 2 deletions app/ro_crates/routes/post_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from apiflask import APIBlueprint, Schema
from apiflask.fields import String, Boolean
from marshmallow.fields import Nested
from flask import Response
from flask import Response, current_app

from app.services.validation_service import (
queue_ro_crate_validation_task,
Expand Down Expand Up @@ -81,7 +81,10 @@ def validate_ro_crate_via_id(json_data, crate_id) -> tuple[Response, int]:
else:
profile_name = None

return queue_ro_crate_validation_task(minio_config, crate_id, root_path, profile_name, webhook_url)
profiles_path = current_app.config["PROFILES_PATH"]

return queue_ro_crate_validation_task(minio_config, crate_id, root_path, profile_name,
webhook_url, profiles_path)


@post_routes_bp.post("/validate_metadata")
Expand Down
6 changes: 4 additions & 2 deletions app/services/validation_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@


def queue_ro_crate_validation_task(
minio_config, crate_id, root_path=None, profile_name=None, webhook_url=None
minio_config, crate_id, root_path=None, profile_name=None, webhook_url=None,
profiles_path=None
) -> tuple[Response, int]:
"""
Queues an RO-Crate for validation with Celery.
Expand All @@ -51,7 +52,8 @@ def queue_ro_crate_validation_task(
raise InvalidAPIUsage(f"No RO-Crate with prefix: {crate_id}", 400)

try:
process_validation_task_by_id.delay(minio_config, crate_id, root_path, profile_name, webhook_url)
process_validation_task_by_id.delay(minio_config, crate_id, root_path,
profile_name, webhook_url, profiles_path)
return jsonify({"message": "Validation in progress"}), 202

except Exception as e:
Expand Down
11 changes: 7 additions & 4 deletions app/tasks/validation_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@

@celery.task
def process_validation_task_by_id(
minio_config: dict, crate_id: str, root_path: str, profile_name: str | None, webhook_url: str | None
minio_config: dict, crate_id: str, root_path: str, profile_name: str | None,
webhook_url: str | None, profiles_path: str | None
) -> None:
"""
Background task to process the RO-Crate validation by ID.
Expand All @@ -56,7 +57,7 @@ def process_validation_task_by_id(
logging.info(f"Processing validation task for {file_path}")

# Perform validation:
validation_result = perform_ro_crate_validation(file_path, profile_name)
validation_result = perform_ro_crate_validation(file_path, profile_name, profiles_path=profiles_path)

if isinstance(validation_result, str):
logging.error(f"Validation failed: {validation_result}")
Expand Down Expand Up @@ -158,14 +159,15 @@ def process_validation_task_by_metadata(


def perform_ro_crate_validation(
file_path: str, profile_name: str | None, skip_checks_list: Optional[list] = None
file_path: str, profile_name: str | None, skip_checks_list: Optional[list] = None, profiles_path: Optional[str] = None
) -> ValidationResult | str:
"""
Validates an RO-Crate using the provided file path and profile name.

:param file_path: The path to the RO-Crate file to validate
:param profile_name: The name of the validation profile to use. Defaults to None. If None, the CRS4 validator will
attempt to determine the profile.
:param profiles_path: The path to the profiles definition directory
:param skip_checks_list: A list of checks to skip, if needed
:return: The validation result.
:raises Exception: If an error occurs during the validation process.
Expand All @@ -183,7 +185,8 @@ def perform_ro_crate_validation(
settings = services.ValidationSettings(
rocrate_uri=full_file_path,
**({"profile_identifier": profile_name} if profile_name else {}),
**({"skip_checks": skip_checks_list} if skip_checks_list else {})
**({"skip_checks": skip_checks_list} if skip_checks_list else {}),
**({"profiles_path": profiles_path} if profiles_path else {})
)

return services.validate(settings)
Expand Down
31 changes: 16 additions & 15 deletions app/utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,34 +10,32 @@
from flask import Flask


def get_env(name: str, default=None, required=False):
    """
    Reads an environment variable, optionally insisting that a value exists.

    :param name: The name of the environment variable to look up.
    :param default: The value to fall back on when the variable is not set.
    :param required: If truthy, a lookup that resolves to None is an error.
    :return: The value of the variable, or ``default`` if it is not set.
    :raises RuntimeError: If ``required`` is truthy and the resolved value is None.
    """
    resolved = os.environ.get(name, default)

    # A usable value, or an optional variable: hand back whatever was resolved.
    if resolved is not None or not required:
        return resolved

    raise RuntimeError(f"Missing required environment variable: {name}")


class Config:
"""Base configuration class for the Flask application."""

SECRET_KEY = os.getenv("SECRET_KEY", "my_precious")

# Celery configuration:
CELERY_BROKER_URL = os.getenv("CELERY_BROKER_URL")
CELERY_RESULT_BACKEND = os.getenv("CELERY_RESULT_BACKEND")
CELERY_BROKER_URL = get_env("CELERY_BROKER_URL", required=False)
CELERY_RESULT_BACKEND = get_env("CELERY_RESULT_BACKEND", required=False)

# MinIO configuration:
MINIO_ENDPOINT = os.getenv("MINIO_ENDPOINT")
MINIO_ACCESS_KEY = os.getenv("MINIO_ACCESS_KEY")
MINIO_SECRET_KEY = os.getenv("MINIO_SECRET_KEY")
MINIO_BUCKET_NAME = os.getenv("MINIO_BUCKET_NAME", "bucket-name")
# rocrate validator configuration:
PROFILES_PATH = get_env("PROFILES_PATH", required=False)


class DevelopmentConfig(Config):
"""Development configuration class."""

DEBUG = True
ENV = "development"


class ProductionConfig(Config):
"""Production configuration class."""

DEBUG = False
ENV = "production"


class InvalidAPIUsage(Exception):
Expand All @@ -63,10 +61,13 @@ def make_celery(app: Flask = None) -> Celery:
:param app: The Flask application to use.
:return: The Celery instance.
"""
env = os.environ.get("FLASK_ENV", "development")
config_cls = ProductionConfig if env == "production" else DevelopmentConfig

celery = Celery(
app.import_name if app else __name__,
broker=os.getenv("CELERY_BROKER_URL"),
backend=os.getenv("CELERY_RESULT_BACKEND"),
broker=config_cls.CELERY_BROKER_URL,
backend=config_cls.CELERY_RESULT_BACKEND,
)

if app:
Expand Down
7 changes: 3 additions & 4 deletions docker-compose-develop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ services:
- MINIO_ROOT_USER=${MINIO_ROOT_USER}
- MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD}
- MINIO_BUCKET_NAME=${MINIO_BUCKET_NAME}
- PROFILES_PATH=/app/profiles
depends_on:
- redis
- minio
Expand All @@ -28,13 +29,11 @@ services:
environment:
- CELERY_BROKER_URL=redis://redis:6379/0
- CELERY_RESULT_BACKEND=redis://redis:6379/0
- MINIO_ENDPOINT=${MINIO_ENDPOINT}
- MINIO_ROOT_USER=${MINIO_ROOT_USER}
- MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD}
- MINIO_BUCKET_NAME=${MINIO_BUCKET_NAME}
depends_on:
- redis
- minio
volumes:
- ./tests/data/rocrate_validator_profiles:/app/profiles:ro

redis:
image: "redis:alpine"
Expand Down
4 changes: 0 additions & 4 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,6 @@ services:
environment:
- CELERY_BROKER_URL=redis://redis:6379/0
- CELERY_RESULT_BACKEND=redis://redis:6379/0
- MINIO_ENDPOINT=${MINIO_ENDPOINT}
- MINIO_ROOT_USER=${MINIO_ROOT_USER}
- MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD}
- MINIO_BUCKET_NAME=${MINIO_BUCKET_NAME}
depends_on:
- redis
- minio
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Copyright (c) 2025 eScience Lab, The University of Manchester
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

@prefix ro: <./> .
@prefix ro-crate: <https://github.com/crs4/rocrate-validator/profiles/ro-crate/> .
@prefix alpha-crate: <https://github.com/eScienceLab/rocrate-validator/profiles/alpha-crate/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix schema: <http://schema.org/> .
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix validator: <https://github.com/crs4/rocrate-validator/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .


alpha-crate:RootDataEntityRequiredProperties
a sh:NodeShape ;
sh:name "RootDataEntity" ;
sh:targetClass ro-crate:RootDataEntity ;

sh:property [
a sh:PropertyShape ;
sh:name "sourceOrganization" ;
sh:path schema:sourceOrganization;
sh:minCount 1 ;
sh:severity sh:Violation ;
sh:message """The Root Data Entity MUST have a `sourceOrganization` property.""" ;
] ;

sh:property [
a sh:PropertyShape ;
sh:name "sourceOrganization" ;
sh:path schema:sourceOrganization ;
sh:class schema:Project ;
sh:severity sh:Violation ;
sh:message """The `sourceOrganization` property of the RootDataEntity MUST point to a Project entity.""" ;
] .
55 changes: 55 additions & 0 deletions tests/data/rocrate_validator_profiles/alpha-crate/profile.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Copyright (c) 2024-2025 CRS4, University of Manchester
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

@prefix dct: <http://purl.org/dc/terms/> .
@prefix prof: <http://www.w3.org/ns/dx/prof/> .
@prefix role: <http://www.w3.org/ns/dx/prof/role/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

<https://w3id.org/alpha-crate/0.1>
a prof:Profile ;

# the Profile's label
rdfs:label "Alpha RO-Crate 0.1" ;

# regular metadata, a basic description of the Profile
rdfs:comment """Alpha RO-Crate Metadata Specification 0.1"""@en ;

# URI of the publisher of the Metadata Specification
dct:publisher <https://w3id.org/alpha-crate> ;

# This profile is a transitive profile of the RO-Crate Metadata Specification
prof:isTransitiveProfileOf <https://w3id.org/ro/crate/1.1> ;

# this profile has a JSON-LD context resource
prof:hasResource [
a prof:ResourceDescriptor ;

# it's in JSON-LD format
dct:format <https://w3id.org/mediatype/application/ld+json> ;

# it conforms to JSON-LD, here referred to by its namespace URI as a Profile
dct:conformsTo <https://www.w3.org/TR/json-ld11/> ;

# this profile resource plays the role of "Vocabulary"
# described in this ontology's accompanying Roles vocabulary
prof:hasRole role:Vocabulary ;

# this profile resource's actual file
prof:hasArtifact <https://w3id.org/ro/terms/workflow-run/context> ;
] ;

# a short code to refer to the Profile with when a URI can't be used
prof:hasToken "alpha-crate" ;
.
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Copyright (c) 2025 eScience Lab, The University of Manchester
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

@prefix ro: <./> .
@prefix ro-crate: <https://github.com/crs4/rocrate-validator/profiles/ro-crate/> .
@prefix five-safes-crate: <https://github.com/eScienceLab/rocrate-validator/profiles/five-safes-crate/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix schema: <http://schema.org/> .
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix validator: <https://github.com/crs4/rocrate-validator/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .


five-safes-crate:WorkflowexecutionObjectHasStartTimeIfBegun
a sh:NodeShape ;
sh:name "WorkflowExecution" ;
sh:description (
"The workflow execution object MAY have a startTime if actionStatus is "
"either ActiveActionStatus, CompletedActionStatus or FailedActionStatus."
) ;

sh:target [
a sh:SPARQLTarget ;
sh:select """
PREFIX schema: <http://schema.org/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

SELECT ?this
WHERE {
?this rdf:type schema:CreateAction ;
schema:actionStatus ?status .
FILTER(?status IN (
"http://schema.org/CompletedActionStatus",
"http://schema.org/FailedActionStatus",
"http://schema.org/ActiveActionStatus"
))
}
""" ;
] ;

sh:property [
a sh:PropertyShape ;
sh:name "StartTime" ;
sh:path schema:startTime ;
sh:minCount 1 ;
sh:maxCount 1 ;
sh:severity sh:Info ;
sh:description (
"The workflow execution object MAY have a startTime if actionStatus is "
"either ActiveActionStatus, CompletedActionStatus or FailedActionStatus."
) ;
sh:message "The workflow execution object MAY have a startTime if actionStatus is either ActiveActionStatus, CompletedActionStatus or FailedActionStatus." ;
] .
Loading