diff --git a/lib/galaxy/files/sources/irods.py b/lib/galaxy/files/sources/irods.py
new file mode 100644
index 000000000000..d1e2ed07f19b
--- /dev/null
+++ b/lib/galaxy/files/sources/irods.py
@@ -0,0 +1,215 @@
+"""Galaxy file source plugin for iRODS built on the ``fs-irods`` PyFilesystem2 backend."""
+
+import os
+import ssl
+from collections import deque
+from fnmatch import fnmatch
+from typing import (
+    Optional,
+    Union,
+)
+
+import fs
+import fs.errors
+
+from galaxy.exceptions import (
+    AuthenticationRequired,
+    MessageException,
+)
+from galaxy.files.models import (
+    AnyRemoteEntry,
+    BaseFileSourceConfiguration,
+    BaseFileSourceTemplateConfiguration,
+    FilesSourceRuntimeContext,
+)
+from galaxy.util.config_templates import TemplateExpansion
+from ._pyfilesystem2 import PyFilesystem2FilesSource
+
+# Optional dependencies: a failed import leaves the name bound to None and
+# _open_fs() raises the plugin's required-package exception instead.
+try:
+    from fs_irods import iRODSFS
+except ImportError:
+    iRODSFS = None
+
+try:
+    from irods.session import iRODSSession
+except ImportError:
+    iRODSSession = None
+
+
+class IrodsFileSourceTemplateConfiguration(BaseFileSourceTemplateConfiguration):
+    """iRODS connection settings in which every field may still be a template expression."""
+
+    host: Union[str, TemplateExpansion]
+    port: Union[int, TemplateExpansion] = 1247
+    username: Union[str, TemplateExpansion]
+    password: Union[str, TemplateExpansion]
+    zone: Union[str, TemplateExpansion]
+    root: Optional[Union[str, TemplateExpansion]] = None
+    timeout: Union[int, TemplateExpansion] = 30
+    refresh_time: Union[int, TemplateExpansion] = 300
+    client_server_negotiation: Optional[Union[str, TemplateExpansion]] = None
+    client_server_policy: Optional[Union[str, TemplateExpansion]] = None
+    encryption_algorithm: Optional[Union[str, TemplateExpansion]] = None
+    encryption_key_size: Optional[Union[int, TemplateExpansion]] = None
+    encryption_num_hash_rounds: Optional[Union[int, TemplateExpansion]] = None
+    encryption_salt_size: Optional[Union[int, TemplateExpansion]] = None
+    ssl_verify_server: Optional[Union[str, TemplateExpansion]] = None
+    ssl_ca_certificate_file: Optional[Union[str, TemplateExpansion]] = None
+    resource: Optional[Union[str, TemplateExpansion]] = None
+
+
+class IrodsFileSourceConfiguration(BaseFileSourceConfiguration):
+    """Fully resolved iRODS connection settings, as read by ``IrodsFilesSource._open_fs``."""
+
+    host: str
+    port: int = 1247
+    username: str
+    password: str
+    zone: str
+    root: Optional[str] = None
+    timeout: int = 30
+    refresh_time: int = 300
+    client_server_negotiation: Optional[str] = None
+    client_server_policy: Optional[str] = None
+    encryption_algorithm: Optional[str] = None
+    encryption_key_size: Optional[int] = None
+    encryption_num_hash_rounds: Optional[int] = None
+    encryption_salt_size: Optional[int] = None
+    ssl_verify_server: Optional[str] = None
+    ssl_ca_certificate_file: Optional[str] = None
+    resource: Optional[str] = None
+
+
+class IrodsFilesSource(PyFilesystem2FilesSource[IrodsFileSourceTemplateConfiguration, IrodsFileSourceConfiguration]):
+    """File source that lists and opens iRODS collections through ``iRODSFS``."""
+
+    plugin_type = "irods"
+    required_module = iRODSFS
+    required_package = "fs-irods"
+
+    template_config_class = IrodsFileSourceTemplateConfiguration
+    resolved_config_class = IrodsFileSourceConfiguration
+
+    def _iter_directory_entries(self, fs_handle, parent_path: str, normalized_query: Optional[str] = None):
+        """Yield ``(entry_path, info)`` for each child of ``parent_path``.
+
+        ``normalized_query`` is expected to be lower-cased by the caller; when
+        set, only children whose lower-cased name matches ``*<query>*`` are kept.
+        """
+        for raw_name in fs_handle.listdir(parent_path):
+            name = os.path.basename(str(raw_name).rstrip("/"))
+            if not name:
+                continue
+            if normalized_query and not fnmatch(name.lower(), f"*{normalized_query}*"):
+                continue
+            entry_path = fs.path.join(parent_path, name)
+            info = fs_handle.getinfo(entry_path, namespaces=["details"])
+            yield entry_path, info
+
+    def _list_recursive(self, fs_handle, path: str) -> tuple[list[AnyRemoteEntry], int]:
+        """Return every entry below ``path`` in breadth-first order, plus the number of entries."""
+        result: list[AnyRemoteEntry] = []
+        # A deque keeps the FIFO traversal order while making each dequeue O(1).
+        pending = deque([path])
+        while pending:
+            current_path = pending.popleft()
+            for entry_path, info in self._iter_directory_entries(fs_handle, current_path):
+                result.append(self._resource_info_to_dict(current_path, info))
+                if info.is_dir:
+                    pending.append(entry_path)
+        return result, len(result)
+
+    def _list_non_recursive(
+        self,
+        fs_handle,
+        path: str,
+        limit: Optional[int] = None,
+        offset: Optional[int] = None,
+        query: Optional[str] = None,
+    ) -> tuple[list[AnyRemoteEntry], int]:
+        """Return the direct children of ``path`` matching ``query``, sliced to the requested page.
+
+        The returned count is the number of matching entries before pagination.
+        """
+        normalized_query = query.lower() if query else None
+        entries = []
+        for _, info in self._iter_directory_entries(fs_handle, path, normalized_query):
+            entries.append(self._resource_info_to_dict(path, info))
+        count = len(entries)
+        page = self._to_page(limit, offset)
+        if page is not None:
+            entries = entries[page[0] : page[1]]
+        return entries, count
+
+    def _list(
+        self,
+        context: FilesSourceRuntimeContext[IrodsFileSourceConfiguration],
+        path="/",
+        recursive=False,
+        write_intent: bool = False,
+        limit: Optional[int] = None,
+        offset: Optional[int] = None,
+        query: Optional[str] = None,
+        sort_by: Optional[str] = None,
+    ) -> tuple[list[AnyRemoteEntry], int]:
+        """List entries under ``path``, translating filesystem errors into Galaxy exceptions.
+
+        ``limit``, ``offset`` and ``query`` only apply to non-recursive listings;
+        ``write_intent`` and ``sort_by`` are accepted but not used here.
+        """
+        try:
+            with self._open_fs(context) as fs_handle:
+                if recursive:
+                    return self._list_recursive(fs_handle, path)
+                return self._list_non_recursive(fs_handle, path, limit, offset, query)
+        except fs.errors.PermissionDenied as e:
+            raise AuthenticationRequired(
+                f"Permission Denied. Reason: {e}. Please check your credentials in your preferences for {self.label}."
+            ) from e
+        except fs.errors.FSError as e:
+            raise MessageException(f"Problem listing file source path {path}. Reason: {e}") from e
+
+    def _open_fs(self, context: FilesSourceRuntimeContext[IrodsFileSourceConfiguration]):
+        """Build an iRODS session from the resolved configuration and wrap it in ``iRODSFS``.
+
+        Raises the plugin's required-package exception when ``fs_irods`` or
+        ``irods.session`` could not be imported.
+        """
+        if iRODSFS is None or iRODSSession is None:
+            raise self.required_package_exception
+
+        config = context.config
+        # Load the configured CA bundle into the context handed to the client;
+        # with cafile=None the default trust store is used, exactly as before.
+        ssl_context = ssl.create_default_context(
+            purpose=ssl.Purpose.SERVER_AUTH,
+            cafile=config.ssl_ca_certificate_file,
+        )
+        session_kwargs = {
+            "host": config.host,
+            "port": config.port,
+            "user": config.username,
+            "password": config.password,
+            "zone": config.zone,
+            "refresh_time": config.refresh_time,
+            "client_server_negotiation": config.client_server_negotiation,
+            "client_server_policy": config.client_server_policy,
+            "encryption_algorithm": config.encryption_algorithm,
+            "encryption_key_size": config.encryption_key_size,
+            "encryption_num_hash_rounds": config.encryption_num_hash_rounds,
+            "encryption_salt_size": config.encryption_salt_size,
+            "ssl_verify_server": config.ssl_verify_server,
+            "ssl_ca_certificate_file": config.ssl_ca_certificate_file,
+            "ssl_context": ssl_context,
+        }
+        session = iRODSSession(**session_kwargs)
+        session.connection_timeout = config.timeout
+        if config.resource:
+            session.default_resource = config.resource
+
+        return iRODSFS(session=session, root=config.root)
+
+
+__all__ = ("IrodsFilesSource",)
diff --git a/lib/galaxy/files/templates/examples/irods.yml b/lib/galaxy/files/templates/examples/irods.yml
new file mode 100644
index 000000000000..22b168a32eb5
--- /dev/null
+++ b/lib/galaxy/files/templates/examples/irods.yml
@@ -0,0 +1,61 @@
+- id: irods
+  version: 0
+  name: iRODS
+  description: |
+    Use this template to connect an iRODS collection as a Galaxy file source for
+    importing and exporting datasets.
+
+    You need valid iRODS credentials and the target root collection path.
+  variables:
+    host:
+      label: iRODS host
+      type: string
+      help: Hostname of the iRODS server.
+    port:
+      label: iRODS port
+      type: integer
+      help: Port of the iRODS server.
+      default: 1247
+    username:
+      label: iRODS user
+      type: string
+      help: Username used to authenticate against iRODS.
+    zone:
+      label: iRODS zone
+      type: string
+      help: Zone used for authentication (for example tempZone).
+    root:
+      label: Root collection path
+      type: string
+      help: |
+        Collection path exposed in Galaxy, for example /tempZone/home/rods.
+    timeout:
+      label: Connection timeout (seconds)
+      type: integer
+      help: Timeout in seconds for iRODS connection attempts.
+      default: 30
+    refresh_time:
+      label: Session refresh time (seconds)
+      type: integer
+      help: Seconds between refresh checks for active iRODS sessions.
+      default: 300
+    writable:
+      label: Writable?
+      type: boolean
+      help: Allow writing datasets back to iRODS.
+      default: false
+  secrets:
+    password:
+      label: Password
+      help: Password used to authenticate against iRODS.
+  configuration:
+    type: irods
+    host: "{{ variables.host }}"
+    port: "{{ variables.port }}"
+    username: "{{ variables.username }}"
+    password: "{{ secrets.password }}"
+    zone: "{{ variables.zone }}"
+    root: "{{ variables.root }}"
+    timeout: "{{ variables.timeout }}"
+    refresh_time: "{{ variables.refresh_time }}"
+    writable: "{{ variables.writable }}"
diff --git a/lib/galaxy/files/templates/models.py b/lib/galaxy/files/templates/models.py
index ed9adc73f2d9..d7e824f697ae 100644
--- a/lib/galaxy/files/templates/models.py
+++ b/lib/galaxy/files/templates/models.py
@@ -37,6 +37,7 @@
     "s3fs",
     "azure",
     "azureflat",
+    "irods",
     "onedata",
     "webdav",
     "dropbox",
@@ -193,6 +194,34 @@ class AzureFlatFileSourceConfiguration(StrictModel):
     writable: bool = False
 
 
+class IrodsFileSourceTemplateConfiguration(StrictModel):
+    type: Literal["irods"]
+    host: Union[str, TemplateExpansion]
+    port: Union[int, TemplateExpansion] = 1247
+    username: Union[str, TemplateExpansion]
+    password: Union[str, TemplateExpansion]
+    zone: Union[str, TemplateExpansion]
+    root: Optional[Union[str, TemplateExpansion]] = None
+    timeout: Union[int, TemplateExpansion] = 30
+    refresh_time: Union[int, TemplateExpansion] = 300
+    writable: Union[bool, TemplateExpansion] = False
+    template_start: Optional[str] = None
+    template_end: Optional[str] = None
+
+
+class IrodsFileSourceConfiguration(StrictModel):
+    type: Literal["irods"]
+    host: str
+    port: int = 1247
+    username: str
+    password: str
+    zone: str
+    root: Optional[str] = None
+    timeout: int = 30
+    refresh_time: int = 300
+    writable: bool = False
+
+
 class OnedataFileSourceTemplateConfiguration(StrictModel):
     type: Literal["onedata"]
     access_token: Union[str, TemplateExpansion]
@@ -358,6 +387,7 @@ class OmeroFileSourceConfiguration(StrictModel):
         FtpFileSourceTemplateConfiguration,
         AzureFileSourceTemplateConfiguration,
         AzureFlatFileSourceTemplateConfiguration,
+        IrodsFileSourceTemplateConfiguration,
         OnedataFileSourceTemplateConfiguration,
         WebdavFileSourceTemplateConfiguration,
         DropboxFileSourceTemplateConfiguration,
@@ -380,6 +410,7 @@ class OmeroFileSourceConfiguration(StrictModel):
         FtpFileSourceConfiguration,
         AzureFileSourceConfiguration,
         AzureFlatFileSourceConfiguration,
+        IrodsFileSourceConfiguration,
         OnedataFileSourceConfiguration,
         WebdavFileSourceConfiguration,
         DropboxFileSourceConfiguration,
@@ -460,6 +491,7 @@ def template_to_configuration(
     "s3fs": S3FSFileSourceConfiguration,
     "azure": AzureFileSourceConfiguration,
     "azureflat": AzureFlatFileSourceConfiguration,
+    "irods": IrodsFileSourceConfiguration,
     "onedata": OnedataFileSourceConfiguration,
     "webdav": WebdavFileSourceConfiguration,
     "dropbox": DropboxFileSourceConfiguration,
diff --git a/test/unit/files/test_irods.py b/test/unit/files/test_irods.py
new file mode 100644
index 000000000000..0ad83fd51c62
--- /dev/null
+++ b/test/unit/files/test_irods.py
@@ -0,0 +1,219 @@
+import os
+import socket
+
+import pytest
+
+from galaxy.files.models import (
+    FileSourcePluginsConfig,
+    FilesSourceRuntimeContext,
+    UserData,
+)
+from galaxy.files.plugins import FileSourcePluginLoader
+from galaxy.files.sources.irods import IrodsFilesSource
+from ._util import (
+    assert_realizes_contains,
+    configured_file_sources,
+    write_from,
+)
+
+try:
+    from irods.session import iRODSSession
+except ImportError:
+    iRODSSession = None
+
+
+ROUNDTRIP_TEST_FILENAME = "numerical_sort_and_write_back_to_irods_v2.tab"
+
+
+class _FakeSession:
+    """Stand-in for iRODSSession that records the keyword arguments of the latest construction."""
+
+    init_kwargs = None
+
+    def __init__(self, **kwargs):
+        type(self).init_kwargs = kwargs
+        self.connection_timeout = None
+        self.default_resource = None
+
+
+class _FakeIrodsFs:
+    """Stand-in for iRODSFS that only stores the session and root it was given."""
+
+    def __init__(self, session, root=None):
+        self.session = session
+        self.root = root
+
+
+def _irods_live_settings() -> dict:
+    """Read live-test connection settings from GALAXY_TEST_IRODS_* variables, with local defaults."""
+    host = os.environ.get("GALAXY_TEST_IRODS_HOST", "127.0.0.1")
+    port = int(os.environ.get("GALAXY_TEST_IRODS_PORT", "1247"))
+    username = os.environ.get("GALAXY_TEST_IRODS_USER", "rods")
+    password = os.environ.get("GALAXY_TEST_IRODS_PASSWORD", "rods")
+    zone = os.environ.get("GALAXY_TEST_IRODS_ZONE", "tempZone")
+    root = os.environ.get("GALAXY_TEST_IRODS_ROOT", f"/{zone}/home/{username}")
+    timeout = int(os.environ.get("GALAXY_TEST_IRODS_TIMEOUT", "30"))
+    refresh_time = int(os.environ.get("GALAXY_TEST_IRODS_REFRESH_TIME", "300"))
+    return {
+        "host": host,
+        "port": port,
+        "username": username,
+        "password": password,
+        "zone": zone,
+        "root": root,
+        "timeout": timeout,
+        "refresh_time": refresh_time,
+    }
+
+
+def _skip_if_irods_unreachable(host: str, port: int):
+    """Skip the calling test unless the iRODS client is importable and the server accepts connections."""
+    # The cleanup helper instantiates iRODSSession directly, so a missing client
+    # has to skip the live tests rather than fail with a TypeError on None.
+    if iRODSSession is None:
+        pytest.skip("The iRODS Python client (irods.session) is not installed.")
+    try:
+        with socket.create_connection((host, port), timeout=1):
+            return
+    except OSError:
+        pytest.skip(
+            f"No reachable iRODS service at {host}:{port}. "
+            "Start your local iRODS Docker stack or override GALAXY_TEST_IRODS_* settings."
+        )
+
+
+def _live_file_source_config(settings: dict, writable: bool = False) -> list[dict]:
+    """Build a single-entry file source configuration (id ``test1``) from live settings."""
+    return [
+        {
+            "type": "irods",
+            "id": "test1",
+            "label": "iRODS Live Test",
+            "doc": "Live iRODS connectivity smoke test",
+            "host": settings["host"],
+            "port": settings["port"],
+            "username": settings["username"],
+            "password": settings["password"],
+            "zone": settings["zone"],
+            "root": settings["root"],
+            "timeout": settings["timeout"],
+            "refresh_time": settings["refresh_time"],
+            "writable": writable,
+        }
+    ]
+
+
+def _cleanup_live_test_artifacts(settings: dict):
+    """Delete the round-trip test object from the configured root collection if it exists."""
+    root = settings["root"].rstrip("/")
+    logical_path = f"{root}/{ROUNDTRIP_TEST_FILENAME}"
+
+    session = iRODSSession(
+        host=settings["host"],
+        port=settings["port"],
+        user=settings["username"],
+        password=settings["password"],
+        zone=settings["zone"],
+        refresh_time=settings["refresh_time"],
+    )
+    session.connection_timeout = settings["timeout"]
+
+    try:
+        if session.data_objects.exists(logical_path):
+            session.data_objects.unlink(logical_path)
+    finally:
+        session.cleanup()
+
+
+def test_irods_plugin_registered():
+    plugin_loader = FileSourcePluginLoader()
+    plugin_class = plugin_loader.get_plugin_type_class("irods")
+    assert plugin_class is IrodsFilesSource
+
+
+def test_irods_open_fs_builds_session(monkeypatch):
+    monkeypatch.setattr("galaxy.files.sources.irods.iRODSSession", _FakeSession)
+    monkeypatch.setattr("galaxy.files.sources.irods.iRODSFS", _FakeIrodsFs)
+    monkeypatch.setattr(IrodsFilesSource, "required_module", _FakeIrodsFs)
+
+    file_source = IrodsFilesSource(
+        IrodsFilesSource.build_template_config(
+            type="irods",
+            id="test_irods",
+            file_sources_config=FileSourcePluginsConfig(),
+            host="irods.example.org",
+            port=1247,
+            username="rods",
+            password="secret",
+            zone="tempZone",
+            root="/tempZone/home/rods",
+            timeout=42,
+            refresh_time=120,
+            resource="demoResc",
+            writable=True,
+        )
+    )
+
+    resolved_config = file_source._evaluate_template_config(UserData())
+    context = FilesSourceRuntimeContext(user_data=UserData(), config=resolved_config)
+
+    fs = file_source._open_fs(context)
+    init_kwargs = _FakeSession.init_kwargs
+
+    assert isinstance(fs, _FakeIrodsFs)
+    assert fs.root == "/tempZone/home/rods"
+    assert init_kwargs is not None
+    assert init_kwargs["host"] == "irods.example.org"
+    assert init_kwargs["port"] == 1247
+    assert init_kwargs["user"] == "rods"
+    assert init_kwargs["password"] == "secret"
+    assert init_kwargs["zone"] == "tempZone"
+    assert init_kwargs["refresh_time"] == 120
+    assert fs.session.connection_timeout == 42
+    assert fs.session.default_resource == "demoResc"
+
+
+def test_irods_live_touch():
+    settings = _irods_live_settings()
+    _skip_if_irods_unreachable(settings["host"], settings["port"])
+    _cleanup_live_test_artifacts(settings)
+
+    file_sources = configured_file_sources(_live_file_source_config(settings, writable=False))
+    file_source_pair = file_sources.get_file_source_path("gxfiles://test1")
+
+    assert file_source_pair.path == "/"
+    entries, count = file_source_pair.file_source.list("/", recursive=False)
+    assert isinstance(entries, list)
+    assert count >= 0
+    _cleanup_live_test_artifacts(settings)
+
+
+def test_irods_live_recursive_list():
+    settings = _irods_live_settings()
+    _skip_if_irods_unreachable(settings["host"], settings["port"])
+    _cleanup_live_test_artifacts(settings)
+
+    file_sources = configured_file_sources(_live_file_source_config(settings, writable=False))
+    file_source_pair = file_sources.get_file_source_path("gxfiles://test1")
+
+    entries, count = file_source_pair.file_source.list("/", recursive=True)
+    assert isinstance(entries, list)
+    assert count >= 0
+    _cleanup_live_test_artifacts(settings)
+
+
+def test_irods_live_write_and_read_roundtrip():
+    settings = _irods_live_settings()
+    _skip_if_irods_unreachable(settings["host"], settings["port"])
+    _cleanup_live_test_artifacts(settings)
+
+    test_contents = "1\t2\t999\n666\t6\t555\n3\t4\t5\n"
+    target_uri = f"gxfiles://test1/{ROUNDTRIP_TEST_FILENAME}"
+
+    file_sources = configured_file_sources(_live_file_source_config(settings, writable=True))
+    try:
+        _ = write_from(file_sources, target_uri, test_contents)
+        assert_realizes_contains(file_sources, target_uri, test_contents)
+    finally:
+        # Remove the uploaded object even when the round-trip assertions fail.
+        _cleanup_live_test_artifacts(settings)