Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,39 @@
# DataSHIELD Interface Python

This DataSHIELD Client Interface is a Python port of the original DataSHIELD Client Interface written in R ([DSI](https://github.com/datashield/DSI)). The provided interface can be implemented for accessing a data repository supporting the DataSHIELD infrastructure: controlled R commands executed on the server side guarantee that only non-disclosive information is returned to the client side.

## Configuration

The search path for the DataSHIELD configuration file is the following:

1. User general location: `~/.config/datashield/config.yaml`
2. Current project specific location: `./.datashield/config.yaml`

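The configurations are merged in the order listed above: when the same server name appears in both files, the entry from the project-specific file replaces the one from the user-level file, so server names are unique in the merged configuration.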

The format of the DataSHIELD configuration file is:

```yaml
servers:
  - name: server1
    url: https://opal-demo.obiba.org
    user: dsuser
    password: P@ssw0rd
  - name: server2
    url: https://opal.example.org
    token: your-access-token-here
    profile: default
  - name: server3
    url: https://study.example.org/opal
    user: dsuser
    password: P@ssw0rd
    profile: custom
    driver: datashield_opal.OpalDriver
```

Each server entry in the list supports the following fields (`name`, `url` and one form of authentication are required):
- `name`: Unique identifier for the server
- `url`: The server URL
- Authentication: Either `user` and `password`, or `token` (recommended)
- `profile`: DataSHIELD profile name (optional, defaults to "default")
- `driver`: Connection driver class name (optional, defaults to "datashield_opal.OpalDriver")
1 change: 1 addition & 0 deletions datashield/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from datashield.interface import (
DSConnection as DSConnection,
DSConfig as DSConfig,
DSLoginInfo as DSLoginInfo,
DSDriver as DSDriver,
DSError as DSError,
Expand Down
25 changes: 21 additions & 4 deletions datashield/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""

import logging
from datashield.interface import DSLoginInfo, DSConnection, DSDriver, DSError
from datashield.interface import DSConfig, DSLoginInfo, DSConnection, DSDriver, DSError
import time


Expand All @@ -12,8 +12,23 @@ class DSLoginBuilder:
Helper class to formalize DataSHIELD login arguments for a set of servers.
"""

def __init__(self, names: list[str] | None = None):
    """Create a builder, optionally pre-filled with login information read from configuration files.

    :param names: The server names to look up in the DataSHIELD configuration files, if any. When it is
        None or empty, no configuration file is read and the builder starts empty.

    Every requested name that has no entry in the configuration is reported with a warning; the
    matching entries are added to the builder in the order they appear in the configuration.
    """
    self.items: list[DSLoginInfo] = []
    if not names:
        return

    # set membership keeps the filter linear in the number of configured servers
    requested = set(names)
    config = DSConfig.load()
    self.items.extend(x for x in config.servers if x.name in requested)

    # report every requested name that could not be resolved, including the cases where the
    # configuration is empty or where only some of the names match
    found = {x.name for x in self.items}
    missing = [name for name in names if name not in found]
    if missing:
        logging.warning(f"No matching server names found in configuration for: {', '.join(missing)}")

def add(
self,
Expand Down Expand Up @@ -46,7 +61,9 @@ def add(
raise ValueError(f"Server name must be unique: {name}")
if user is None and token is None:
raise ValueError("Either user or token must be provided")
self.items.append(DSLoginInfo(name, url, user, password, token, profile, driver))
self.items.append(
DSLoginInfo(name=name, url=url, user=user, password=password, token=token, profile=profile, driver=driver)
)
return self

def remove(self, name: str):
Expand Down Expand Up @@ -109,7 +126,7 @@ def open(self, restore: str = None, failSafe: bool = False) -> None:
raise e
if self.has_errors():
for name in self.errors:
print(f"Connection to {name} has failed")
logging.error(f"Connection to {name} has failed")

def close(self, save: str = None) -> None:
"""
Expand Down
96 changes: 76 additions & 20 deletions datashield/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,31 +3,87 @@
"""

import importlib
import logging
import os

import yaml
from pydantic import BaseModel, Field, field_validator

# Default configuration file paths to look for DataSHIELD login information: the user-level file first,
# then the project-level one. DSConfig.load() walks this list in order and, when a server name appears
# in more than one file, the entry from the later file replaces the earlier one.
CONFIG_FILES = ["~/.config/datashield/config.yaml", "./.datashield/config.yaml"]

class DSLoginInfo(BaseModel):
    """
    Helper class with DataSHIELD login details.

    Fields:

    - ``name``: the server name
    - ``url``: the server URL
    - ``user`` / ``password``: credentials, if any
    - ``token``: access token, if any
    - ``profile``: the DataSHIELD profile name, "default" when omitted or None
    - ``driver``: the fully qualified connection driver class name, "datashield_opal.OpalDriver"
      when omitted or None

    Unknown fields are rejected.
    """

    name: str
    url: str
    user: str | None = None
    password: str | None = None
    token: str | None = None
    profile: str = "default"
    driver: str = "datashield_opal.OpalDriver"

    model_config = {"extra": "forbid"}

    @field_validator("profile", "driver", mode="before")
    @classmethod
    def _none_to_default(cls, value: object, info) -> object:
        """Replace an explicit None by the field default.

        Callers may pass ``profile=None`` or ``driver=None`` (and a YAML key with no value is parsed
        as None); both mean "use the default" rather than being a validation error.
        """
        if value is None:
            return cls.model_fields[info.field_name].default
        return value


class DSConfig(BaseModel):
    """
    Helper class with DataSHIELD configuration details.

    The configuration consists of a list of server login entries. Unknown top-level keys are rejected.
    """

    servers: list[DSLoginInfo] = Field(default_factory=list)

    model_config = {"extra": "forbid"}

    @classmethod
    def load(cls) -> "DSConfig":
        """
        Load the DataSHIELD configuration from the default configuration files.

        Every file listed in ``CONFIG_FILES`` that exists and is readable is loaded, in list order, and
        the results are merged by server name: an entry from a later file replaces the entry with the
        same name from an earlier file, and all other entries are kept. Files that do not exist or are
        not readable are skipped silently; a file that cannot be parsed or validated is reported with
        ``logging.error`` and skipped.

        :return: The merged DataSHIELD configuration object, or an empty one when no file could be loaded
        """
        merged_config = None
        for config_file in CONFIG_FILES:
            # os.path does not expand "~" by itself: without this the user-level file is never found
            path = os.path.expanduser(config_file)
            if not os.path.exists(path) or not os.access(path, os.R_OK):
                continue
            try:
                config = cls.load_from_file(path)
            except Exception as e:
                # a broken file must not prevent the other files from being loaded
                logging.error(f"Failed to load login information from {config_file}: {e}")
                continue
            if merged_config is None:
                merged_config = config
                continue
            # merge servers by name, new ones replacing existing ones, and keep the rest of existing ones
            servers_by_name = {x.name: x for x in merged_config.servers}
            for server in config.servers:
                servers_by_name[server.name] = server
            merged_config.servers = list(servers_by_name.values())
        return merged_config if merged_config is not None else cls()

    @classmethod
    def load_from_file(cls, file: str) -> "DSConfig":
        """
        Load the DataSHIELD configuration from a YAML file. The file must contain a list of servers with
        their login details; an empty file yields an empty configuration.

        :param file: The path to the YAML file containing the DataSHIELD configuration (a leading "~" is
            expanded to the user home directory)
        :return: The DataSHIELD configuration object
        :raises OSError: If the file cannot be opened
        :raises yaml.YAMLError: If the file is not valid YAML
        :raises pydantic.ValidationError: If the content does not match the configuration model
        """
        with open(os.path.expanduser(file), encoding="utf-8") as f:
            config_data = yaml.safe_load(f)

        # an empty YAML document is parsed as None
        if config_data is None:
            config_data = {}

        return cls.model_validate(config_data)


class DSResult:
Expand Down Expand Up @@ -409,7 +465,7 @@ def new_connection(cls, args: DSLoginInfo, restore: str = None) -> DSConnection:
raise NotImplementedError("DSConnection function not available")

@classmethod
def load_class(cls, name: str) -> any:
def load_class(cls, name: str) -> type["DSDriver"]:
"""
Load a class from its fully qualified name (dot separated).

Expand Down
7 changes: 5 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "datashield"
version = "0.2.0"
version = "0.3.0"
description = "DataSHIELD Client Interface in Python."
authors = [
{name = "Yannick Marcon", email = "yannick.marcon@obiba.org"}
Expand All @@ -22,7 +22,10 @@ classifiers = [
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
]
dependencies = []
dependencies = [
"pydantic>=2.0",
"PyYAML>=6.0",
]

[project.optional-dependencies]
test = [
Expand Down
Loading
Loading