Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,8 @@ ENV/

# IDE settings
.vscode/
# pixi environments
.pixi/*
!.pixi/config.toml
pixi.toml
pixi.lock
Comment on lines +119 to +123
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I set up my development environment with Pixi. Also happy to remove the diff in the .gitignore, if that's not wanted.

15 changes: 12 additions & 3 deletions cloudpathlib/azure/azblobclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@
SharedKeyCredentialPolicy as DataLakeSharedKeyCredentialPolicy,
)

from azure.identity import DefaultAzureCredential

except ModuleNotFoundError:
implementation_registry["azure"].dependencies_loaded = False

Expand Down Expand Up @@ -66,20 +68,23 @@ def __init__(
https://docs.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.blobserviceclient?view=azure-python).
Supports the following authentication methods of `BlobServiceClient`.

- Environment variable `""AZURE_STORAGE_CONNECTION_STRING"` containing connecting string
- Environment variable `AZURE_STORAGE_CONNECTION_STRING` containing a connection string
with account credentials. See [Azure Storage SDK documentation](
https://docs.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-python#copy-your-credentials-from-the-azure-portal).
- Environment variable `AZURE_STORAGE_ACCOUNT_URL` containing the account URL.
`DefaultAzureCredential` will be used automatically.
- Connection string via `connection_string`, authenticated either with an embedded SAS
token or with credentials passed to `credential`.
- Account URL via `account_url`, authenticated either with an embedded SAS token, or with
credentials passed to `credentials`.
credentials passed to `credential`. If `credential` is not provided,
`DefaultAzureCredential` will be used automatically.
- Instantiated and already authenticated [`BlobServiceClient`](
https://docs.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.blobserviceclient?view=azure-python) or
[`DataLakeServiceClient`](https://learn.microsoft.com/en-us/python/api/azure-storage-file-datalake/azure.storage.filedatalake.datalakeserviceclient).

If multiple methods are used, priority order is reverse of list above (later in list takes
priority). If no methods are used, a [`MissingCredentialsError`][cloudpathlib.exceptions.MissingCredentialsError]
exception will be raised raised.
exception will be raised.

Args:
account_url (Optional[str]): The URL to the blob storage account, optionally
Expand Down Expand Up @@ -117,6 +122,8 @@ def __init__(

if connection_string is None:
connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING", None)
if account_url is None:
account_url = os.getenv("AZURE_STORAGE_ACCOUNT_URL", None)

self.data_lake_client: Optional[DataLakeServiceClient] = (
None # only needs to end up being set if HNS is enabled
Expand Down Expand Up @@ -174,6 +181,8 @@ def __init__(
conn_str=connection_string, credential=credential
)
elif account_url is not None:
if credential is None:
credential = DefaultAzureCredential()
if ".dfs." in account_url:
self.service_client = BlobServiceClient(
account_url=account_url.replace(".dfs.", ".blob."), credential=credential
Expand Down
1 change: 1 addition & 0 deletions cloudpathlib/local/implementations/azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def __init__(self, *args, **kwargs):
kwargs.get("connection_string", None),
kwargs.get("account_url", None),
os.getenv("AZURE_STORAGE_CONNECTION_STRING", None),
os.getenv("AZURE_STORAGE_ACCOUNT_URL", None),
]
super().__init__(*args, **kwargs)

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ dependencies = [
]

[project.optional-dependencies]
azure = ["azure-storage-blob>=12", "azure-storage-file-datalake>=12"]
azure = ["azure-storage-blob>=12", "azure-storage-file-datalake>=12", "azure-identity>=1"]
gs = ["google-cloud-storage"]
s3 = ["boto3>=1.34.0"]
all = ["cloudpathlib[azure]", "cloudpathlib[gs]", "cloudpathlib[s3]"]
Expand Down
8 changes: 7 additions & 1 deletion tests/mock_clients/mock_adls_gen2.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,13 @@


class MockedDataLakeServiceClient:
def __init__(self, test_dir, adls):
def __init__(self, test_dir=None, adls=None, account_url=None, credential=None):
if account_url is not None:
# account_url-based construction: store url and credential for verification
self._account_url = account_url
self._credential = credential
return

# root is parent of the test specific directory
self.root = test_dir.parent
self.test_dir = test_dir
Expand Down
8 changes: 7 additions & 1 deletion tests/mock_clients/mock_azureblob.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,13 @@ def get(self, key, default=None):


class MockBlobServiceClient:
def __init__(self, test_dir, adls):
def __init__(self, test_dir=None, adls=None, account_url=None, credential=None):
if account_url is not None:
# account_url-based construction: store url and credential for verification
self._account_url = account_url
self._credential = credential
return
Comment on lines +52 to +57
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As per request, I added the account and credential settings to the mock classes:

Also, please follow existing test patterns, not the MagicMock patterns in this PR. You should add azure mocks if you need them to the mock we already have and actually ensure the AzureClient object gets properties set on it correctly.

But honestly, this doesn't feel right to me. If I understand correctly, the MockedDataLakeServiceClient and MockBlobServiceClient are used to test file-system operations (cp, rm, mv, ls, etc.). For the unit tests I added, I only want to test the authentication step, without running file-system operations.

Extending the constructor feels unnatural. For example, we need a conditional block and an early return to avoid running into the shutil.copytree, which would fail (since we don't specify a test directory).

Happy to incorporate any feedback from the maintainers on this topic. Also fine with leaving it as-is, if that's the preferred solution.


# copy test assets for reference in tests without affecting assets
shutil.copytree(TEST_ASSETS, test_dir, dirs_exist_ok=True)

Expand Down
93 changes: 92 additions & 1 deletion tests/test_azure_specific.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from azure.storage.filedatalake import DataLakeServiceClient
import pytest

import cloudpathlib.azure.azblobclient
from urllib.parse import urlparse, parse_qs
from cloudpathlib import AzureBlobClient, AzureBlobPath
from cloudpathlib.exceptions import (
Expand All @@ -19,7 +20,8 @@
)
from cloudpathlib.local import LocalAzureBlobClient, LocalAzureBlobPath

from .mock_clients.mock_azureblob import MockStorageStreamDownloader
from .mock_clients.mock_azureblob import MockBlobServiceClient, MockStorageStreamDownloader
from .mock_clients.mock_adls_gen2 import MockedDataLakeServiceClient


@pytest.mark.parametrize("path_class", [AzureBlobPath, LocalAzureBlobPath])
Expand All @@ -39,10 +41,95 @@ def test_azureblobpath_properties(path_class, monkeypatch):
@pytest.mark.parametrize("client_class", [AzureBlobClient, LocalAzureBlobClient])
def test_azureblobpath_nocreds(client_class, monkeypatch):
    """Constructing a client with no Azure env vars set raises MissingCredentialsError."""
    # Clear both variables the client consults so nothing leaks in from the host.
    for env_var in ("AZURE_STORAGE_CONNECTION_STRING", "AZURE_STORAGE_ACCOUNT_URL"):
        monkeypatch.delenv(env_var, raising=False)

    with pytest.raises(MissingCredentialsError):
        client_class()


def _mock_azure_clients(monkeypatch):
    """Swap the Azure SDK service clients used by `azblobclient` for the test mocks.

    Replaces the `BlobServiceClient` and `DataLakeServiceClient` attributes on the
    `cloudpathlib.azure.azblobclient` module via `monkeypatch.setattr`.
    """
    replacements = {
        "BlobServiceClient": MockBlobServiceClient,
        "DataLakeServiceClient": MockedDataLakeServiceClient,
    }
    for attr_name, mock_class in replacements.items():
        monkeypatch.setattr(cloudpathlib.azure.azblobclient, attr_name, mock_class)


def test_default_credential_used_with_account_url(monkeypatch):
    """An `account_url` given without `credential` falls back to DefaultAzureCredential."""
    for env_var in ("AZURE_STORAGE_CONNECTION_STRING", "AZURE_STORAGE_ACCOUNT_URL"):
        monkeypatch.delenv(env_var, raising=False)
    _mock_azure_clients(monkeypatch)

    blob_url = "https://myaccount.blob.core.windows.net"
    dfs_url = "https://myaccount.dfs.core.windows.net"

    client = AzureBlobClient(account_url=blob_url)

    # Blob client keeps the URL as passed.
    blob_service = client.service_client
    assert isinstance(blob_service, MockBlobServiceClient)
    assert blob_service._account_url == blob_url
    assert isinstance(blob_service._credential, DefaultAzureCredential)

    # Data lake client is expected at the matching `.dfs.` endpoint.
    data_lake = client.data_lake_client
    assert isinstance(data_lake, MockedDataLakeServiceClient)
    assert data_lake._account_url == dfs_url
    assert isinstance(data_lake._credential, DefaultAzureCredential)


def test_no_default_credential_when_explicit_credential(monkeypatch):
    """A caller-supplied `credential` is forwarded instead of DefaultAzureCredential."""
    for env_var in ("AZURE_STORAGE_CONNECTION_STRING", "AZURE_STORAGE_ACCOUNT_URL"):
        monkeypatch.delenv(env_var, raising=False)
    _mock_azure_clients(monkeypatch)

    explicit_cred = "my-explicit-credential"
    client = AzureBlobClient(
        credential=explicit_cred,
        account_url="https://myaccount.blob.core.windows.net",
    )

    forwarded = client.service_client._credential
    assert forwarded == explicit_cred
    assert not isinstance(forwarded, DefaultAzureCredential)


def test_account_url_env_var_blob(monkeypatch):
    """A `.blob.` URL in AZURE_STORAGE_ACCOUNT_URL configures both service clients."""
    blob_url = "https://myaccount.blob.core.windows.net"
    dfs_url = "https://myaccount.dfs.core.windows.net"

    monkeypatch.delenv("AZURE_STORAGE_CONNECTION_STRING", raising=False)
    monkeypatch.setenv("AZURE_STORAGE_ACCOUNT_URL", blob_url)
    _mock_azure_clients(monkeypatch)

    # No arguments: the account URL must come from the environment.
    client = AzureBlobClient()

    blob_service = client.service_client
    assert isinstance(blob_service, MockBlobServiceClient)
    assert blob_service._account_url == blob_url
    assert isinstance(blob_service._credential, DefaultAzureCredential)

    data_lake = client.data_lake_client
    assert isinstance(data_lake, MockedDataLakeServiceClient)
    assert data_lake._account_url == dfs_url
    assert isinstance(data_lake._credential, DefaultAzureCredential)


def test_account_url_env_var_dfs(monkeypatch):
    """A `.dfs.` URL in AZURE_STORAGE_ACCOUNT_URL maps to `.blob.` for the blob client."""
    dfs_url = "https://myaccount.dfs.core.windows.net"

    monkeypatch.delenv("AZURE_STORAGE_CONNECTION_STRING", raising=False)
    monkeypatch.setenv("AZURE_STORAGE_ACCOUNT_URL", dfs_url)
    _mock_azure_clients(monkeypatch)

    client = AzureBlobClient()

    # Each client should end up on its own endpoint flavour.
    expected_urls = {
        "service_client": "https://myaccount.blob.core.windows.net",
        "data_lake_client": dfs_url,
    }
    for attr_name, expected_url in expected_urls.items():
        assert getattr(client, attr_name)._account_url == expected_url


def test_missing_creds_error_no_env_vars(monkeypatch):
    """With neither Azure env var set and no arguments, construction still fails."""
    for env_var in ("AZURE_STORAGE_CONNECTION_STRING", "AZURE_STORAGE_ACCOUNT_URL"):
        monkeypatch.delenv(env_var, raising=False)

    with pytest.raises(MissingCredentialsError):
        AzureBlobClient()


def test_as_url(azure_rigs):
p: AzureBlobPath = azure_rigs.create_cloud_path("dir_0/file0_0.txt")

Expand Down Expand Up @@ -141,6 +228,10 @@ def _check_access(az_client, gen2=False):
cl: AzureBlobClient = azure_rigs.client_class(credential=credential, account_url=bsc.url)
_check_access(cl, gen2=azure_rigs.is_adls_gen2)

# test DefaultAzureCredential used automatically with only account_url
cl = azure_rigs.client_class(account_url=bsc.url)
_check_access(cl, gen2=azure_rigs.is_adls_gen2)
Comment on lines +231 to +233
Copy link
Author

@janjagusch janjagusch Mar 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please note: Technically, this addition to the test alters the state for the assertion that follows (because p is built from cl). We could change the order to restore the original behaviour. A better solution might be to turn all client instantiations into separate tests (e.g., via parametrization).


# add basic checks for gen2 to exercise limited-privilege access scenarios
p = azure_rigs.create_cloud_path("new_dir/new_file.txt", client=cl)
assert cl._check_hns(p) == azure_rigs.is_adls_gen2
Expand Down