diff --git a/.gitignore b/.gitignore
index 8363e9b8..c282d5af 100644
--- a/.gitignore
+++ b/.gitignore
@@ -116,3 +116,8 @@ ENV/
 
 # IDE settings
 .vscode/
+# pixi environments
+.pixi/*
+!.pixi/config.toml
+pixi.toml
+pixi.lock
diff --git a/cloudpathlib/azure/azblobclient.py b/cloudpathlib/azure/azblobclient.py
index 60bd01d3..ab61fe75 100644
--- a/cloudpathlib/azure/azblobclient.py
+++ b/cloudpathlib/azure/azblobclient.py
@@ -39,6 +39,8 @@
         SharedKeyCredentialPolicy as DataLakeSharedKeyCredentialPolicy,
     )
 
+    from azure.identity import DefaultAzureCredential
+
 except ModuleNotFoundError:
     implementation_registry["azure"].dependencies_loaded = False
 
@@ -66,20 +68,23 @@ def __init__(
         https://docs.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.blobserviceclient?view=azure-python).
         Supports the following authentication methods of `BlobServiceClient`.
 
-        - Environment variable `""AZURE_STORAGE_CONNECTION_STRING"` containing connecting string
+        - Environment variable `AZURE_STORAGE_CONNECTION_STRING` containing connecting string
         with account credentials. See [Azure Storage SDK documentation](
         https://docs.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-python#copy-your-credentials-from-the-azure-portal).
+        - Environment variable `AZURE_STORAGE_ACCOUNT_URL` containing the account URL.
+        `DefaultAzureCredential` will be used unless the URL embeds a SAS token.
         - Connection string via `connection_string`, authenticated either with an embedded SAS
         token or with credentials passed to `credentials`.
         - Account URL via `account_url`, authenticated either with an embedded SAS token, or with
-        credentials passed to `credentials`.
+        credentials passed to `credential`. If `credential` is not provided and the URL has no
+        embedded SAS token, `DefaultAzureCredential` will be used automatically.
         - Instantiated and already authenticated [`BlobServiceClient`](
         https://docs.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.blobserviceclient?view=azure-python) or
         [`DataLakeServiceClient`](https://learn.microsoft.com/en-us/python/api/azure-storage-file-datalake/azure.storage.filedatalake.datalakeserviceclient).
 
         If multiple methods are used, priority order is reverse of list above (later in list takes
         priority). If no methods are used, a [`MissingCredentialsError`][cloudpathlib.exceptions.MissingCredentialsError]
-        exception will be raised raised.
+        exception will be raised.
 
         Args:
             account_url (Optional[str]): The URL to the blob storage account, optionally
@@ -117,6 +122,8 @@ def __init__(
 
         if connection_string is None:
             connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING", None)
+        if account_url is None:
+            account_url = os.getenv("AZURE_STORAGE_ACCOUNT_URL", None)
 
         self.data_lake_client: Optional[DataLakeServiceClient] = (
             None  # only needs to end up being set if HNS is enabled
@@ -174,6 +181,11 @@ def __init__(
                 conn_str=connection_string, credential=credential
             )
         elif account_url is not None:
+            # Only fall back to DefaultAzureCredential when the URL has no query string:
+            # the Azure SDK ignores a SAS token embedded in the URL as soon as an explicit
+            # credential is passed, which would silently break SAS-based authentication.
+            if credential is None and "?" not in account_url:
+                credential = DefaultAzureCredential()
             if ".dfs." in account_url:
                 self.service_client = BlobServiceClient(
                     account_url=account_url.replace(".dfs.", ".blob."), credential=credential
diff --git a/cloudpathlib/local/implementations/azure.py b/cloudpathlib/local/implementations/azure.py
index 2b44814f..f7940153 100644
--- a/cloudpathlib/local/implementations/azure.py
+++ b/cloudpathlib/local/implementations/azure.py
@@ -24,6 +24,7 @@ def __init__(self, *args, **kwargs):
             kwargs.get("connection_string", None),
             kwargs.get("account_url", None),
             os.getenv("AZURE_STORAGE_CONNECTION_STRING", None),
+            os.getenv("AZURE_STORAGE_ACCOUNT_URL", None),
         ]
         super().__init__(*args, **kwargs)
 
diff --git a/pyproject.toml b/pyproject.toml
index 81f3d433..aa7248b7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,7 +34,7 @@ dependencies = [
 ]
 
 [project.optional-dependencies]
-azure = ["azure-storage-blob>=12", "azure-storage-file-datalake>=12"]
+azure = ["azure-storage-blob>=12", "azure-storage-file-datalake>=12", "azure-identity>=1"]
 gs = ["google-cloud-storage"]
 s3 = ["boto3>=1.34.0"]
 all = ["cloudpathlib[azure]", "cloudpathlib[gs]", "cloudpathlib[s3]"]
diff --git a/tests/mock_clients/mock_adls_gen2.py b/tests/mock_clients/mock_adls_gen2.py
index aaee7cd1..349c3ad0 100644
--- a/tests/mock_clients/mock_adls_gen2.py
+++ b/tests/mock_clients/mock_adls_gen2.py
@@ -8,7 +8,13 @@
 
 
 class MockedDataLakeServiceClient:
-    def __init__(self, test_dir, adls):
+    def __init__(self, test_dir=None, adls=None, account_url=None, credential=None):
+        if account_url is not None:
+            # account_url-based construction: store url and credential for verification
+            self._account_url = account_url
+            self._credential = credential
+            return
+
         # root is parent of the test specific directory
         self.root = test_dir.parent
         self.test_dir = test_dir
diff --git a/tests/mock_clients/mock_azureblob.py b/tests/mock_clients/mock_azureblob.py
index f99e0d4a..f0c56687 100644
--- a/tests/mock_clients/mock_azureblob.py
+++ b/tests/mock_clients/mock_azureblob.py
@@ -49,7 +49,13 @@ def get(self, key, default=None):
 
 
 class MockBlobServiceClient:
-    def __init__(self, test_dir, adls):
+    def __init__(self, test_dir=None, adls=None, account_url=None, credential=None):
+        if account_url is not None:
+            # account_url-based construction: store url and credential for verification
+            self._account_url = account_url
+            self._credential = credential
+            return
+
         # copy test assets for reference in tests without affecting assets
         shutil.copytree(TEST_ASSETS, test_dir, dirs_exist_ok=True)
 
diff --git a/tests/test_azure_specific.py b/tests/test_azure_specific.py
index 142730b4..c635e563 100644
--- a/tests/test_azure_specific.py
+++ b/tests/test_azure_specific.py
@@ -10,6 +10,7 @@
 from azure.storage.filedatalake import DataLakeServiceClient
 import pytest
 
+import cloudpathlib.azure.azblobclient
 from urllib.parse import urlparse, parse_qs
 from cloudpathlib import AzureBlobClient, AzureBlobPath
 from cloudpathlib.exceptions import (
@@ -19,7 +20,8 @@
 )
 
 from cloudpathlib.local import LocalAzureBlobClient, LocalAzureBlobPath
-from .mock_clients.mock_azureblob import MockStorageStreamDownloader
+from .mock_clients.mock_azureblob import MockBlobServiceClient, MockStorageStreamDownloader
+from .mock_clients.mock_adls_gen2 import MockedDataLakeServiceClient
 
 
 @pytest.mark.parametrize("path_class", [AzureBlobPath, LocalAzureBlobPath])
@@ -39,10 +41,108 @@ def test_azureblobpath_properties(path_class, monkeypatch):
 @pytest.mark.parametrize("client_class", [AzureBlobClient, LocalAzureBlobClient])
 def test_azureblobpath_nocreds(client_class, monkeypatch):
     monkeypatch.delenv("AZURE_STORAGE_CONNECTION_STRING", raising=False)
+    monkeypatch.delenv("AZURE_STORAGE_ACCOUNT_URL", raising=False)
     with pytest.raises(MissingCredentialsError):
         client_class()
 
 
+def _mock_azure_clients(monkeypatch):
+    """Monkeypatch BlobServiceClient and DataLakeServiceClient with mocks."""
+    monkeypatch.setattr(
+        cloudpathlib.azure.azblobclient, "BlobServiceClient", MockBlobServiceClient
+    )
+    monkeypatch.setattr(
+        cloudpathlib.azure.azblobclient, "DataLakeServiceClient", MockedDataLakeServiceClient
+    )
+
+
+def test_default_credential_used_with_account_url(monkeypatch):
+    """DefaultAzureCredential is used when account_url is provided without credential."""
+    monkeypatch.delenv("AZURE_STORAGE_CONNECTION_STRING", raising=False)
+    monkeypatch.delenv("AZURE_STORAGE_ACCOUNT_URL", raising=False)
+    _mock_azure_clients(monkeypatch)
+
+    client = AzureBlobClient(account_url="https://myaccount.blob.core.windows.net")
+
+    assert isinstance(client.service_client, MockBlobServiceClient)
+    assert client.service_client._account_url == "https://myaccount.blob.core.windows.net"
+    assert isinstance(client.service_client._credential, DefaultAzureCredential)
+
+    assert isinstance(client.data_lake_client, MockedDataLakeServiceClient)
+    assert client.data_lake_client._account_url == "https://myaccount.dfs.core.windows.net"
+    assert isinstance(client.data_lake_client._credential, DefaultAzureCredential)
+
+
+def test_no_default_credential_when_explicit_credential(monkeypatch):
+    """DefaultAzureCredential is NOT used when an explicit credential is provided."""
+    monkeypatch.delenv("AZURE_STORAGE_CONNECTION_STRING", raising=False)
+    monkeypatch.delenv("AZURE_STORAGE_ACCOUNT_URL", raising=False)
+    _mock_azure_clients(monkeypatch)
+
+    explicit_cred = "my-explicit-credential"
+    client = AzureBlobClient(
+        account_url="https://myaccount.blob.core.windows.net",
+        credential=explicit_cred,
+    )
+
+    assert client.service_client._credential == explicit_cred
+    assert not isinstance(client.service_client._credential, DefaultAzureCredential)
+
+
+def test_sas_token_in_account_url_not_overridden(monkeypatch):
+    """DefaultAzureCredential is NOT injected when account_url embeds a SAS token."""
+    monkeypatch.delenv("AZURE_STORAGE_CONNECTION_STRING", raising=False)
+    monkeypatch.delenv("AZURE_STORAGE_ACCOUNT_URL", raising=False)
+    _mock_azure_clients(monkeypatch)
+
+    sas_url = "https://myaccount.blob.core.windows.net?sv=2022-11-02&sig=fake"
+    client = AzureBlobClient(account_url=sas_url)
+
+    assert client.service_client._account_url == sas_url
+    assert client.service_client._credential is None
+
+
+def test_account_url_env_var_blob(monkeypatch):
+    """AZURE_STORAGE_ACCOUNT_URL env var with .blob. URL creates both clients."""
+    monkeypatch.delenv("AZURE_STORAGE_CONNECTION_STRING", raising=False)
+    monkeypatch.setenv(
+        "AZURE_STORAGE_ACCOUNT_URL", "https://myaccount.blob.core.windows.net"
+    )
+    _mock_azure_clients(monkeypatch)
+
+    client = AzureBlobClient()
+
+    assert isinstance(client.service_client, MockBlobServiceClient)
+    assert client.service_client._account_url == "https://myaccount.blob.core.windows.net"
+    assert isinstance(client.service_client._credential, DefaultAzureCredential)
+
+    assert isinstance(client.data_lake_client, MockedDataLakeServiceClient)
+    assert client.data_lake_client._account_url == "https://myaccount.dfs.core.windows.net"
+    assert isinstance(client.data_lake_client._credential, DefaultAzureCredential)
+
+
+def test_account_url_env_var_dfs(monkeypatch):
+    """AZURE_STORAGE_ACCOUNT_URL env var with .dfs. URL creates both clients."""
+    monkeypatch.delenv("AZURE_STORAGE_CONNECTION_STRING", raising=False)
+    monkeypatch.setenv(
+        "AZURE_STORAGE_ACCOUNT_URL", "https://myaccount.dfs.core.windows.net"
+    )
+    _mock_azure_clients(monkeypatch)
+
+    client = AzureBlobClient()
+
+    assert client.service_client._account_url == "https://myaccount.blob.core.windows.net"
+    assert client.data_lake_client._account_url == "https://myaccount.dfs.core.windows.net"
+
+
+def test_missing_creds_error_no_env_vars(monkeypatch):
+    """MissingCredentialsError is still raised when nothing is configured."""
+    monkeypatch.delenv("AZURE_STORAGE_CONNECTION_STRING", raising=False)
+    monkeypatch.delenv("AZURE_STORAGE_ACCOUNT_URL", raising=False)
+    with pytest.raises(MissingCredentialsError):
+        AzureBlobClient()
+
+
 def test_as_url(azure_rigs):
     p: AzureBlobPath = azure_rigs.create_cloud_path("dir_0/file0_0.txt")
 
@@ -141,6 +241,10 @@ def _check_access(az_client, gen2=False):
     cl: AzureBlobClient = azure_rigs.client_class(credential=credential, account_url=bsc.url)
     _check_access(cl, gen2=azure_rigs.is_adls_gen2)
 
+    # test DefaultAzureCredential used automatically with only account_url
+    cl = azure_rigs.client_class(account_url=bsc.url)
+    _check_access(cl, gen2=azure_rigs.is_adls_gen2)
+
     # add basic checks for gen2 to exercise limited-privilege access scenarios
     p = azure_rigs.create_cloud_path("new_dir/new_file.txt", client=cl)
     assert cl._check_hns(p) == azure_rigs.is_adls_gen2