Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
c167a8b
Add AzureBlobFileSystem placeholder, verify devtools::document() beha…
Collinbrown95 Mar 3, 2026
2ed3ab7
Added c++ stub
marberts Mar 5, 2026
8659e73
Updated codegen
marberts Mar 5, 2026
bd602ff
Added a comment
marberts Mar 5, 2026
e8b0452
add simple test function to work through codegen.R
Collinbrown95 Mar 5, 2026
033e9a8
temporarily "force" build DARROW_R_WITH_AZUREFS build flag.
Collinbrown95 Mar 5, 2026
94a4773
Merge branch '32123-expose-azure-blob-filesystem' of github.com:marbe…
Collinbrown95 Mar 5, 2026
a9efa92
cleanup azurefs test function code
Collinbrown95 Mar 5, 2026
7733eab
document instructions to start local azurite container
Collinbrown95 Mar 5, 2026
99aad4e
add arrow_with_azure helper following convention for s3/gcp
Collinbrown95 Mar 5, 2026
2a7f8ea
add ARROW_AZURE flag to nixlibs.R
Collinbrown95 Mar 5, 2026
8ca7bc6
debug first argument check
Collinbrown95 Mar 5, 2026
1111ea6
Renamed R6 class correctly
marberts Mar 11, 2026
ed01c13
Added endpoint + key, token, and default authentication
marberts Mar 11, 2026
6795548
Finished logical for AzureFileSystem to match pyarrow
marberts Mar 11, 2026
de711d3
standardize on ARROW_R_WITH_AZURE instead of ARROW_R_WITH_AZUREFS
Collinbrown95 Mar 13, 2026
4c14d2f
standardize on ARROW_R_WITH_AZURE
Collinbrown95 Mar 13, 2026
87049af
Turn on ARROW_AZURE flag in nixlibs.R
Collinbrown95 Mar 13, 2026
9434223
drop temporary arrow env var hack
Collinbrown95 Mar 13, 2026
668bdb6
temporary documentation of what I've tried so far
Collinbrown95 Mar 13, 2026
fcfbd94
Add TODO note in configure script to remove hard-coded link flags
Collinbrown95 Mar 13, 2026
cdab7f1
initial filesystem tests
Collinbrown95 Mar 14, 2026
221aba4
uncomment line 256 of filesystem.cpp
Collinbrown95 Mar 15, 2026
b587aaf
checkpoint: resolved segfault error
Collinbrown95 Mar 15, 2026
ed20ea1
skip test_filesystem tests that rely on being able to connect directl…
Collinbrown95 Mar 15, 2026
9f6f606
Add most test cases from test_filesystem and recreate a couple that w…
Collinbrown95 Mar 15, 2026
97118d8
rename az_bucket to az_container
Collinbrown95 Mar 15, 2026
01fdf52
check that azurite is installed as precondition for test-azure.R script.
Collinbrown95 Mar 15, 2026
d58594f
add setup code to start azurite from the test-azure.R script, then ki…
Collinbrown95 Mar 15, 2026
02feb8c
run air formatter
Collinbrown95 Mar 15, 2026
6cc6987
add documentation to az_container.
Collinbrown95 Mar 15, 2026
ba28477
docs: Updated documentation for AzureFileSystem and updated vignette …
marberts Mar 17, 2026
4b443c0
Updated installation vignettes to include Azure
marberts Mar 18, 2026
e671f4a
Updated install scripts
marberts Mar 18, 2026
bb66e40
add tests for valid and invalid combinations of options to AzureFileS…
Collinbrown95 Mar 18, 2026
bc20a6d
Ran pre-commit hooks
marberts Mar 19, 2026
99a8598
Removed tmp.md
marberts Mar 19, 2026
942d3fc
wrap credential configuration methods with StopIfNotOk
Collinbrown95 Mar 19, 2026
f079ac0
move link flags to arrow_built_with ARROW_AZURE block in configure sc…
Collinbrown95 Mar 19, 2026
0e4e2e2
fix error message to check in test for empty call to AzureFileSystem$…
Collinbrown95 Mar 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion r/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ Imports:
utils,
vctrs
Roxygen: list(markdown = TRUE, r6 = FALSE, load = "source")
RoxygenNote: 7.3.3
RoxygenNote: 7.3.3.9000
Config/testthat/edition: 3
Config/build/bootstrap: TRUE
Suggests:
Expand Down
3 changes: 3 additions & 0 deletions r/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ S3method(vec_ptype_full,arrow_fixed_size_list)
S3method(vec_ptype_full,arrow_large_list)
S3method(vec_ptype_full,arrow_list)
export(Array)
export(AzureFileSystem)
export(Buffer)
export(BufferOutputStream)
export(BufferReader)
Expand Down Expand Up @@ -282,6 +283,7 @@ export(arrow_available)
export(arrow_info)
export(arrow_table)
export(arrow_with_acero)
export(arrow_with_azure)
export(arrow_with_dataset)
export(arrow_with_gcs)
export(arrow_with_json)
Expand All @@ -295,6 +297,7 @@ export(as_data_type)
export(as_record_batch)
export(as_record_batch_reader)
export(as_schema)
export(az_container)
export(binary)
export(bool)
export(boolean)
Expand Down
10 changes: 10 additions & 0 deletions r/R/arrow-info.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ arrow_info <- function() {
json = arrow_with_json(),
s3 = arrow_with_s3(),
gcs = arrow_with_gcs(),
azure = arrow_with_azure(),
utf8proc = "utf8_upper" %in% compute_funcs,
re2 = "replace_substring_regex" %in% compute_funcs,
vapply(tolower(names(CompressionType)[-1]), codec_is_available, logical(1))
Expand Down Expand Up @@ -128,6 +129,15 @@ arrow_with_gcs <- function() {
})
}

#' @rdname arrow_info
#' @export
arrow_with_azure <- function() {
  # Any error raised while calling the compiled `_azure_available` routine is
  # reported as "not available" instead of being propagated to the caller.
  report_unavailable <- function(e) FALSE
  tryCatch(.Call(`_azure_available`), error = report_unavailable)
}


#' @rdname arrow_info
#' @export
arrow_with_json <- function() {
Expand Down
4 changes: 4 additions & 0 deletions r/R/arrowExports.R

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

116 changes: 116 additions & 0 deletions r/R/filesystem.R
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,31 @@ FileSelector$create <- function(base_dir, allow_not_found = FALSE, recursive = F
#' - `default_metadata`: default metadata to write in new objects.
#' - `project_id`: the project to use for creating buckets.
#'
#' `AzureFileSystem$create()` takes the following required argument:
#'
#' - `account_name`: Azure Blob Storage account name.
#'
#' `AzureFileSystem$create()` takes the following optional arguments:
#'
#' - `account_key`: Account key of the storage account. Cannot be used with
#' `sas_token`.
#' - `blob_storage_authority`: Hostname of the blob service, defaulting to
#' `"blob.core.windows.net"`.
#' - `blob_storage_scheme`: Either `"http"` or `"https"` (the default).
#' - `client_id`: The client/application ID for Azure Active Directory
#' authentication. If used with `client_secret` and `tenant_id` then it is the
#' application ID for a registered Azure AD application. Otherwise, it is the
#' client ID of a user-assigned managed identity.
#' - `client_secret`: Client secret for Azure Active Directory authentication.
#' Must be provided with both `client_id` and `tenant_id`.
#' - `dfs_storage_authority`: Hostname of the data lake (gen 2) service,
#' defaulting to `"dfs.core.windows.net"`.
#' - `dfs_storage_scheme`: Either `"http"` or `"https"` (the default).
#' - `sas_token`: Shared access signature (SAS) token for the storage account.
#' Cannot be used with `account_key`.
#' - `tenant_id`: Tenant ID for Azure Active Directory authentication. Must
#' be provided with both `client_id` and `client_secret`.
#'
#' @section Methods:
#'
#' - `path(x)`: Create a `SubTreeFileSystem` from the current `FileSystem`
Expand Down Expand Up @@ -253,6 +278,10 @@ FileSelector$create <- function(base_dir, allow_not_found = FALSE, recursive = F
#' (the default), 'ERROR', 'WARN', 'INFO', 'DEBUG' (recommended), 'TRACE', and
#' 'OFF'.
#'
#' On `AzureFileSystem`, passing no authentication arguments uses the Azure
#' SDK's `DefaultAzureCredential`, which tries several authentication types in
#' turn until one succeeds.
#'
#' @usage NULL
#' @format NULL
#' @docType class
Expand Down Expand Up @@ -645,6 +674,93 @@ GcsFileSystem$create <- function(anonymous = FALSE, retry_limit_seconds = 15, ..
fs___GcsFileSystem__Make(anonymous, options)
}

#' @usage NULL
#' @format NULL
#' @rdname FileSystem
#' @importFrom utils modifyList
#' @export
AzureFileSystem <- R6Class(
  # The generator declares no fields or methods of its own in this
  # definition; everything comes from the parent `FileSystem` class.
  classname = "AzureFileSystem",
  inherit = FileSystem
)

# Create an AzureFileSystem for the storage account `account_name`.
#
# `...` may only contain the option names listed in `valid_opts`; anything else
# is rejected. The authentication options are then checked for consistency
# before everything is handed to `fs___AzureFileSystem__Make()`:
#   * if any of `client_id`, `tenant_id` or `client_secret` is given,
#     `client_id` is mandatory, and `tenant_id` and `client_secret` must be
#     supplied together or not at all;
#   * `account_key` and `sas_token` are mutually exclusive, regardless of which
#     other options are present.
#
# Raises an error (without the call attached) on any violation; otherwise
# returns the result of `fs___AzureFileSystem__Make()`.
AzureFileSystem$create <- function(account_name, ...) {
  options <- list(...)
  valid_opts <- c(
    "account_key",
    "blob_storage_authority",
    "blob_storage_scheme",
    "client_id",
    "client_secret",
    "dfs_storage_authority",
    "dfs_storage_scheme",
    "sas_token",
    "tenant_id"
  )

  invalid_opts <- setdiff(names(options), valid_opts)
  if (length(invalid_opts) > 0) {
    stop(
      "Invalid options for AzureFileSystem: ",
      oxford_paste(invalid_opts),
      call. = FALSE
    )
  }

  # `[[` matches names exactly, unlike `$` which partial-matches on lists.
  has_tenant_id <- !is.null(options[["tenant_id"]])
  has_client_id <- !is.null(options[["client_id"]])
  has_client_secret <- !is.null(options[["client_secret"]])

  if (has_tenant_id || has_client_id || has_client_secret) {
    if (!has_client_id) {
      stop(
        "`client_id` must be given with `tenant_id` and `client_secret`",
        call. = FALSE
      )
    }
    # Exactly one of the pair being present is the only inconsistent case:
    # neither is allowed and so is both.
    if (xor(has_tenant_id, has_client_secret)) {
      stop(
        "Provide only `client_id` to authenticate with ",
        "Managed Identity Credential, or provide `client_id`, `tenant_id`, ",
        "and `client_secret` to authenticate with Client Secret Credential",
        call. = FALSE
      )
    }
  }

  # Checked unconditionally: previously this was an `else if` branch, so the
  # conflict went undetected whenever any client credential option was given.
  # It stays after the client checks so existing error cases report the same
  # message as before.
  if (!is.null(options[["account_key"]]) && !is.null(options[["sas_token"]])) {
    stop(
      "Cannot specify both `account_key` and `sas_token`",
      call. = FALSE
    )
  }

  fs___AzureFileSystem__Make(c(account_name = account_name, options))
}

#' Connect to an Azure Blob Storage container
#'
#' `az_container()` is a convenience function that creates an
#' `AzureFileSystem` and wraps it in a `SubTreeFileSystem` rooted at
#' `container_path`, giving a file system interface to a blob storage
#' container in an Azure Storage Account.
#'
#' @param container_path string Container name or path.
#' @param ... Additional connection options, passed to
#' `AzureFileSystem$create()`.
#'
#' @return A `SubTreeFileSystem` containing an `AzureFileSystem` and the
#' container's relative path. Note that this function's success does not
#' guarantee that you are authorized to access the container's contents.
#' @examplesIf FALSE
#' container_fs <- az_container(
#'   container_path = "arrow-datasets",
#'   account_name = azurite_account_name,
#'   account_key = azurite_account_key,
#'   blob_storage_authority = azurite_blob_storage_authority,
#'   blob_storage_scheme = azurite_blob_storage_scheme
#' )
#' @export
az_container <- function(container_path, ...) {
  assert_that(is.string(container_path))

  # Collect the connection options and splice them into the constructor call.
  azure_opts <- list2(...)
  azure_fs <- exec(AzureFileSystem$create, !!!azure_opts)

  # Root the returned file system at the requested container.
  SubTreeFileSystem$create(container_path, azure_fs)
}

#' @usage NULL
#' @format NULL
#' @rdname FileSystem
Expand Down
3 changes: 2 additions & 1 deletion r/_pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -261,10 +261,11 @@ reference:

- title: File systems
desc: >
Functions for working with files on S3 and GCS
Functions for working with files on S3, GCS, and Azure
contents:
- s3_bucket
- gs_bucket
- az_container
- copy_files

- title: Flight
Expand Down
6 changes: 5 additions & 1 deletion r/configure
Original file line number Diff line number Diff line change
Expand Up @@ -359,10 +359,14 @@ add_feature_flags () {
if arrow_built_with ARROW_S3; then
PKG_CFLAGS_FEATURES="$PKG_CFLAGS_FEATURES -DARROW_R_WITH_S3"
fi
if arrow_built_with ARROW_AZURE; then
PKG_CFLAGS_FEATURES="$PKG_CFLAGS_FEATURES -DARROW_R_WITH_AZURE"
PKG_LIBS_FEATURES="$PKG_LIBS_FEATURES -lcurl -lxml2 -lssl"
fi
if arrow_built_with ARROW_GCS; then
PKG_CFLAGS_FEATURES="$PKG_CFLAGS_FEATURES -DARROW_R_WITH_GCS"
fi
if arrow_built_with ARROW_GCS || arrow_built_with ARROW_S3; then
if arrow_built_with ARROW_GCS || arrow_built_with ARROW_S3 || arrow_built_with ARROW_AZURE; then
# If pkg-config is available it will handle this for us automatically
SSL_LIBS_WITHOUT_PC="-lcurl -lssl -lcrypto"
fi
Expand Down
5 changes: 4 additions & 1 deletion r/configure.win
Original file line number Diff line number Diff line change
Expand Up @@ -187,10 +187,13 @@ add_feature_flags () {
if arrow_built_with ARROW_S3; then
PKG_CFLAGS_FEATURES="$PKG_CFLAGS_FEATURES -DARROW_R_WITH_S3"
fi
if arrow_built_with ARROW_AZURE; then
PKG_CFLAGS_FEATURES="$PKG_CFLAGS_FEATURES -DARROW_R_WITH_AZURE"
fi
if arrow_built_with ARROW_GCS; then
PKG_CFLAGS_FEATURES="$PKG_CFLAGS_FEATURES -DARROW_R_WITH_GCS"
fi
if arrow_built_with ARROW_GCS || arrow_built_with ARROW_S3; then
if arrow_built_with ARROW_GCS || arrow_built_with ARROW_S3 || arrow_built_with ARROW_AZURE; then
# If pkg-config is available it will handle this for us automatically
SSL_LIBS_WITHOUT_PC="-lcurl -lssl -lcrypto"
fi
Expand Down
2 changes: 1 addition & 1 deletion r/data-raw/codegen.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
# Ensure that all machines are sorting the same way
invisible(Sys.setlocale("LC_COLLATE", "C"))

features <- c("acero", "dataset", "substrait", "parquet", "s3", "gcs", "json")
features <- c("acero", "dataset", "substrait", "parquet", "s3", "gcs", "azure", "json")

suppressPackageStartupMessages({
library(decor)
Expand Down
1 change: 1 addition & 0 deletions r/inst/build_arrow_static.sh
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ ${CMAKE} -DARROW_BOOST_USE_SHARED=OFF \
-Dlz4_SOURCE=${lz4_SOURCE:-} \
-DARROW_FILESYSTEM=ON \
-DARROW_GCS=${ARROW_GCS:-OFF} \
-DARROW_AZURE=${ARROW_AZURE:-OFF} \
-DARROW_JEMALLOC=${ARROW_JEMALLOC:-$ARROW_DEFAULT_PARAM} \
-DARROW_MIMALLOC=${ARROW_MIMALLOC:-ON} \
-DARROW_JSON=${ARROW_JSON:-ON} \
Expand Down
32 changes: 32 additions & 0 deletions r/man/FileSystem.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions r/man/acero.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions r/man/arrow_info.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 34 additions & 0 deletions r/man/az_container.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading