diff --git a/.gitignore b/.gitignore
index fa4e7f520..19eb11079 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,6 +18,7 @@ jenkins/
.idea/*
docs/build/doctrees/*
docs/build/html/_sources/*
+docs_site/*
build/*
/venv
diff --git a/docs/explanations/storage_location_architecture.md b/docs/explanations/storage_location_architecture.md
new file mode 100644
index 000000000..612ef7d21
--- /dev/null
+++ b/docs/explanations/storage_location_architecture.md
@@ -0,0 +1,785 @@
+# Storage Location Architecture
+
+This document provides an in-depth architectural overview of the StorageLocation
+system in the Synapse Python Client. It explains the design decisions, class
+relationships, and data flows that enable flexible storage configuration.
+
+---
+
+## On This Page
+
+
+
+- **[Domain Model](#domain-model)**
+
+ Core classes, enums, and their relationships
+
+- **[Storage Types](#storage-type-mapping)**
+
+ How storage types map to REST API types and choosing the right one
+
+- **[Entity Inheritance](#entity-inheritance-hierarchy)**
+
+ How Projects and Folders gain storage capabilities
+
+- **[Operation Flows](#operation-flows)**
+
+ Sequence diagrams for store, setup, and STS operations
+
+- **[Settings & API](#project-setting-lifecycle)**
+
+ Project settings lifecycle and REST API architecture
+
+- **[Migration](#migration-flow)**
+
+ Two-phase file migration process
+
+
+
+---
+
+## Overview
+
+The StorageLocation system enables Synapse users to configure where uploaded files
+are stored. By default, Synapse stores files in its internal S3 storage, but
+users can configure projects and folders to use external storage backends such as
+AWS S3 buckets, Google Cloud Storage, SFTP servers, or proxy servers.
+
+!!! info "Key Concepts"
+ - **StorageLocation**: A configuration describing where files are stored
+ - **Project Setting**: Links a storage location to a Project or Folder
+ - **STS Credentials**: Temporary AWS credentials for direct S3 access
+ - **Storage Migration**: Moving files between storage locations
+
+---
+
+
+
+# Part 1: Data Model
+
+This section covers the core classes, enumerations, and type mappings.
+
+
+
+## Domain Model
+
+The following class diagram shows the core classes and their relationships in the
+StorageLocation system.
+
+```mermaid
+classDiagram
+ direction TB
+
+ class StorageLocation {
+ +int storage_location_id
+ +StorageLocationType storage_type
+ +UploadType upload_type
+ +str bucket
+ +str base_key
+ +bool sts_enabled
+ +str banner
+ +str description
+ +str etag
+ +str created_on
+ +int created_by
+ +str url
+ +bool supports_subfolders
+ +str endpoint_url
+ +str proxy_url
+ +str secret_key
+ +str benefactor_id
+ +store() StorageLocation
+ +get() StorageLocation
+ +setup_s3() Tuple~Folder, StorageLocation~
+ +fill_from_dict(dict) StorageLocation
+ }
+
+ class StorageLocationType {
+        <<enumeration>>
+ SYNAPSE_S3
+ EXTERNAL_S3
+ EXTERNAL_GOOGLE_CLOUD
+ EXTERNAL_SFTP
+ EXTERNAL_OBJECT_STORE
+ PROXY
+ }
+
+ class UploadType {
+        <<enumeration>>
+ S3
+ GOOGLE_CLOUD_STORAGE
+ SFTP
+ HTTPS
+ NONE
+ }
+
+ class StorageLocationConfigurable {
+        <<mixin>>
+ +set_storage_location(storage_location_id)
+ +get_project_setting(setting_type)
+ +delete_project_setting(setting_id)
+ +get_sts_storage_token(permission, output_format)
+ +index_files_for_migration(dest_storage_location_id, db_path)
+ +migrate_indexed_files(db_path)
+ }
+
+ class Project {
+ +str id
+ +str name
+ +str description
+ }
+
+ class Folder {
+ +str id
+ +str name
+ +str parent_id
+ }
+
+ StorageLocation --> StorageLocationType : storage_type
+ StorageLocation --> UploadType : upload_type
+ StorageLocationConfigurable <|-- Project : implements
+ StorageLocationConfigurable <|-- Folder : implements
+```
+
+
+
+### Key Components
+
+| Component | Purpose |
+|-----------|---------|
+| [StorageLocation][synapseclient.models.StorageLocation] | Data model representing a storage location setting in Synapse |
+| [StorageLocationType][synapseclient.models.StorageLocationType] | Enumeration defining the supported storage backend types |
+| [UploadType][synapseclient.models.UploadType] | Enumeration defining the upload protocol for each storage type |
+| [StorageLocationConfigurable][synapseclient.models.mixins.StorageLocationConfigurable] | Mixin providing storage management methods to entities |
+
+---
+
+
+
+## Storage Type Mapping
+
+Each `StorageLocationType` maps to a specific REST API `concreteType` and has a
+default `UploadType`. This mapping is bidirectional, allowing the system to parse
+responses from the API and construct requests.
+
+```mermaid
+flowchart LR
+ subgraph StorageLocationType
+ SYNAPSE_S3["SYNAPSE_S3"]
+ EXTERNAL_S3["EXTERNAL_S3"]
+ EXTERNAL_GOOGLE_CLOUD["EXTERNAL_GOOGLE_CLOUD"]
+ EXTERNAL_SFTP["EXTERNAL_SFTP"]
+ EXTERNAL_OBJECT_STORE["EXTERNAL_OBJECT_STORE"]
+ PROXY["PROXY"]
+ end
+
+ subgraph concreteType
+ S3SLS["S3StorageLocationSetting"]
+ ExtS3SLS["ExternalS3StorageLocationSetting"]
+ ExtGCSSLS["ExternalGoogleCloudStorageLocationSetting"]
+ ExtSLS["ExternalStorageLocationSetting"]
+ ExtObjSLS["ExternalObjectStorageLocationSetting"]
+ ProxySLS["ProxyStorageLocationSettings"]
+ end
+
+ subgraph UploadType
+ S3["S3"]
+ GCS["GOOGLECLOUDSTORAGE"]
+ SFTP["SFTP"]
+ HTTPS["HTTPS"]
+ end
+
+ SYNAPSE_S3 --> S3SLS --> S3
+ EXTERNAL_S3 --> ExtS3SLS --> S3
+ EXTERNAL_GOOGLE_CLOUD --> ExtGCSSLS --> GCS
+ EXTERNAL_SFTP --> ExtSLS --> SFTP
+ EXTERNAL_OBJECT_STORE --> ExtObjSLS --> S3
+ PROXY --> ProxySLS --> HTTPS
+```
+
+
+
+### Type-Specific Attributes
+
+Different storage types support different configuration attributes:
+
+| Attribute | SYNAPSE | EXT_S3 | EXT_GCS | EXT_SFTP | EXT_OBJ | PROXY |
+|-----------|:-------:|:------:|:-------:|:--------:|:-------:|:-----:|
+| `bucket` | ✓ | ✓ | ✓ | | ✓ | |
+| `base_key` | ✓ | ✓ | ✓ | | | |
+| `sts_enabled` | ✓ | ✓ | | | | |
+| `endpoint_url` | | ✓ | | | ✓ | |
+| `url` | | | | ✓ | | |
+| `supports_subfolders` | | | | ✓ | | |
+| `proxy_url` | | | | | | ✓ |
+| `secret_key` | | | | | | ✓ |
+| `benefactor_id` | | | | | | ✓ |
+
+**Legend:** SYNAPSE = SYNAPSE_S3, EXT_S3 = EXTERNAL_S3, EXT_GCS = EXTERNAL_GOOGLE_CLOUD, EXT_SFTP = EXTERNAL_SFTP, EXT_OBJ = EXTERNAL_OBJECT_STORE
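+
+As a rough illustration of how these attribute sets differ, the sketch below
+constructs (but does not store) two contrasting locations. The bucket name,
+proxy URL, secret, and benefactor ID are placeholders, and keyword names are
+assumed to match the dataclass fields listed above:
+
+```python
+from synapseclient.models import StorageLocation, StorageLocationType
+
+# External S3 uses bucket / base_key and can optionally enable STS
+external_s3 = StorageLocation(
+    storage_type=StorageLocationType.EXTERNAL_S3,
+    bucket="my-bucket",
+    base_key="project-data",
+    sts_enabled=True,
+)
+
+# Proxy storage uses proxy_url / secret_key / benefactor_id instead of a bucket
+proxy = StorageLocation(
+    storage_type=StorageLocationType.PROXY,
+    proxy_url="https://proxy.example.org",
+    secret_key="shared-secret",
+    benefactor_id="syn123",
+)
+```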
+
+
+
+### Choosing a Storage Type
+
+Use this decision tree to select the appropriate storage type for your use case:
+
+```mermaid
+flowchart TB
+    Start([Need custom storage?]) --> Q1{Want Synapse to<br/>manage storage?}
+
+    Q1 -->|Yes| SYNAPSE_S3[Use SYNAPSE_S3]
+    Q1 -->|No| Q2{What storage<br/>backend?}
+
+    Q2 -->|AWS S3| Q3{Synapse accesses<br/>bucket directly?}
+    Q2 -->|Google Cloud| EXTERNAL_GOOGLE_CLOUD[Use EXTERNAL_GOOGLE_CLOUD]
+    Q2 -->|SFTP Server| EXTERNAL_SFTP[Use EXTERNAL_SFTP]
+    Q2 -->|Proxy Server| PROXY[Use PROXY]
+    Q2 -->|S3-compatible<br/>non-AWS| EXTERNAL_OBJECT_STORE[Use EXTERNAL_OBJECT_STORE]
+
+    Q3 -->|Yes| Q4{Need STS<br/>credentials?}
+    Q3 -->|No| EXTERNAL_OBJECT_STORE
+
+    Q4 -->|Yes| EXTERNAL_S3_STS[Use EXTERNAL_S3<br/>with sts_enabled=True]
+    Q4 -->|No| EXTERNAL_S3[Use EXTERNAL_S3]
+
+    SYNAPSE_S3 --> Benefits1[Benefits:<br/>- Zero configuration<br/>- Managed by Synapse<br/>- STS available]
+    EXTERNAL_S3 --> Benefits2[Benefits:<br/>- Use your own bucket<br/>- Control access & costs<br/>- Optional STS]
+    EXTERNAL_S3_STS --> Benefits2
+    EXTERNAL_GOOGLE_CLOUD --> Benefits3[Benefits:<br/>- GCP native<br/>- Use existing GCS buckets]
+    EXTERNAL_SFTP --> Benefits4[Benefits:<br/>- Legacy systems<br/>- Synapse never touches data]
+    EXTERNAL_OBJECT_STORE --> Benefits5[Benefits:<br/>- OpenStack, MinIO, etc<br/>- Synapse never touches data]
+    PROXY --> Benefits6[Benefits:<br/>- Custom access control<br/>- Data transformation]
+```
+
+---
+
+
+
+## Entity Inheritance Hierarchy
+
+Projects and Folders inherit storage configuration capabilities through the
+`StorageLocationConfigurable` mixin. This pattern allows consistent storage
+management across container entities.
+
+```mermaid
+classDiagram
+ direction TB
+
+ class AccessControllable {
+        <<mixin>>
+ +get_permissions()
+ +set_permissions()
+ +delete_permissions()
+ }
+
+ class StorableContainer {
+        <<mixin>>
+ +sync()
+ +get_children()
+ }
+
+ class StorageLocationConfigurable {
+        <<mixin>>
+ +set_storage_location()
+ +get_project_setting()
+ +delete_project_setting()
+ +get_sts_storage_token()
+ +index_files_for_migration()
+ +migrate_indexed_files()
+ }
+
+ class Project {
+ +str id
+ +str name
+ +str description
+ +str etag
+ }
+
+ class Folder {
+ +str id
+ +str name
+ +str parent_id
+ +str etag
+ }
+
+ AccessControllable <|-- Project
+ AccessControllable <|-- Folder
+ StorableContainer <|-- Project
+ StorableContainer <|-- Folder
+ StorageLocationConfigurable <|-- Project
+ StorageLocationConfigurable <|-- Folder
+```
+
+!!! tip "Mixin Pattern"
+ The mixin pattern allows `Project` and `Folder` to share storage location
+ functionality without code duplication. Both classes inherit the same
+ methods from `StorageLocationConfigurable`.
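+
+As a minimal sketch of what that shared surface looks like in practice (the
+Synapse IDs and storage location ID below are placeholders, and an
+authenticated session is assumed), the same call works on either container:
+
+```python
+from synapseclient.models import Folder, Project
+
+# Identical mixin method, inherited by both container types
+Project(id="syn123").set_storage_location(storage_location_id=98765)
+Folder(id="syn456").set_storage_location(storage_location_id=98765)
+```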
+
+---
+
+
+
+
+# Part 2: Operation Flows
+
+This section contains sequence diagrams for key operations.
+
+
+
+## Operation Flows
+
+### Store Operation
+
+The `store()` method creates a new storage location in Synapse.
+
+```mermaid
+sequenceDiagram
+ participant User
+ participant StorageLocation
+ participant _to_synapse_request as _to_synapse_request()
+ participant API as storage_location_services
+ participant Synapse as Synapse REST API
+
+ User->>StorageLocation: store()
+ activate StorageLocation
+
+ StorageLocation->>_to_synapse_request: Build request body
+ activate _to_synapse_request
+
+ Note over _to_synapse_request: Validate storage_type is set
+ Note over _to_synapse_request: Build concreteType from storage_type
+ Note over _to_synapse_request: Determine uploadType
+ Note over _to_synapse_request: Add type-specific fields
+
+ _to_synapse_request-->>StorageLocation: Request body dict
+ deactivate _to_synapse_request
+
+ StorageLocation->>API: create_storage_location_setting(body)
+ activate API
+
+ API->>Synapse: POST /storageLocation
+ activate Synapse
+
+ Synapse-->>API: Response with storageLocationId
+ deactivate Synapse
+
+ API-->>StorageLocation: Response dict
+ deactivate API
+
+ StorageLocation->>StorageLocation: fill_from_dict(response)
+ Note over StorageLocation: Parse storageLocationId
+ Note over StorageLocation: Parse concreteType → storage_type
+ Note over StorageLocation: Parse uploadType → upload_type
+ Note over StorageLocation: Extract type-specific fields
+
+ StorageLocation-->>User: StorageLocation (populated)
+ deactivate StorageLocation
+```
+
+!!! note "Idempotent Behavior"
+ Storage locations are immutable once created. If you call `store()` with
+ identical parameters, Synapse returns the existing storage location rather
+ than creating a duplicate.
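+
+A minimal sketch of that behavior, assuming an authenticated session and a
+bucket already configured for Synapse (the bucket name is a placeholder):
+
+```python
+from synapseclient.models import StorageLocation, StorageLocationType
+
+first = StorageLocation(
+    storage_type=StorageLocationType.EXTERNAL_S3,
+    bucket="my-bucket",
+).store()
+
+# Repeating the call with identical parameters resolves to the same location
+second = StorageLocation(
+    storage_type=StorageLocationType.EXTERNAL_S3,
+    bucket="my-bucket",
+).store()
+
+assert first.storage_location_id == second.storage_location_id
+```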
+
+
+
+### Setup S3 Convenience Flow
+
+The `setup_s3()` class method creates a folder with S3 storage in a single call.
+
+??? example "Click to expand sequence diagram"
+ ```mermaid
+ sequenceDiagram
+ participant User
+ participant setup_s3 as StorageLocation.setup_s3()
+ participant StorageLocation
+ participant Folder
+ participant Mixin as StorageLocationConfigurable
+ participant API as storage_location_services
+ participant Synapse as Synapse REST API
+
+ User->>setup_s3: setup_s3(parent, folder_name, bucket_name)
+ activate setup_s3
+
+ Note over setup_s3: Validate: folder_name XOR folder
+
+ alt folder_name provided
+ setup_s3->>Folder: Folder(name, parent_id).store()
+ activate Folder
+ Folder->>Synapse: POST /entity
+ Synapse-->>Folder: Folder response
+ Folder-->>setup_s3: New Folder
+ deactivate Folder
+ else folder ID provided
+ setup_s3->>Folder: Folder(id).get()
+ activate Folder
+ Folder->>Synapse: GET /entity/{id}
+ Synapse-->>Folder: Folder response
+ Folder-->>setup_s3: Existing Folder
+ deactivate Folder
+ end
+
+ alt bucket_name provided
+ Note over setup_s3: storage_type = EXTERNAL_S3
+ else bucket_name is None
+ Note over setup_s3: storage_type = SYNAPSE_S3
+ end
+
+ setup_s3->>StorageLocation: StorageLocation(...).store()
+ activate StorageLocation
+ StorageLocation->>Synapse: POST /storageLocation
+ Synapse-->>StorageLocation: StorageLocation response
+ StorageLocation-->>setup_s3: StorageLocation
+ deactivate StorageLocation
+
+ setup_s3->>Mixin: folder.set_storage_location(storage_location_id)
+ activate Mixin
+
+ Mixin->>API: get_project_setting(project_id, "upload")
+ API->>Synapse: GET /projectSettings/{id}/type/upload
+ Synapse-->>API: Setting or empty
+
+ alt Setting exists
+ API-->>Mixin: Existing setting
+ Mixin->>API: update_project_setting(body)
+ API->>Synapse: PUT /projectSettings
+ else No setting
+ Mixin->>API: create_project_setting(body)
+ API->>Synapse: POST /projectSettings
+ end
+
+ Synapse-->>API: Project setting response
+ API-->>Mixin: Updated setting
+ deactivate Mixin
+
+ setup_s3-->>User: (Folder, StorageLocation)
+ deactivate setup_s3
+ ```
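+
+In code, the whole flow collapses into a single call. A hedged sketch with
+placeholder names, assuming an authenticated session (omitting `bucket_name`
+falls back to Synapse-managed S3 storage):
+
+```python
+from synapseclient.models import StorageLocation
+
+# Creates the folder, the storage location, and the upload project setting
+folder, storage = StorageLocation.setup_s3(
+    folder_name="external-data",
+    parent="syn123",
+    bucket_name="my-bucket",
+)
+print(folder.id, storage.storage_location_id)
+```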
+
+
+
+### STS Token Retrieval
+
+STS (AWS Security Token Service) enables direct S3 access using temporary credentials.
+
+```mermaid
+sequenceDiagram
+ participant User
+ participant Entity as Folder/Project
+ participant Mixin as StorageLocationConfigurable
+ participant STS as sts_transfer module
+ participant Client as Synapse Client
+ participant Synapse as Synapse REST API
+
+ User->>Entity: get_sts_storage_token(permission, output_format)
+ activate Entity
+
+ Entity->>Mixin: get_sts_storage_token_async()
+ activate Mixin
+
+ Mixin->>Client: Synapse.get_client()
+ Client-->>Mixin: Synapse client instance
+
+ Mixin->>STS: sts_transfer.get_sts_credentials()
+ activate STS
+
+ STS->>Synapse: GET /entity/{id}/sts?permission={permission}
+ activate Synapse
+
+ Synapse-->>STS: STS credentials response
+ deactivate Synapse
+
+ Note over STS: Parse credentials
+
+ alt output_format == "boto"
+ Note over STS: Format for boto3 client kwargs
+ STS-->>Mixin: {aws_access_key_id, aws_secret_access_key, aws_session_token}
+ else output_format == "json"
+ Note over STS: Return JSON string
+ STS-->>Mixin: JSON credentials string
+ else output_format == "shell" / "bash"
+ Note over STS: Format as export commands
+ STS-->>Mixin: Shell export commands
+ else output_format == "dictionary"
+ Note over STS: Return raw dict
+ STS-->>Mixin: Dictionary
+ end
+ deactivate STS
+
+ Mixin-->>Entity: Formatted credentials
+ deactivate Mixin
+
+ Entity-->>User: Credentials
+ deactivate Entity
+```
+
+
+
+#### Credential Output Formats
+
+| Format | Description | Use Case |
+|--------|-------------|----------|
+| `boto` | Dict with `aws_access_key_id`, `aws_secret_access_key`, `aws_session_token` | Pass directly to `boto3.client('s3', **creds)` |
+| `json` | JSON string | Store or pass to external tools |
+| `shell` / `bash` | `export AWS_ACCESS_KEY_ID=...` format | Execute in shell |
+| `cmd` | Windows SET commands | Windows command prompt |
+| `powershell` | PowerShell variable assignments | PowerShell scripts |
+| `dictionary` | Raw Python dict | Custom processing |
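+
+For example, the `boto` format is designed to unpack straight into a boto3
+client. A short sketch (placeholder Synapse ID; assumes `boto3` is installed
+and a session is authenticated):
+
+```python
+import boto3
+
+from synapseclient.models import Folder
+
+credentials = Folder(id="syn123").get().get_sts_storage_token(
+    permission="read_write",
+    output_format="boto",
+)
+
+# Temporary credentials unpack directly into the boto3 S3 client
+s3 = boto3.client("s3", **credentials)
+```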
+
+---
+
+
+
+
+# Part 3: Settings & Infrastructure
+
+This section covers project settings, API architecture, and the async/sync pattern.
+
+
+
+## Project Setting Lifecycle
+
+Project settings control which storage location(s) are used for uploads to an
+entity. The following state diagram shows the lifecycle of a project setting.
+
+```mermaid
+stateDiagram-v2
+ [*] --> NoSetting: Entity created
+
+ NoSetting --> Created: set_storage_location()
+    note right of NoSetting: Inherits from parent\nor uses Synapse default
+
+ Created --> Updated: set_storage_location()\nwith different locations
+ Updated --> Updated: set_storage_location()\nwith different locations
+
+ Created --> Deleted: delete_project_setting()
+ Updated --> Deleted: delete_project_setting()
+
+ Deleted --> NoSetting: Returns to default
+
+ state Created {
+ [*] --> Active
+ Active: locations = [storage_location_id]
+ Active: settingsType = "upload"
+ }
+
+ state Updated {
+ [*] --> Modified
+ Modified: locations = [new_id, ...]
+ Modified: settingsType = "upload"
+ }
+```
+
+
+
+### Setting Types
+
+| Type | Purpose |
+|------|---------|
+| `upload` | Configures upload destination storage location(s) |
+| `external_sync` | Configures external sync settings |
+| `requester_pays` | Configures requester-pays bucket access |
+
+---
+
+
+
+## API Layer Architecture
+
+The storage location services module provides async functions that wrap the
+Synapse REST API endpoints. This layer handles serialization and error handling.
+
+```mermaid
+flowchart TB
+ subgraph "Model Layer"
+ SL[StorageLocation]
+ SLCM[StorageLocationConfigurable Mixin]
+ end
+
+ subgraph "API Layer (storage_location_services.py)"
+ create_sls[create_storage_location_setting]
+ get_sls[get_storage_location_setting]
+ get_ps[get_project_setting]
+ create_ps[create_project_setting]
+ update_ps[update_project_setting]
+ delete_ps[delete_project_setting]
+ end
+
+ subgraph "REST Endpoints"
+ POST_SL["POST /storageLocation"]
+ GET_SL["GET /storageLocation/{id}"]
+ GET_PS["GET /projectSettings/{id}/type/{type}"]
+ POST_PS["POST /projectSettings"]
+ PUT_PS["PUT /projectSettings"]
+ DELETE_PS["DELETE /projectSettings/{id}"]
+ end
+
+ SL --> create_sls --> POST_SL
+ SL --> get_sls --> GET_SL
+
+ SLCM --> get_ps --> GET_PS
+ SLCM --> create_ps --> POST_PS
+ SLCM --> update_ps --> PUT_PS
+ SLCM --> delete_ps --> DELETE_PS
+```
+
+
+
+### REST API Reference
+
+| Method | Endpoint | Description |
+|--------|----------|-------------|
+| POST | `/storageLocation` | Create a new storage location setting |
+| GET | `/storageLocation/{id}` | Retrieve a storage location by ID |
+| GET | `/projectSettings/{projectId}/type/{type}` | Get project settings for an entity |
+| POST | `/projectSettings` | Create a new project setting |
+| PUT | `/projectSettings` | Update an existing project setting |
+| DELETE | `/projectSettings/{id}` | Delete a project setting |
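+
+The async service functions can also be called directly. A minimal sketch
+(placeholder IDs; assumes a session authenticated via `synapseclient.login()`
+so the cached client is picked up):
+
+```python
+import asyncio
+
+import synapseclient
+from synapseclient.api import get_project_setting, get_storage_location_setting
+
+
+async def main() -> None:
+    synapseclient.login()
+
+    # Look up a storage location you created
+    location = await get_storage_location_setting(storage_location_id=98765)
+    print(location.get("concreteType"))
+
+    # Returns None when the entity has no "upload" setting of its own
+    setting = await get_project_setting(project_id="syn123", setting_type="upload")
+    print(setting or "No setting; the default storage location applies")
+
+
+asyncio.run(main())
+```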
+
+---
+
+
+
+## Async/Sync Pattern
+
+The StorageLocation system follows the Python client's `@async_to_sync` pattern,
+providing both async and sync versions of all methods.
+
+```mermaid
+flowchart LR
+ subgraph "User Code"
+ SyncCall["folder.set_storage_location()"]
+ AsyncCall["await folder.set_storage_location_async()"]
+ end
+
+ subgraph "@async_to_sync Decorator"
+ Wrapper["Sync wrapper"]
+ AsyncMethod["Async implementation"]
+ end
+
+ subgraph "Event Loop"
+ RunSync["wrap_async_to_sync()"]
+ AsyncIO["asyncio"]
+ end
+
+ SyncCall --> Wrapper
+ Wrapper --> RunSync
+ RunSync --> AsyncIO
+ AsyncIO --> AsyncMethod
+
+ AsyncCall --> AsyncMethod
+```
+
+
+
+### Method Pairs
+
+| Sync Method | Async Method |
+|-------------|--------------|
+| `StorageLocation.store()` | `StorageLocation.store_async()` |
+| `StorageLocation.get()` | `StorageLocation.get_async()` |
+| `StorageLocation.setup_s3()` | `StorageLocation.setup_s3_async()` |
+| `folder.set_storage_location()` | `folder.set_storage_location_async()` |
+| `folder.get_project_setting()` | `folder.get_project_setting_async()` |
+| `folder.delete_project_setting()` | `folder.delete_project_setting_async()` |
+| `folder.get_sts_storage_token()` | `folder.get_sts_storage_token_async()` |
+| `folder.index_files_for_migration()` | `folder.index_files_for_migration_async()` |
+| `folder.migrate_indexed_files()` | `folder.migrate_indexed_files_async()` |
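+
+Both entry points run the same underlying coroutine. A short sketch of the two
+calling styles (assumes an authenticated session; `SYNAPSE_S3` is used here
+only because it needs no extra configuration):
+
+```python
+import asyncio
+
+import synapseclient
+from synapseclient.models import StorageLocation, StorageLocationType
+
+synapseclient.login()
+
+# Synchronous: the decorator runs the coroutine on an event loop for you
+location = StorageLocation(storage_type=StorageLocationType.SYNAPSE_S3).store()
+
+
+async def main() -> None:
+    # Asynchronous: await the *_async twin inside your own event loop
+    await StorageLocation(storage_type=StorageLocationType.SYNAPSE_S3).store_async()
+
+
+asyncio.run(main())
+```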
+
+---
+
+
+
+
+# Part 4: Migration
+
+This section covers the file migration system.
+
+
+
+## Migration Flow
+
+File migration is a two-phase process that moves files from one storage location
+to another while preserving Synapse metadata.
+
+```mermaid
+sequenceDiagram
+ participant User
+ participant Entity as Project/Folder
+ participant IndexFn as index_files_for_migration
+ participant DB as SQLite Database
+ participant MigrateFn as migrate_indexed_files
+ participant Synapse as Synapse REST API
+
+ rect rgb(240, 248, 255)
+ Note over User,Synapse: Phase 1: Index Files
+ User->>Entity: index_files_for_migration(dest_id, db_path)
+ activate Entity
+
+ Entity->>IndexFn: Start indexing
+ activate IndexFn
+
+ IndexFn->>Synapse: Query entity tree
+ Synapse-->>IndexFn: File list
+
+ loop For each file
+ IndexFn->>Synapse: Get file metadata
+ Synapse-->>IndexFn: File info
+ IndexFn->>DB: Record file for migration
+ end
+
+ IndexFn-->>Entity: MigrationResult (indexed counts)
+ deactivate IndexFn
+
+ Entity-->>User: MigrationResult
+ deactivate Entity
+ end
+
+ rect rgb(255, 248, 240)
+ Note over User,Synapse: Phase 2: Migrate Files
+ User->>Entity: migrate_indexed_files(db_path)
+ activate Entity
+
+ Entity->>MigrateFn: Start migration
+ activate MigrateFn
+
+ MigrateFn->>DB: Read indexed files
+
+ loop For each indexed file
+ MigrateFn->>Synapse: Copy file to new storage
+ Synapse-->>MigrateFn: Success/Failure
+ MigrateFn->>DB: Update status
+ end
+
+ MigrateFn-->>Entity: MigrationResult (migrated counts)
+ deactivate MigrateFn
+
+ Entity-->>User: MigrationResult
+ deactivate Entity
+ end
+```
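+
+A sketch of the two phases from the client side (placeholder IDs and paths;
+keyword names are assumed to match the mixin signature listed earlier):
+
+```python
+from synapseclient.models import Folder
+
+folder = Folder(id="syn123").get()
+
+# Phase 1: record every file that needs to move in a local SQLite index
+index_result = folder.index_files_for_migration(
+    dest_storage_location_id=98765,
+    db_path="/tmp/storage_migration.db",
+)
+
+# Phase 2: copy the indexed files into the destination storage location
+migrate_result = folder.migrate_indexed_files(db_path="/tmp/storage_migration.db")
+```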
+
+
+
+### Migration Strategies
+
+| Strategy | Description |
+|----------|-------------|
+| `new` | Create new file versions in destination (default) |
+| `all` | Migrate all versions of each file |
+| `latest` | Only migrate the latest version |
+| `skip` | Skip if file already exists in destination |
+
+---
+
+
+
+
+# Learn More
+
+| Resource | Description |
+|----------|-------------|
+| [Storage Location Tutorial](../tutorials/python/storage_location.md) | Step-by-step guide to using storage locations |
+| [StorageLocation API Reference][synapseclient.models.StorageLocation] | Complete API documentation |
+| [StorageLocationConfigurable Mixin][synapseclient.models.mixins.StorageLocationConfigurable] | Mixin methods for Projects and Folders |
+| [Custom Storage Locations (Synapse Docs)](https://help.synapse.org/docs/Custom-Storage-Locations.2048327803.html) | Official Synapse documentation |
diff --git a/docs/js/mermaid-init.js b/docs/js/mermaid-init.js
new file mode 100644
index 000000000..823cbce57
--- /dev/null
+++ b/docs/js/mermaid-init.js
@@ -0,0 +1,12 @@
+// Initialize Mermaid diagrams
+document.addEventListener("DOMContentLoaded", function() {
+ mermaid.initialize({
+ startOnLoad: true,
+ theme: "default",
+ securityLevel: "loose",
+ flowchart: {
+ useMaxWidth: true,
+ htmlLabels: true
+ }
+ });
+});
diff --git a/docs/reference/experimental/async/folder.md b/docs/reference/experimental/async/folder.md
index 7b29f84ea..fd74e65dd 100644
--- a/docs/reference/experimental/async/folder.md
+++ b/docs/reference/experimental/async/folder.md
@@ -30,3 +30,9 @@ at your own risk.
- get_schema_derived_keys_async
- get_schema_validation_statistics_async
- get_invalid_validation_async
+ - set_storage_location_async
+ - get_project_setting_async
+ - delete_project_setting_async
+ - get_sts_storage_token_async
+ - index_files_for_migration_async
+ - migrate_indexed_files_async
diff --git a/docs/reference/experimental/async/project.md b/docs/reference/experimental/async/project.md
index e3adfa9fc..42803e871 100644
--- a/docs/reference/experimental/async/project.md
+++ b/docs/reference/experimental/async/project.md
@@ -29,3 +29,9 @@ at your own risk.
- get_schema_derived_keys_async
- get_schema_validation_statistics_async
- get_invalid_validation_async
+ - set_storage_location_async
+ - get_project_setting_async
+ - delete_project_setting_async
+ - get_sts_storage_token_async
+ - index_files_for_migration_async
+ - migrate_indexed_files_async
diff --git a/docs/reference/experimental/async/storage_location.md b/docs/reference/experimental/async/storage_location.md
new file mode 100644
index 000000000..00e03fc47
--- /dev/null
+++ b/docs/reference/experimental/async/storage_location.md
@@ -0,0 +1,23 @@
+# StorageLocation
+
+Contained within this file are experimental interfaces for working with the Synapse Python
+Client. Unless otherwise noted these interfaces are subject to change at any time. Use
+at your own risk.
+
+## API Reference
+
+::: synapseclient.models.StorageLocation
+ options:
+ inherited_members: true
+ members:
+ - store_async
+ - get_async
+ - setup_s3_async
+
+---
+
+::: synapseclient.models.StorageLocationType
+
+---
+
+::: synapseclient.models.UploadType
diff --git a/docs/reference/experimental/mixins/manifest_generatable.md b/docs/reference/experimental/mixins/manifest_generatable.md
new file mode 100644
index 000000000..47aac2a4c
--- /dev/null
+++ b/docs/reference/experimental/mixins/manifest_generatable.md
@@ -0,0 +1,69 @@
+# ManifestGeneratable Mixin
+
+The `ManifestGeneratable` mixin provides manifest TSV file generation and reading capabilities for container entities (Projects and Folders).
+
+## Overview
+
+This mixin enables:
+
+- Generating manifest TSV files after syncing from Synapse
+- Uploading files from manifest TSV files
+- Validating manifest files before upload
+
+## Usage
+
+The mixin is automatically available on `Project` and `Folder` classes:
+
+```python
+from synapseclient.models import Project, Folder
+
+# Project and Folder both have manifest capabilities
+project = Project(id="syn123")
+folder = Folder(id="syn456")
+```
+
+## API Reference
+
+::: synapseclient.models.mixins.manifest.ManifestGeneratable
+ options:
+ show_root_heading: true
+ show_source: false
+ members:
+ - generate_manifest
+ - generate_manifest_async
+ - from_manifest
+ - from_manifest_async
+ - validate_manifest
+ - validate_manifest_async
+ - get_manifest_data
+ - get_manifest_data_async
+
+## Constants
+
+### MANIFEST_FILENAME
+
+The default filename for generated manifests: `SYNAPSE_METADATA_MANIFEST.tsv`
+
+```python
+from synapseclient.models import MANIFEST_FILENAME
+
+print(MANIFEST_FILENAME) # "SYNAPSE_METADATA_MANIFEST.tsv"
+```
+
+### DEFAULT_GENERATED_MANIFEST_KEYS
+
+The default columns included in generated manifest files:
+
+```python
+from synapseclient.models import DEFAULT_GENERATED_MANIFEST_KEYS
+
+print(DEFAULT_GENERATED_MANIFEST_KEYS)
+# ['path', 'parent', 'name', 'id', 'synapseStore', 'contentType',
+# 'used', 'executed', 'activityName', 'activityDescription']
+```
+
+## See Also
+
+- [Manifest Operations Tutorial](../../../tutorials/python/manifest_operations.md)
+- [StorableContainer Mixin](storable_container.md)
+- [Manifest TSV Format](../../../explanations/manifest_tsv.md)
diff --git a/docs/reference/experimental/mixins/storage_location_configurable.md b/docs/reference/experimental/mixins/storage_location_configurable.md
new file mode 100644
index 000000000..3cf29d81a
--- /dev/null
+++ b/docs/reference/experimental/mixins/storage_location_configurable.md
@@ -0,0 +1,54 @@
+# StorageLocationConfigurable
+
+The `StorageLocationConfigurable` mixin provides methods for managing storage locations
+on entities (Projects and Folders).
+
+For architecture diagrams and design documentation, see
+[Storage Location Architecture](../../../explanations/storage_location_architecture.md).
+
+This mixin includes:
+
+- Setting upload storage locations
+- Getting and deleting project settings
+- Obtaining STS credentials for direct S3 access
+- Migrating files to new storage locations
+
+## Methods Overview
+
+| Method | Description |
+|--------|-------------|
+| `set_storage_location` | Set the upload storage location for this entity |
+| `get_project_setting` | Get project settings (upload, external_sync, etc.) |
+| `delete_project_setting` | Delete a project setting |
+| `get_sts_storage_token` | Get STS credentials for direct S3 access |
+| `index_files_for_migration` | Index files for migration to a new storage location |
+| `migrate_indexed_files` | Migrate previously indexed files |
+
+## Usage Example
+
+```python
+from synapseclient.models import Folder, StorageLocation, StorageLocationType
+
+# Create a storage location
+storage = StorageLocation(
+ storage_type=StorageLocationType.EXTERNAL_S3,
+ bucket="my-bucket",
+ sts_enabled=True,
+).store()
+
+# Set storage location on a folder
+folder = Folder(id="syn123").get()
+folder.set_storage_location(storage_location_id=storage.storage_location_id)
+
+# Get STS credentials
+credentials = folder.get_sts_storage_token(
+ permission="read_write",
+ output_format="boto",
+)
+```
+
+::: synapseclient.models.mixins.StorageLocationConfigurable
+
+---
+
+::: synapseclient.models.protocols.storage_location_mixin_protocol.StorageLocationConfigurableSynchronousProtocol
diff --git a/docs/reference/experimental/sync/folder.md b/docs/reference/experimental/sync/folder.md
index 43272ea30..c866a727e 100644
--- a/docs/reference/experimental/sync/folder.md
+++ b/docs/reference/experimental/sync/folder.md
@@ -41,3 +41,9 @@ at your own risk.
- get_schema_derived_keys
- get_schema_validation_statistics
- get_invalid_validation
+ - set_storage_location
+ - get_project_setting
+ - delete_project_setting
+ - get_sts_storage_token
+ - index_files_for_migration
+ - migrate_indexed_files
diff --git a/docs/reference/experimental/sync/project.md b/docs/reference/experimental/sync/project.md
index 4e2f35a26..1bb859795 100644
--- a/docs/reference/experimental/sync/project.md
+++ b/docs/reference/experimental/sync/project.md
@@ -40,3 +40,9 @@ at your own risk.
- get_schema_derived_keys
- get_schema_validation_statistics
- get_invalid_validation
+ - set_storage_location
+ - get_project_setting
+ - delete_project_setting
+ - get_sts_storage_token
+ - index_files_for_migration
+ - migrate_indexed_files
diff --git a/docs/reference/experimental/sync/storage_location.md b/docs/reference/experimental/sync/storage_location.md
new file mode 100644
index 000000000..a764c9d7d
--- /dev/null
+++ b/docs/reference/experimental/sync/storage_location.md
@@ -0,0 +1,24 @@
+[](){ #storage-location-reference-sync }
+# StorageLocation
+
+Contained within this file are experimental interfaces for working with the Synapse Python
+Client. Unless otherwise noted these interfaces are subject to change at any time. Use
+at your own risk.
+
+## API Reference
+
+::: synapseclient.models.StorageLocation
+ options:
+ inherited_members: true
+ members:
+ - store
+ - get
+ - setup_s3
+
+---
+
+::: synapseclient.models.StorageLocationType
+
+---
+
+::: synapseclient.models.UploadType
diff --git a/docs/tutorials/python/manifest_operations.md b/docs/tutorials/python/manifest_operations.md
new file mode 100644
index 000000000..25362a347
--- /dev/null
+++ b/docs/tutorials/python/manifest_operations.md
@@ -0,0 +1,328 @@
+# Manifest Operations
+
+This tutorial covers how to work with manifest TSV files for bulk file operations in Synapse. Manifest files provide a way to track file metadata, download files with their annotations, and upload files with provenance information.
+
+## Overview
+
+A manifest file is a tab-separated values (TSV) file that contains metadata about files in Synapse. The manifest includes:
+
+- File paths and Synapse IDs
+- Parent container IDs
+- Annotations
+- Provenance information (used/executed references)
+
+## Generating Manifests During Download
+
+When syncing files from Synapse, you can automatically generate a manifest file that captures all file metadata.
+
+### Using sync_from_synapse with Manifest Generation
+
+```python
+from synapseclient.models import Project
+import synapseclient
+
+synapseclient.login()
+
+# Download a project with manifest generation at each directory level
+project = Project(id="syn123456").sync_from_synapse(
+ path="/path/to/download",
+ generate_manifest="all"
+)
+
+# Or generate a single manifest at the root level only
+project = Project(id="syn123456").sync_from_synapse(
+ path="/path/to/download",
+ generate_manifest="root"
+)
+```
+
+### Manifest Generation Options
+
+The `generate_manifest` parameter accepts three values:
+
+| Value | Description |
+|-------|-------------|
+| `"suppress"` | (Default) Do not create any manifest files |
+| `"root"` | Create a single manifest at the root download path |
+| `"all"` | Create a manifest in each directory level |
+
+### Generating Manifest Separately
+
+You can also generate a manifest after syncing:
+
+```python
+from synapseclient.models import Project
+import synapseclient
+
+synapseclient.login()
+
+# First sync without manifest
+project = Project(id="syn123456").sync_from_synapse(
+ path="/path/to/download"
+)
+
+# Then generate manifest separately
+manifest_path = project.generate_manifest(
+ path="/path/to/download",
+ manifest_scope="root"
+)
+print(f"Manifest created at: {manifest_path}")
+```
+
+## Manifest File Format
+
+The generated manifest file (`SYNAPSE_METADATA_MANIFEST.tsv`) contains the following columns:
+
+| Column | Description |
+|--------|-------------|
+| `path` | Local file path |
+| `parent` | Synapse ID of the parent container |
+| `name` | File name in Synapse |
+| `id` | Synapse file ID |
+| `synapseStore` | Whether the file is stored in Synapse |
+| `contentType` | MIME type of the file |
+| `used` | Provenance - entities used to create this file |
+| `executed` | Provenance - code/scripts executed |
+| `activityName` | Name of the provenance activity |
+| `activityDescription` | Description of the provenance activity |
+| *custom columns* | Any annotations on the files |
+
+### Example Manifest
+
+```tsv
+path parent name id synapseStore contentType used executed activityName activityDescription study dataType
+/data/file1.csv syn123 file1.csv syn456 True text/csv Data Processing Study1 RNA-seq
+/data/file2.csv syn123 file2.csv syn789 True text/csv syn456 Analysis Processed from file1 Study1 RNA-seq
+```
+
+## Uploading Files from a Manifest
+
+You can upload files to Synapse using a manifest file:
+
+```python
+from synapseclient.models import Project
+import synapseclient
+
+synapseclient.login()
+
+# Upload files from a manifest
+files = Project.from_manifest(
+ manifest_path="/path/to/manifest.tsv",
+ parent_id="syn123456"
+)
+
+for file in files:
+ print(f"Uploaded: {file.name} ({file.id})")
+```
+
+### Dry Run Validation
+
+Before uploading, you can validate the manifest:
+
+```python
+from synapseclient.models import Project
+
+# Validate without uploading
+is_valid, errors = Project.validate_manifest(
+ manifest_path="/path/to/manifest.tsv"
+)
+
+if is_valid:
+ print("Manifest is valid, ready for upload")
+else:
+ for error in errors:
+ print(f"Error: {error}")
+```
+
+Or use the `dry_run` option to validate the manifest and see what would be uploaded without making changes:
+
+```python
+# Dry run - validates and returns what would be uploaded, but doesn't upload
+files = Project.from_manifest(
+ manifest_path="/path/to/manifest.tsv",
+ parent_id="syn123456",
+ dry_run=True # Validate only, no actual upload
+)
+print(f"Would upload {len(files)} files")
+```
+
+The `dry_run` parameter is useful for:
+
+- Validating manifest format before committing to an upload
+- Testing your manifest configuration
+- Previewing which files will be affected
+
+## Working with Annotations
+
+Annotations in the manifest are automatically handled:
+
+### On Download
+
+When generating a manifest, all file annotations are included as additional columns:
+
+```python
+project = Project(id="syn123456").sync_from_synapse(
+ path="/path/to/download",
+ generate_manifest="root"
+)
+# Annotations appear as columns in the manifest
+```
+
+### On Upload
+
+Any columns in the manifest that aren't standard fields become annotations:
+
+```tsv
+path parent study dataType specimenType
+/data/file1.csv syn123 Study1 RNA-seq tissue
+```
+
+```python
+files = Project.from_manifest(
+ manifest_path="/path/to/manifest.tsv",
+ parent_id="syn123456",
+ merge_existing_annotations=True # Merge with existing annotations
+)
+```
+
+## Working with Provenance
+
+### On Download
+
+Provenance information is captured in the `used`, `executed`, `activityName`, and `activityDescription` columns:
+
+```python
+project = Project(id="syn123456").sync_from_synapse(
+ path="/path/to/download",
+ include_activity=True, # Include provenance
+ generate_manifest="root"
+)
+```
+
+### On Upload
+
+You can specify provenance in the manifest:
+
+```tsv
+path parent used executed activityName activityDescription
+/data/output.csv syn123 syn456;syn789 https://github.com/repo/script.py Analysis Generated from input files
+```
+
+- Multiple references are separated by semicolons (`;`)
+- References can be Synapse IDs, URLs, or local file paths
+
+## Synapse Download List Integration
+
+The manifest functionality integrates with Synapse's Download List feature. You can generate a manifest directly from your Synapse download list, which is useful for exporting metadata about files you've queued for download in the Synapse web interface.
+
+### Generating Manifest from Download List
+
+```python
+from synapseclient.models import Project
+import synapseclient
+
+synapseclient.login()
+
+# Generate a manifest from your Synapse download list
+manifest_path = Project.generate_download_list_manifest(
+ download_path="/path/to/save/manifest"
+)
+print(f"Manifest downloaded to: {manifest_path}")
+```
+
+### Custom CSV Formatting
+
+You can customize the manifest format:
+
+```python
+from synapseclient.models import Project
+import synapseclient
+
+synapseclient.login()
+
+# Generate a tab-separated manifest
+manifest_path = Project.generate_download_list_manifest(
+ download_path="/path/to/save/manifest",
+ csv_separator="\t", # Tab-separated
+ include_header=True
+)
+```
+
+### Using DownloadListManifestRequest Directly
+
+For more control over the manifest generation process, use the `DownloadListManifestRequest` class directly:
+
+```python
+from synapseclient.models import DownloadListManifestRequest, CsvTableDescriptor
+import synapseclient
+
+synapseclient.login()
+
+# Create a request with custom CSV formatting
+request = DownloadListManifestRequest(
+ csv_table_descriptor=CsvTableDescriptor(
+ separator="\t",
+ quote_character='"',
+ is_first_line_header=True
+ )
+)
+
+# Send the job and wait for completion
+request.send_job_and_wait()
+
+# Download the generated manifest
+manifest_path = request.download_manifest(download_path="/path/to/download")
+print(f"Manifest file handle: {request.result_file_handle_id}")
+```
+
+## Best Practices
+
+1. **Use `generate_manifest="root"` for simple cases** - Creates a single manifest at the root level, easier to manage.
+
+2. **Use `generate_manifest="all"` for complex hierarchies** - Creates manifests at each directory level, useful for large projects with many subdirectories.
+
+3. **Validate manifests before upload** - Use `validate_manifest()` or `dry_run=True` to catch errors early.
+
+4. **Include provenance information** - Set `include_activity=True` when syncing to capture provenance in the manifest.
+
+5. **Back up your manifest** - The manifest is a valuable record of your data and its metadata.
+
+## Async API
+
+All manifest operations are available as async methods:
+
+```python
+import asyncio
+from synapseclient.models import Project
+import synapseclient
+
+async def main():
+ synapseclient.login()
+
+ # Async sync with manifest
+ project = Project(id="syn123456")
+ await project.sync_from_synapse_async(
+ path="/path/to/download",
+ generate_manifest="root"
+ )
+
+ # Async manifest generation
+ manifest_path = await project.generate_manifest_async(
+ path="/path/to/download",
+ manifest_scope="root"
+ )
+
+ # Async upload from manifest
+ files = await Project.from_manifest_async(
+ manifest_path="/path/to/manifest.tsv",
+ parent_id="syn123456"
+ )
+
+asyncio.run(main())
+```
+
+## See Also
+
+- [Download Data in Bulk](download_data_in_bulk.md)
+- [Upload Data in Bulk](upload_data_in_bulk.md)
+- [Manifest TSV Format](../../explanations/manifest_tsv.md)
diff --git a/docs/tutorials/python/storage_location.md b/docs/tutorials/python/storage_location.md
new file mode 100644
index 000000000..41dd5036c
--- /dev/null
+++ b/docs/tutorials/python/storage_location.md
@@ -0,0 +1,135 @@
+# Storage Locations in Synapse
+
+Storage locations allow you to configure where files uploaded to Synapse are
+stored. By default, files are stored in Synapse's internal S3 storage, but you
+can configure projects or folders to use your own AWS S3 buckets, Google Cloud
+Storage buckets, or other external storage.
+
+This tutorial demonstrates how to use the Python client to manage storage
+locations using the new object-oriented models.
+
+[Read more about Custom Storage Locations](https://help.synapse.org/docs/Custom-Storage-Locations.2048327803.html)
+
+## Tutorial Purpose
+In this tutorial you will:
+
+1. Create an external S3 storage location
+2. Set up a folder backed by external S3 storage
+3. Create an STS-enabled storage location for direct S3 access
+4. Use STS credentials with boto3
+5. Retrieve and inspect storage location settings
+
+## Prerequisites
+
+* Make sure that you have completed the [Installation](../installation.md) and
+ [Authentication](../authentication.md) setup.
+* You must have a [Project](./project.md) created; replace the project name
+  used in this tutorial with your own.
+* An AWS S3 bucket properly configured for use with Synapse, including an
+ `owner.txt` file. See
+ [Custom Storage Locations](https://help.synapse.org/docs/Custom-Storage-Locations.2048327803.html).
+* (Optional) `boto3` installed for STS credential examples.
+
+## Understanding Storage Location Types
+
+Synapse supports several types of storage locations:
+
+- **SYNAPSE_S3**: Synapse-managed S3 storage (default)
+- **EXTERNAL_S3**: User-owned Amazon S3 bucket accessed by Synapse
+- **EXTERNAL_GOOGLE_CLOUD**: User-owned Google Cloud Storage bucket
+- **EXTERNAL_SFTP**: External SFTP server not accessed by Synapse
+- **EXTERNAL_OBJECT_STORE**: S3-like bucket (e.g., OpenStack) not accessed by Synapse
+- **PROXY**: A proxy server that controls access to storage
+
+## STS-Enabled Storage
+
+STS (AWS Security Token Service) enabled storage locations allow users to get
+temporary AWS credentials for direct S3 access. This is useful for:
+
+- Uploading large files directly to S3
+- Using AWS tools like the AWS CLI or boto3
+- Performing bulk operations on files
+
+## 1. Set up and get project
+
+```python
+{!docs/tutorials/python/tutorial_scripts/storage_location.py!lines=5-12}
+```
+
+## 2. Create an external S3 storage location
+
+Create a storage location backed by your own S3 bucket. The bucket must be
+properly configured with an `owner.txt` file.
+
+```python
+{!docs/tutorials/python/tutorial_scripts/storage_location.py!lines=14-27}
+```
+
+You'll notice the output looks like:
+
+```
+Created storage location: 12345
+Type: StorageLocationType.EXTERNAL_S3
+Bucket: my-synapse-bucket
+```
+
+
+## 3. Set up a folder with external S3 storage
+
+The `setup_s3` convenience method handles creating the folder, storage location,
+and project settings in a single call.
+
+```python
+{!docs/tutorials/python/tutorial_scripts/storage_location.py!lines=29-38}
+```
+
+## 4. Create an STS-enabled storage location
+
+STS-enabled storage locations allow you to get temporary AWS credentials for
+direct S3 access.
+
+```python
+{!docs/tutorials/python/tutorial_scripts/storage_location.py!lines=40-50}
+```
+
+## 5. Use STS credentials with boto3
+
+Once you have an STS-enabled folder, you can get temporary credentials to
+access the underlying S3 bucket directly.
+
+```python
+{!docs/tutorials/python/tutorial_scripts/storage_location.py!lines=52-72}
+```
+
+## 6. Retrieve and inspect storage location settings
+
+You can retrieve your storage location settings and inspect their configuration.
+
+```python
+{!docs/tutorials/python/tutorial_scripts/storage_location.py!lines=74-86}
+```
+
+## Source code for this tutorial
+
+
+```python
+{!docs/tutorials/python/tutorial_scripts/storage_location.py!}
+```
+
+
+## References used in this tutorial
+
+- [StorageLocation][synapseclient.models.StorageLocation]
+- [StorageLocationType][synapseclient.models.StorageLocationType]
+- [Folder][synapseclient.models.Folder]
+- [Project][synapseclient.models.Project]
+- [syn.login][synapseclient.Synapse.login]
+- [Custom Storage Locations Documentation](https://help.synapse.org/docs/Custom-Storage-Locations.2048327803.html)
+
+## See also
+
+- [Storage Location Architecture](../../explanations/storage_location_architecture.md) -
+ In-depth architecture diagrams and design documentation
diff --git a/docs/tutorials/python/tutorial_scripts/storage_location.py b/docs/tutorials/python/tutorial_scripts/storage_location.py
new file mode 100644
index 000000000..9fe81ff6e
--- /dev/null
+++ b/docs/tutorials/python/tutorial_scripts/storage_location.py
@@ -0,0 +1,86 @@
+"""
+Here is where you'll find the code for the Storage Location tutorial.
+"""
+
+# Step 1: Set up and get the project
+import synapseclient
+from synapseclient.models import Project, StorageLocation, StorageLocationType
+
+syn = synapseclient.login()
+
+# Retrieve the project
+my_project = Project(name="My uniquely named project about Alzheimer's Disease").get()
+
+# Step 2: Create an External S3 Storage Location
+# Replace with your S3 bucket name (must have owner.txt configured)
+MY_BUCKET_NAME = "my-synapse-bucket"
+MY_BASE_KEY = "synapse-data"
+
+storage_location = StorageLocation(
+ storage_type=StorageLocationType.EXTERNAL_S3,
+ bucket=MY_BUCKET_NAME,
+ base_key=MY_BASE_KEY,
+).store()
+
+print(f"Created storage location: {storage_location.storage_location_id}")
+print(f"Type: {storage_location.storage_type}")
+print(f"Bucket: {storage_location.bucket}")
+
+# Step 3: Set up a folder with external S3 storage
+folder, storage = StorageLocation.setup_s3(
+ folder_name="my-external-storage-folder",
+ parent=my_project.id,
+ bucket_name=MY_BUCKET_NAME,
+ base_key="folder-specific-prefix",
+)
+
+print(f"Created folder: {folder.id}")
+print(f"Storage location ID: {storage.storage_location_id}")
+
+# Step 4: Create an STS-enabled storage location
+sts_folder, sts_storage = StorageLocation.setup_s3(
+ folder_name="my-sts-enabled-folder",
+ parent=my_project.id,
+ bucket_name=MY_BUCKET_NAME,
+ base_key="sts-data",
+ sts_enabled=True,
+)
+
+print(f"Created STS-enabled folder: {sts_folder.id}")
+print(f"STS enabled: {sts_storage.sts_enabled}")
+
+# Step 5: Use STS credentials with boto3
+credentials = sts_folder.get_sts_storage_token(
+ permission="read_write",
+ output_format="boto",
+)
+
+print(f"AWS Access Key ID: {credentials['aws_access_key_id'][:10]}...")
+print("Credentials expire: check 'expiration' in json format")
+
+try:
+ import boto3
+
+ s3_client = boto3.client("s3", **credentials)
+ response = s3_client.list_objects_v2(
+ Bucket=MY_BUCKET_NAME,
+ Prefix="sts-data/",
+ MaxKeys=10,
+ )
+ print(f"Found {response.get('KeyCount', 0)} objects")
+except ImportError:
+ print("boto3 not installed, skipping S3 client example")
+
+# Step 6: Retrieve and inspect storage location settings
+retrieved_storage = StorageLocation(
+ storage_location_id=storage_location.storage_location_id
+).get()
+
+print("Retrieved storage location:")
+print(f" ID: {retrieved_storage.storage_location_id}")
+print(f" Type: {retrieved_storage.storage_type}")
+print(f" Bucket: {retrieved_storage.bucket}")
+print(f" Base Key: {retrieved_storage.base_key}")
+print(f" STS Enabled: {retrieved_storage.sts_enabled}")
+print(f" Created By: {retrieved_storage.created_by}")
+print(f" Created On: {retrieved_storage.created_on}")
diff --git a/mkdocs.yml b/mkdocs.yml
index 85a237d0c..7461f91f3 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -45,8 +45,10 @@ nav:
# - Team: tutorials/python/team.md
- Upload data in bulk: tutorials/python/upload_data_in_bulk.md
- Download data in bulk: tutorials/python/download_data_in_bulk.md
+ - Manifest Operations: tutorials/python/manifest_operations.md
- Creating JSON Schema: tutorials/python/schema_operations.md
- Working with JSON Schema: tutorials/python/json_schema.md
+ - Storage Location: tutorials/python/storage_location.md
# - Move Files and Folders: tutorials/python/move_files_and_folders.md
# - Migrate data to other storage locations: tutorials/python/migrate_data_to_other_storage_locations.md
- Working with the Command Line Client: tutorials/command_line_client.md
@@ -111,6 +113,7 @@ nav:
- JSONSchema: reference/experimental/sync/json_schema.md
- Wiki: reference/experimental/sync/wiki.md
- FormGroup and Form: reference/experimental/sync/form.md
+ - StorageLocation: reference/experimental/sync/storage_location.md
- Extensions:
- Curator: reference/extensions/curator.md
- Asynchronous:
@@ -139,15 +142,18 @@ nav:
- JSONSchema: reference/experimental/async/json_schema.md
- Wiki: reference/experimental/async/wiki.md
- FormGroup and Form: reference/experimental/async/form.md
+ - StorageLocation: reference/experimental/async/storage_location.md
- Mixins:
- AccessControllable: reference/experimental/mixins/access_controllable.md
- StorableContainer: reference/experimental/mixins/storable_container.md
+ - ManifestGeneratable: reference/experimental/mixins/manifest_generatable.md
- AsynchronousCommunicator: reference/experimental/mixins/asynchronous_communicator.md
- FailureStrategy: reference/experimental/mixins/failure_strategy.md
- BaseJSONSchema: reference/experimental/mixins/base_json_schema.md
- ContainerEntityJSONSchema: reference/experimental/mixins/container_json_schema.md
- FormData: reference/experimental/mixins/form_data.md
- FormGroup: reference/experimental/mixins/form_group.md
+ - StorageLocationConfigurable: reference/experimental/mixins/storage_location_configurable.md
- Further Reading:
- Home: explanations/home.md
@@ -159,6 +165,7 @@ nav:
- Structuring Your Project: explanations/structuring_your_project.md
- Asyncio Changes in Python 3.14: explanations/asyncio_in_python_3_14.md
- Curator Data model: explanations/curator_data_model.md
+ - Storage Location Architecture: explanations/storage_location_architecture.md
- News:
- news.md
- Contact Us: https://sagebionetworks.jira.com/servicedesk/customer/portal/9/group/16/create/206
@@ -201,6 +208,10 @@ theme:
extra_css:
- css/custom.css
+extra_javascript:
+ - https://unpkg.com/mermaid@10/dist/mermaid.min.js
+ - js/mermaid-init.js
+
plugins:
- search
- mkdocstrings:
diff --git a/synapseclient/api/__init__.py b/synapseclient/api/__init__.py
index 6b0961677..13e97c701 100644
--- a/synapseclient/api/__init__.py
+++ b/synapseclient/api/__init__.py
@@ -130,6 +130,14 @@
update_organization_acl,
validate_entity_with_json_schema,
)
+from .storage_location_services import (
+ create_project_setting,
+ create_storage_location_setting,
+ delete_project_setting,
+ get_project_setting,
+ get_storage_location_setting,
+ update_project_setting,
+)
from .table_services import (
ViewEntityType,
ViewTypeMask,
@@ -357,4 +365,11 @@
"create_form_data",
"list_form_data",
"list_form_data_sync",
+ # storage_location_services
+ "create_storage_location_setting",
+ "get_storage_location_setting",
+ "get_project_setting",
+ "create_project_setting",
+ "update_project_setting",
+ "delete_project_setting",
]
diff --git a/synapseclient/api/storage_location_services.py b/synapseclient/api/storage_location_services.py
new file mode 100644
index 000000000..c73c7e8cc
--- /dev/null
+++ b/synapseclient/api/storage_location_services.py
@@ -0,0 +1,169 @@
+"""Services for interacting with storage location settings and project settings in Synapse.
+
+This module provides async REST wrappers for creating, retrieving, and managing
+storage location settings and their associated project settings.
+"""
+
+import json
+from typing import TYPE_CHECKING, Any, Dict, Optional
+
+if TYPE_CHECKING:
+ from synapseclient import Synapse
+
+
+async def create_storage_location_setting(
+ body: Dict[str, Any],
+ *,
+ synapse_client: Optional["Synapse"] = None,
+) -> Dict[str, Any]:
+ """Create a new storage location setting in Synapse.
+
+ Storage location creation is idempotent per user - if the same user creates
+ a storage location with identical properties, the existing one is returned.
+
+ Arguments:
+ body: The storage location setting request body containing concreteType
+ and other type-specific fields.
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ The created or existing storage location setting as a dictionary.
+ """
+ from synapseclient import Synapse
+
+ client = Synapse.get_client(synapse_client=synapse_client)
+ return await client.rest_post_async(
+ uri="/storageLocation",
+ body=json.dumps(body),
+ )
+
+
+async def get_storage_location_setting(
+ storage_location_id: int,
+ *,
+ synapse_client: Optional["Synapse"] = None,
+) -> Dict[str, Any]:
+ """Retrieve a storage location setting by its ID.
+
+ Only the creator of a StorageLocationSetting can retrieve it by its ID.
+
+ Arguments:
+ storage_location_id: The ID of the storage location setting to retrieve.
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ The storage location setting as a dictionary.
+ """
+ from synapseclient import Synapse
+
+ client = Synapse.get_client(synapse_client=synapse_client)
+ return await client.rest_get_async(
+ uri=f"/storageLocation/{storage_location_id}",
+ )
+
+
+async def get_project_setting(
+ project_id: str,
+ setting_type: str,
+ *,
+ synapse_client: Optional["Synapse"] = None,
+) -> Optional[Dict[str, Any]]:
+ """Get the project setting for an entity.
+
+ Arguments:
+ project_id: The Synapse ID of the project or folder.
+ setting_type: The type of setting to retrieve. One of:
+ 'upload', 'external_sync', 'requester_pays'.
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ The project setting as a dictionary, or None if no setting exists.
+ """
+ from synapseclient import Synapse
+
+ client = Synapse.get_client(synapse_client=synapse_client)
+ response = await client.rest_get_async(
+ uri=f"/projectSettings/{project_id}/type/{setting_type}",
+ )
+ # If no project setting, an empty string is returned as the response
+ return response if response else None
+
+
+async def create_project_setting(
+ body: Dict[str, Any],
+ *,
+ synapse_client: Optional["Synapse"] = None,
+) -> Dict[str, Any]:
+ """Create a new project setting.
+
+ Arguments:
+ body: The project setting request body.
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ The created project setting as a dictionary.
+ """
+ from synapseclient import Synapse
+
+ client = Synapse.get_client(synapse_client=synapse_client)
+ return await client.rest_post_async(
+ uri="/projectSettings",
+ body=json.dumps(body),
+ )
+
+
+async def update_project_setting(
+ body: Dict[str, Any],
+ *,
+ synapse_client: Optional["Synapse"] = None,
+) -> Dict[str, Any]:
+ """Update an existing project setting.
+
+ Arguments:
+ body: The project setting request body including the id field.
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ The updated project setting as a dictionary.
+ """
+ from synapseclient import Synapse
+
+ client = Synapse.get_client(synapse_client=synapse_client)
+ return await client.rest_put_async(
+ uri="/projectSettings",
+ body=json.dumps(body),
+ )
+
+
+async def delete_project_setting(
+ setting_id: str,
+ *,
+ synapse_client: Optional["Synapse"] = None,
+) -> None:
+ """Delete a project setting.
+
+ Arguments:
+ setting_id: The ID of the project setting to delete.
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ None
+ """
+ from synapseclient import Synapse
+
+ client = Synapse.get_client(synapse_client=synapse_client)
+ await client.rest_delete_async(
+ uri=f"/projectSettings/{setting_id}",
+ )
diff --git a/synapseclient/client.py b/synapseclient/client.py
index 2e9c543cb..35d521a27 100644
--- a/synapseclient/client.py
+++ b/synapseclient/client.py
@@ -5512,6 +5512,11 @@ def _createExternalObjectStoreFileHandle(
"/externalFileHandle", json.dumps(file_handle), self.fileHandleEndpoint
)
+ @deprecated(
+ version="4.12.0",
+ reason="To be removed in 5.0.0. "
+ "Use `synapseclient.api.post_external_s3_file_handle()` instead.",
+ )
def create_external_s3_file_handle(
self,
bucket_name,
@@ -5650,7 +5655,11 @@ def _getUserCredentials(
# Project/Folder storage location settings #
############################################
- # TODO: Deprecate method in https://sagebionetworks.jira.com/browse/SYNPY-1441
+ @deprecated(
+ version="4.12.0",
+ reason="To be removed in 5.0.0. "
+ "Use `StorageLocation(...).store()` from synapseclient.models instead.",
+ )
def createStorageLocationSetting(self, storage_type, **kwargs):
"""
Creates an IMMUTABLE storage location based on the specified type.
@@ -5707,7 +5716,12 @@ def createStorageLocationSetting(self, storage_type, **kwargs):
return self.restPOST("/storageLocation", body=json.dumps(kwargs))
- # TODO: Deprecate method in https://sagebionetworks.jira.com/browse/SYNPY-1441
+ @deprecated(
+ version="4.12.0",
+ reason="To be removed in 5.0.0. "
+ "Use `StorageLocation(storage_location_id=id).get()` from "
+ "synapseclient.models instead.",
+ )
def getMyStorageLocationSetting(self, storage_location_id):
"""
Get a StorageLocationSetting by its id.
@@ -5721,7 +5735,12 @@ def getMyStorageLocationSetting(self, storage_location_id):
"""
return self.restGET("/storageLocation/%s" % storage_location_id)
- # TODO: Deprecate method in https://sagebionetworks.jira.com/browse/SYNPY-1441
+ @deprecated(
+ version="4.12.0",
+ reason="To be removed in 5.0.0. "
+ "Use `Folder(id=...).set_storage_location(...)` or "
+ "`Project(id=...).set_storage_location(...)` from synapseclient.models instead.",
+ )
def setStorageLocation(self, entity, storage_location_id):
"""
Sets the storage location for a Project or Folder
@@ -5759,7 +5778,12 @@ def setStorageLocation(self, entity, storage_location_id):
"/projectSettings", body=json.dumps(project_destination)
)
- # TODO: Deprecate method in https://sagebionetworks.jira.com/browse/SYNPY-1441
+ @deprecated(
+ version="4.12.0",
+ reason="To be removed in 5.0.0. "
+ "Use `Folder(id=...).get_project_setting(...)` or "
+ "`Project(id=...).get_project_setting(...)` from synapseclient.models instead.",
+ )
def getProjectSetting(self, project, setting_type):
"""
Gets the ProjectSetting for a project.
@@ -5787,7 +5811,12 @@ def getProjectSetting(self, project, setting_type):
response if response else None
) # if no project setting, a empty string is returned as the response
- # TODO: Deprecate method in https://sagebionetworks.jira.com/browse/SYNPY-1441
+ @deprecated(
+ version="4.12.0",
+ reason="To be removed in 5.0.0. "
+ "Use `Folder(id=...).get_sts_storage_token(...)` or "
+ "`Project(id=...).get_sts_storage_token(...)` from synapseclient.models instead.",
+ )
def get_sts_storage_token(
self, entity, permission, *, output_format="json", min_remaining_life=None
):
@@ -5820,7 +5849,11 @@ def get_sts_storage_token(
min_remaining_life=min_remaining_life,
)
- # TODO: Deprecate method in https://sagebionetworks.jira.com/browse/SYNPY-1441
+ @deprecated(
+ version="4.12.0",
+ reason="To be removed in 5.0.0. "
+ "Use `StorageLocation.setup_s3(...)` from synapseclient.models instead.",
+ )
def create_s3_storage_location(
self,
*,
@@ -5862,7 +5895,11 @@ def create_s3_storage_location(
)
)
- # TODO: Deprecate method in https://sagebionetworks.jira.com/browse/SYNPY-1441
+ @deprecated(
+ version="4.12.0",
+ reason="To be removed in 5.0.0. "
+ "Use `StorageLocation.setup_s3_async(...)` from synapseclient.models instead.",
+ )
async def create_s3_storage_location_async(
self,
*,
diff --git a/synapseclient/core/constants/concrete_types.py b/synapseclient/core/constants/concrete_types.py
index fba11dbdb..f34fc3887 100644
--- a/synapseclient/core/constants/concrete_types.py
+++ b/synapseclient/core/constants/concrete_types.py
@@ -9,7 +9,23 @@
EXTERNAL_S3_STORAGE_LOCATION_SETTING = (
"org.sagebionetworks.repo.model.project.ExternalS3StorageLocationSetting"
)
-# EXTERNAL_GCP_STORAGE_LOCATION_SETTING = 'org.sagebionetworks.repo.model.project.ExternalGoogleCloudStorageLocationSetting' # noqa: E501
+EXTERNAL_GCP_STORAGE_LOCATION_SETTING = (
+ "org.sagebionetworks.repo.model.project.ExternalGoogleCloudStorageLocationSetting"
+)
+EXTERNAL_STORAGE_LOCATION_SETTING = (
+ "org.sagebionetworks.repo.model.project.ExternalStorageLocationSetting"
+)
+EXTERNAL_OBJECT_STORAGE_LOCATION_SETTING = (
+ "org.sagebionetworks.repo.model.project.ExternalObjectStorageLocationSetting"
+)
+PROXY_STORAGE_LOCATION_SETTINGS = (
+ "org.sagebionetworks.repo.model.project.ProxyStorageLocationSettings"
+)
+
+# Concrete types for ProjectSettings
+UPLOAD_DESTINATION_LIST_SETTING = (
+ "org.sagebionetworks.repo.model.project.UploadDestinationListSetting"
+)
# Concrete types for UploadDestinations
SYNAPSE_S3_UPLOAD_DESTINATION = (
@@ -117,6 +133,14 @@
"org.sagebionetworks.repo.model.curation.metadata.RecordBasedMetadataTaskProperties"
)
+# Download List Types
+DOWNLOAD_LIST_MANIFEST_REQUEST = (
+ "org.sagebionetworks.repo.model.download.DownloadListManifestRequest"
+)
+DOWNLOAD_LIST_MANIFEST_RESPONSE = (
+ "org.sagebionetworks.repo.model.download.DownloadListManifestResponse"
+)
+
# Grid Session Types
CREATE_GRID_REQUEST = "org.sagebionetworks.repo.model.grid.CreateGridRequest"
GRID_RECORD_SET_EXPORT_REQUEST = (
diff --git a/synapseclient/models/__init__.py b/synapseclient/models/__init__.py
index 554de0bc2..9d5bc90b0 100644
--- a/synapseclient/models/__init__.py
+++ b/synapseclient/models/__init__.py
@@ -14,6 +14,7 @@
RecordBasedMetadataTaskProperties,
)
from synapseclient.models.dataset import Dataset, DatasetCollection, EntityRef
+from synapseclient.models.download_list import DownloadListManifestRequest
from synapseclient.models.entityview import EntityView, ViewTypeMask
from synapseclient.models.evaluation import Evaluation
from synapseclient.models.file import File, FileHandle
@@ -21,11 +22,20 @@
from synapseclient.models.form import FormData, FormGroup
from synapseclient.models.link import Link
from synapseclient.models.materializedview import MaterializedView
+from synapseclient.models.mixins.manifest import (
+ DEFAULT_GENERATED_MANIFEST_KEYS,
+ MANIFEST_FILENAME,
+)
from synapseclient.models.mixins.table_components import QueryMixin
from synapseclient.models.project import Project
from synapseclient.models.recordset import RecordSet
from synapseclient.models.schema_organization import JSONSchema, SchemaOrganization
from synapseclient.models.services import FailureStrategy
+from synapseclient.models.storage_location import (
+ StorageLocation,
+ StorageLocationType,
+ UploadType,
+)
from synapseclient.models.submission import Submission
from synapseclient.models.submission_bundle import SubmissionBundle
from synapseclient.models.submission_status import SubmissionStatus
@@ -153,6 +163,15 @@
# Form models
"FormGroup",
"FormData",
+ # Storage Location models
+ "StorageLocation",
+ "StorageLocationType",
+ "UploadType",
+ # Manifest constants
+ "MANIFEST_FILENAME",
+ "DEFAULT_GENERATED_MANIFEST_KEYS",
+ # Download List models
+ "DownloadListManifestRequest",
]
# Static methods to expose as functions
diff --git a/synapseclient/models/download_list.py b/synapseclient/models/download_list.py
new file mode 100644
index 000000000..e1c0eb866
--- /dev/null
+++ b/synapseclient/models/download_list.py
@@ -0,0 +1,224 @@
+"""Models for interacting with Synapse's Download List functionality.
+
+This module provides classes for generating manifest files from a user's download list
+using the Synapse Asynchronous Job service.
+
+See: https://rest-docs.synapse.org/rest/POST/download/list/manifest/async/start.html
+"""
+
+from dataclasses import dataclass, field
+from typing import Any, Dict, Optional
+
+from typing_extensions import Self
+
+from synapseclient import Synapse
+from synapseclient.core.async_utils import async_to_sync, otel_trace_method
+from synapseclient.core.constants.concrete_types import DOWNLOAD_LIST_MANIFEST_REQUEST
+from synapseclient.core.download import download_by_file_handle
+from synapseclient.core.utils import delete_none_keys
+from synapseclient.models.mixins.asynchronous_job import AsynchronousCommunicator
+from synapseclient.models.protocols.download_list_protocol import (
+ DownloadListManifestRequestSynchronousProtocol,
+)
+from synapseclient.models.table_components import CsvTableDescriptor
+
+
+@dataclass
+@async_to_sync
+class DownloadListManifestRequest(
+ DownloadListManifestRequestSynchronousProtocol, AsynchronousCommunicator
+):
+ """
+ A request to generate a manifest file (CSV) of the current user's download list.
+
+ This class uses the Synapse Asynchronous Job service to generate a manifest file
+ containing metadata about files in the user's download list. The manifest can be
+ used to download files or for record-keeping purposes.
+
+ See: https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/model/download/DownloadListManifestRequest.html
+
+ Attributes:
+ csv_table_descriptor: Optional CSV formatting options for the manifest.
+ result_file_handle_id: The file handle ID of the generated manifest (populated after completion).
+
+ Example: Generate a manifest from download list
+ Generate a CSV manifest from your download list:
+
+ from synapseclient.models import DownloadListManifestRequest
+ import synapseclient
+
+ synapseclient.login()
+
+ # Create and send the request
+ request = DownloadListManifestRequest()
+ request.send_job_and_wait()
+
+ print(f"Manifest file handle: {request.result_file_handle_id}")
+
+ Example: Generate manifest with custom CSV formatting
+ Use custom separator and quote characters:
+
+ from synapseclient.models import DownloadListManifestRequest, CsvTableDescriptor
+ import synapseclient
+
+ synapseclient.login()
+
+ request = DownloadListManifestRequest(
+ csv_table_descriptor=CsvTableDescriptor(
+ separator="\t", # Tab-separated
+ is_first_line_header=True
+ )
+ )
+ request.send_job_and_wait()
+ """
+
+ concrete_type: str = field(
+ default=DOWNLOAD_LIST_MANIFEST_REQUEST, repr=False, compare=False
+ )
+ """The concrete type of this request."""
+
+ csv_table_descriptor: Optional[CsvTableDescriptor] = None
+ """Optional CSV formatting options for the manifest file."""
+
+ result_file_handle_id: Optional[str] = None
+ """The file handle ID of the generated manifest file. Populated after the job completes."""
+
+ def to_synapse_request(self) -> Dict[str, Any]:
+ """
+ Convert this request to the format expected by the Synapse REST API.
+
+ Returns:
+ A dictionary containing the request body for the Synapse API.
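+
+        Example: Default request body (illustrative)
+            With no CSV descriptor set, the serialized request contains only the
+            concrete type:
+
+                request = DownloadListManifestRequest()
+                body = request.to_synapse_request()
+                # body == {"concreteType": DOWNLOAD_LIST_MANIFEST_REQUEST}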
+ """
+ request = {
+ "concreteType": self.concrete_type,
+ }
+ if self.csv_table_descriptor:
+ request[
+ "csvTableDescriptor"
+ ] = self.csv_table_descriptor.to_synapse_request()
+ delete_none_keys(request)
+ return request
+
+ def fill_from_dict(self, synapse_response: Dict[str, Any]) -> Self:
+ """
+ Populate this object from a Synapse REST API response.
+
+ Arguments:
+ synapse_response: The response from the REST API.
+
+ Returns:
+ This object with fields populated from the response.
+ """
+ self.result_file_handle_id = synapse_response.get("resultFileHandleId", None)
+ return self
+
+ @otel_trace_method(
+ method_to_trace_name=lambda self, **kwargs: "DownloadListManifestRequest_send_job_and_wait"
+ )
+ async def send_job_and_wait_async(
+ self,
+ post_exchange_args: Optional[Dict[str, Any]] = None,
+ timeout: int = 120,
+ *,
+ synapse_client: Optional[Synapse] = None,
+ ) -> Self:
+ """Send the job to the Asynchronous Job service and wait for it to complete.
+
+ This method sends the manifest generation request to Synapse and waits
+ for the job to complete. After completion, the `result_file_handle_id`
+ attribute will be populated.
+
+ Arguments:
+ post_exchange_args: Additional arguments to pass to the request.
+ timeout: The number of seconds to wait for the job to complete or progress
+ before raising a SynapseTimeoutError. Defaults to 120.
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ This instance with `result_file_handle_id` populated.
+
+ Raises:
+ SynapseTimeoutError: If the job does not complete within the timeout.
+ SynapseError: If the job fails.
+
+ Example: Generate a manifest
+ Generate a manifest from the download list:
+
+ from synapseclient.models import DownloadListManifestRequest
+ import synapseclient
+
+ synapseclient.login()
+
+ request = DownloadListManifestRequest()
+ request.send_job_and_wait()
+ print(f"Manifest file handle: {request.result_file_handle_id}")
+ """
+ return await super().send_job_and_wait_async(
+ post_exchange_args=post_exchange_args,
+ timeout=timeout,
+ synapse_client=synapse_client,
+ )
+
+ @otel_trace_method(
+ method_to_trace_name=lambda self, **kwargs: "DownloadListManifestRequest_download_manifest"
+ )
+ async def download_manifest_async(
+ self,
+ download_path: str,
+ *,
+ synapse_client: Optional[Synapse] = None,
+ ) -> str:
+ """
+ Download the generated manifest file to a local path.
+
+ This method should be called after `send_job_and_wait()` has completed
+ successfully and `result_file_handle_id` is populated.
+
+ Arguments:
+ download_path: The local directory path where the manifest will be saved.
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ The full path to the downloaded manifest file.
+
+ Raises:
+ ValueError: If the manifest has not been generated yet (no result_file_handle_id).
+
+ Example: Download the manifest after generation
+ Generate and download a manifest:
+
+ from synapseclient.models import DownloadListManifestRequest
+ import synapseclient
+
+ synapseclient.login()
+
+ request = DownloadListManifestRequest()
+ request.send_job_and_wait()
+
+ manifest_path = request.download_manifest(download_path="/path/to/download")
+ print(f"Manifest downloaded to: {manifest_path}")
+ """
+ if not self.result_file_handle_id:
+ raise ValueError(
+ "Manifest has not been generated yet. "
+ "Call send_job_and_wait() before downloading."
+ )
+
+ # Download the file handle using the download module
+ # For download list manifests, the synapse_id parameter is set to the file handle ID
+ # because these manifests are not associated with a specific entity. The download
+ # service handles this case by using the file handle directly.
+ downloaded_path = await download_by_file_handle(
+ file_handle_id=self.result_file_handle_id,
+ synapse_id=self.result_file_handle_id,
+ entity_type="FileEntity",
+ destination=download_path,
+ synapse_client=synapse_client,
+ )
+
+ return downloaded_path
diff --git a/synapseclient/models/folder.py b/synapseclient/models/folder.py
index a0658f521..c4d4e0718 100644
--- a/synapseclient/models/folder.py
+++ b/synapseclient/models/folder.py
@@ -18,6 +18,10 @@
ContainerEntityJSONSchema,
StorableContainer,
)
+from synapseclient.models.mixins.manifest import ManifestGeneratable
+from synapseclient.models.mixins.storage_location_mixin import (
+ StorageLocationConfigurable,
+)
from synapseclient.models.protocols.folder_protocol import FolderSynchronousProtocol
from synapseclient.models.services.search import get_id
from synapseclient.models.services.storable_entity import store_entity
@@ -47,6 +51,8 @@ class Folder(
AccessControllable,
StorableContainer,
ContainerEntityJSONSchema,
+ StorageLocationConfigurable,
+ ManifestGeneratable,
):
"""Folder is a hierarchical container for organizing data in Synapse.
diff --git a/synapseclient/models/mixins/__init__.py b/synapseclient/models/mixins/__init__.py
index 62ddcf017..491ea9616 100644
--- a/synapseclient/models/mixins/__init__.py
+++ b/synapseclient/models/mixins/__init__.py
@@ -20,11 +20,20 @@
JSONSchemaValidationStatistics,
ValidationException,
)
+from synapseclient.models.mixins.manifest import (
+ DEFAULT_GENERATED_MANIFEST_KEYS,
+ MANIFEST_FILENAME,
+ ManifestGeneratable,
+)
from synapseclient.models.mixins.storable_container import StorableContainer
+from synapseclient.models.mixins.storage_location_mixin import (
+ StorageLocationConfigurable,
+)
__all__ = [
"AccessControllable",
"StorableContainer",
+ "StorageLocationConfigurable",
"AsynchronousCommunicator",
"BaseJSONSchema",
"ContainerEntityJSONSchema",
@@ -40,4 +49,7 @@
"FormChangeRequest",
"FormSubmissionStatus",
"StateEnum",
+ "ManifestGeneratable",
+ "MANIFEST_FILENAME",
+ "DEFAULT_GENERATED_MANIFEST_KEYS",
]
diff --git a/synapseclient/models/mixins/asynchronous_job.py b/synapseclient/models/mixins/asynchronous_job.py
index fd3649bc1..407babe92 100644
--- a/synapseclient/models/mixins/asynchronous_job.py
+++ b/synapseclient/models/mixins/asynchronous_job.py
@@ -14,6 +14,7 @@
AGENT_CHAT_REQUEST,
CREATE_GRID_REQUEST,
CREATE_SCHEMA_REQUEST,
+ DOWNLOAD_LIST_MANIFEST_REQUEST,
GET_VALIDATION_SCHEMA_REQUEST,
GRID_RECORD_SET_EXPORT_REQUEST,
QUERY_BUNDLE_REQUEST,
@@ -29,6 +30,7 @@
ASYNC_JOB_URIS = {
AGENT_CHAT_REQUEST: "/agent/chat/async",
CREATE_GRID_REQUEST: "/grid/session/async",
+ DOWNLOAD_LIST_MANIFEST_REQUEST: "/download/list/manifest/async",
GRID_RECORD_SET_EXPORT_REQUEST: "/grid/export/recordset/async",
TABLE_UPDATE_TRANSACTION_REQUEST: "/entity/{entityId}/table/transaction/async",
GET_VALIDATION_SCHEMA_REQUEST: "/schema/type/validation/async",
diff --git a/synapseclient/models/mixins/manifest.py b/synapseclient/models/mixins/manifest.py
new file mode 100644
index 000000000..785a9c7b9
--- /dev/null
+++ b/synapseclient/models/mixins/manifest.py
@@ -0,0 +1,950 @@
+"""Mixin for objects that can generate and read manifest TSV files."""
+
+import csv
+import datetime
+import io
+import os
+import re
+import sys
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
+
+from synapseclient import Synapse
+from synapseclient.core import utils
+from synapseclient.core.async_utils import async_to_sync, otel_trace_method
+from synapseclient.core.utils import is_synapse_id_str, is_url, topolgical_sort
+from synapseclient.models.protocols.manifest_protocol import (
+ ManifestGeneratableSynchronousProtocol,
+)
+
+if TYPE_CHECKING:
+ from synapseclient.models import File
+
+# When new fields are added to the manifest they will also need to be added to
+# file.py#_determine_fields_to_ignore_in_merge
+REQUIRED_FIELDS = ["path", "parent"]
+FILE_CONSTRUCTOR_FIELDS = ["name", "id", "synapseStore", "contentType"]
+STORE_FUNCTION_FIELDS = ["activityName", "activityDescription", "forceVersion"]
+PROVENANCE_FIELDS = ["used", "executed"]
+MANIFEST_FILENAME = "SYNAPSE_METADATA_MANIFEST.tsv"
+DEFAULT_GENERATED_MANIFEST_KEYS = [
+ "path",
+ "parent",
+ "name",
+ "id",
+ "synapseStore",
+ "contentType",
+ "used",
+ "executed",
+ "activityName",
+ "activityDescription",
+]
+ARRAY_BRACKET_PATTERN = re.compile(r"^\[.*\]$")
+SINGLE_OPEN_BRACKET_PATTERN = re.compile(r"^\[")
+SINGLE_CLOSING_BRACKET_PATTERN = re.compile(r"\]$")
+# https://stackoverflow.com/questions/18893390/splitting-on-comma-outside-quotes
+COMMAS_OUTSIDE_DOUBLE_QUOTES_PATTERN = re.compile(r",(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)")
+
+
+def _manifest_filename(path: str) -> str:
+ """Get the full path to the manifest file.
+
+ Arguments:
+ path: The directory where the manifest file will be created.
+
+ Returns:
+ The full path to the manifest file.
+ """
+ return os.path.join(path, MANIFEST_FILENAME)
+
+
+def _convert_manifest_data_items_to_string_list(
+ items: List[Union[str, datetime.datetime, bool, int, float]],
+) -> str:
+ """
+    Convert the value of an individual key, which may contain a list of data, into a
+    single string that can be written to the manifest file.
+
+    This has specific logic around how datetime fields are handled.
+
+    When working with datetime fields we print the ISO 8601 UTC representation of
+    the datetime.
+
+    When working with non-string values we print the unquoted representation of the
+    object.
+
+ Example: Examples
+ Several examples of how this function works.
+
+ >>> _convert_manifest_data_items_to_string_list(["a", "b", "c"])
+ '[a,b,c]'
+ >>> _convert_manifest_data_items_to_string_list(["string,with,commas", "string without commas"])
+ '["string,with,commas",string without commas]'
+ >>> _convert_manifest_data_items_to_string_list(["string,with,commas"])
+ 'string,with,commas'
+ >>> _convert_manifest_data_items_to_string_list(
+ [datetime.datetime(2020, 1, 1, 0, 0, 0, 0, tzinfo=datetime.timezone.utc)])
+ '2020-01-01T00:00:00Z'
+ >>> _convert_manifest_data_items_to_string_list([True])
+ 'True'
+ >>> _convert_manifest_data_items_to_string_list([1])
+ '1'
+ >>> _convert_manifest_data_items_to_string_list([1.0])
+ '1.0'
+ >>> _convert_manifest_data_items_to_string_list(
+ [datetime.datetime(2020, 1, 1, 0, 0, 0, 0, tzinfo=datetime.timezone.utc),
+ datetime.datetime(2021, 1, 1, 0, 0, 0, 0, tzinfo=datetime.timezone.utc)])
+ '[2020-01-01T00:00:00Z,2021-01-01T00:00:00Z]'
+
+
+ Args:
+ items: The list of items to convert.
+
+ Returns:
+        The items converted to a single string suitable for writing to the manifest.
+ """
+ items_to_write = []
+ for item in items:
+ if isinstance(item, datetime.datetime):
+ items_to_write.append(
+ utils.datetime_to_iso(dt=item, include_milliseconds_if_zero=False)
+ )
+ else:
+            # If a string-based annotation has a comma in it,
+            # this will wrap the string in quotes so it won't be parsed
+            # as multiple values. For example, this is an annotation with 2 values:
+            # [my first annotation, "my, second, annotation"]
+            # whereas this is an annotation with 4 values:
+            # [my first annotation, my, second, annotation]
+ if isinstance(item, str):
+ if len(items) > 1 and "," in item:
+ items_to_write.append(f'"{item}"')
+ else:
+ items_to_write.append(item)
+ else:
+ items_to_write.append(repr(item))
+
+ if len(items_to_write) > 1:
+ return f'[{",".join(items_to_write)}]'
+ elif len(items_to_write) == 1:
+ return items_to_write[0]
+ else:
+ return ""
+
+
+def _convert_manifest_data_row_to_dict(row: dict, keys: List[str]) -> dict:
+ """
+ Convert a row of data to a dict that can be written to a manifest file.
+
+ Args:
+ row: The row of data to convert.
+ keys: The keys of the manifest. Used to select the rows of data.
+
+ Returns:
+ The dict representation of the row.
+ """
+ data_to_write = {}
+ for key in keys:
+ data_for_key = row.get(key, "")
+ if isinstance(data_for_key, list):
+ items_to_write = _convert_manifest_data_items_to_string_list(data_for_key)
+ data_to_write[key] = items_to_write
+ else:
+ data_to_write[key] = data_for_key
+ return data_to_write
+
+
+def _write_manifest_data(filename: str, keys: List[str], data: List[dict]) -> None:
+ """
+    Write a set of keys (column headers) and a list of data rows to a manifest file.
+    The data is written out as a tab-separated file.
+
+    The data written to the TSV file is not quoted with any characters because the
+    syncToSynapse function does not require strings to be quoted. When quote
+    characters were included, extra double quotes were added to the strings written
+    to the manifest file. This did not cause errors, but it changed the content of
+    the manifest file when no changes were required.
+
+ Args:
+ filename: The name of the file to write to.
+ keys: The keys of the manifest.
+ data: The data to write to the manifest. This should be a list of dicts where
+ each dict represents a row of data.
+ """
+ with io.open(filename, "w", encoding="utf8") if filename else sys.stdout as fp:
+ csv_writer = csv.DictWriter(
+ fp,
+ keys,
+ restval="",
+ extrasaction="ignore",
+ delimiter="\t",
+ quotechar=None,
+ quoting=csv.QUOTE_NONE,
+ )
+ csv_writer.writeheader()
+ for row in data:
+ csv_writer.writerow(rowdict=_convert_manifest_data_row_to_dict(row, keys))
+
+
+def _extract_entity_metadata_for_file(
+ all_files: List["File"],
+) -> Tuple[List[str], List[Dict[str, str]]]:
+ """
+ Extracts metadata from the list of File Entities and returns them in a form
+ usable by csv.DictWriter
+
+ Arguments:
+ all_files: an iterable that provides File entities
+
+ Returns:
+        keys: a list of column headers
+ data: a list of dicts containing data from each row
+ """
+ keys = list(DEFAULT_GENERATED_MANIFEST_KEYS)
+ annotation_keys = set()
+ data = []
+ for entity in all_files:
+ row = {
+ "parent": entity.parent_id,
+ "path": entity.path,
+ "name": entity.name,
+ "id": entity.id,
+ "synapseStore": entity.synapse_store,
+ "contentType": entity.content_type,
+ }
+
+ if entity.annotations:
+ annotation_keys.update(set(entity.annotations.keys()))
+ row.update(
+ {
+ key: (val if len(val) > 0 else "")
+ for key, val in entity.annotations.items()
+ }
+ )
+
+ row_provenance = _get_entity_provenance_dict_for_file(entity=entity)
+ row.update(row_provenance)
+
+ data.append(row)
+ keys.extend(annotation_keys)
+ return keys, data
+
+
+def _get_entity_provenance_dict_for_file(entity: "File") -> Dict[str, str]:
+ """
+ Arguments:
+ entity: File entity object
+
+ Returns:
+ dict: a dict with a subset of the provenance metadata for the entity.
+ An empty dict is returned if the metadata does not have a provenance record.
+ """
+ if not entity.activity:
+ return {}
+
+ used_activities = []
+ for used_activity in entity.activity.used:
+ used_activities.append(used_activity.format_for_manifest())
+
+ executed_activities = []
+ for executed_activity in entity.activity.executed:
+ executed_activities.append(executed_activity.format_for_manifest())
+
+ return {
+ "used": ";".join(used_activities),
+ "executed": ";".join(executed_activities),
+ "activityName": entity.activity.name or "",
+ "activityDescription": entity.activity.description or "",
+ }
+
+
+def _validate_manifest_required_fields(
+ manifest_path: str,
+) -> Tuple[bool, List[str]]:
+ """
+ Validate that a manifest file exists and has the required fields.
+
+ Args:
+ manifest_path: Path to the manifest file.
+
+ Returns:
+ Tuple of (is_valid, list_of_error_messages).
+ """
+ errors = []
+
+ if not os.path.isfile(manifest_path):
+ errors.append(f"Manifest file not found: {manifest_path}")
+ return (False, errors)
+
+ try:
+ with io.open(manifest_path, "r", encoding="utf8") as fp:
+ reader = csv.DictReader(fp, delimiter="\t")
+ headers = reader.fieldnames or []
+
+ # Check for required fields
+ for field in REQUIRED_FIELDS:
+ if field not in headers:
+ errors.append(f"Missing required field: {field}")
+
+ # Validate each row
+ row_num = 1
+ for row in reader:
+ row_num += 1
+ path = row.get("path", "")
+ parent = row.get("parent", "")
+
+ if not path:
+ errors.append(f"Row {row_num}: 'path' is empty")
+
+ if not parent:
+ errors.append(f"Row {row_num}: 'parent' is empty")
+ elif not is_synapse_id_str(parent) and not is_url(parent):
+ errors.append(
+ f"Row {row_num}: 'parent' is not a valid Synapse ID: {parent}"
+ )
+
+ # Check if path exists (skip URLs)
+ if path and not is_url(path):
+ expanded_path = os.path.abspath(
+ os.path.expandvars(os.path.expanduser(path))
+ )
+ if not os.path.isfile(expanded_path):
+ errors.append(f"Row {row_num}: File not found: {path}")
+
+ except Exception as e:
+ errors.append(f"Error reading manifest file: {str(e)}")
+
+ return (len(errors) == 0, errors)
+
+
+@async_to_sync
+class ManifestGeneratable(ManifestGeneratableSynchronousProtocol):
+ """
+ Mixin for objects that can generate and read manifest TSV files.
+
+ In order to use this mixin, the class must have the following attributes:
+
+ - `id`
+ - `name`
+ - `_synced_from_synapse`
+
+ The class must also inherit from `StorableContainer` mixin which provides:
+
+ - `flatten_file_list()`
+ - `map_directory_to_all_contained_files()`
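+
+    Example: Combining the required mixins (illustrative sketch)
+        `Project` and `Folder` mix this class in alongside `StorableContainer`;
+        a hypothetical container model would be declared in the same way:
+
+            from synapseclient.models.mixins import (
+                ManifestGeneratable,
+                StorableContainer,
+            )
+
+            # Hypothetical container that can sync files and generate manifests
+            class MyContainer(StorableContainer, ManifestGeneratable):
+                id = None
+                name = None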
+ """
+
+ id: Optional[str] = None
+ name: Optional[str] = None
+ _synced_from_synapse: bool = False
+
+ @otel_trace_method(
+ method_to_trace_name=lambda self, **kwargs: f"{self.__class__.__name__}_generate_manifest: {self.id}"
+ )
+ async def generate_manifest_async(
+ self,
+ path: str,
+ manifest_scope: str = "all",
+ *,
+ synapse_client: Optional[Synapse] = None,
+ ) -> Optional[str]:
+ """
+ Generate a manifest TSV file for all files in this container.
+
+ This method should be called after `sync_from_synapse()` to generate
+ a manifest of all downloaded files with their metadata.
+
+ Arguments:
+ path: The directory where the manifest file(s) will be written.
+ manifest_scope: Controls manifest file generation:
+
+ - "all": Create a manifest in each directory level
+ - "root": Create a single manifest at the root path only
+ - "suppress": Do not create any manifest files
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ The path to the root manifest file if created, or None if suppressed.
+
+ Raises:
+ ValueError: If the container has not been synced from Synapse.
+ ValueError: If manifest_scope is not one of 'all', 'root', 'suppress'.
+
+ Example: Generate manifest after sync
+ Generate a manifest file after syncing from Synapse:
+
+ from synapseclient.models import Project
+
+ import synapseclient
+ synapseclient.login()
+
+ project = Project(id="syn123").sync_from_synapse(
+ path="/path/to/download"
+ )
+ manifest_path = project.generate_manifest(
+ path="/path/to/download",
+ manifest_scope="root"
+ )
+ print(f"Manifest created at: {manifest_path}")
+ """
+ if manifest_scope not in ("all", "root", "suppress"):
+ raise ValueError(
+ 'Value of manifest_scope should be one of ("all", "root", "suppress")'
+ )
+
+ if manifest_scope == "suppress":
+ return None
+
+ if not self._synced_from_synapse:
+ raise ValueError(
+ "Container has not been synced from Synapse. "
+ "Call sync_from_synapse() before generating a manifest."
+ )
+
+ syn = Synapse.get_client(synapse_client=synapse_client)
+
+ # Expand the path
+ path = os.path.expanduser(path) if path else None
+ if not path:
+ raise ValueError("A path must be provided to generate a manifest.")
+
+ # Get all files from this container
+ all_files = self.flatten_file_list()
+
+ if not all_files:
+ syn.logger.info(
+ f"[{self.id}:{self.name}]: No files found in container, "
+ "skipping manifest generation."
+ )
+ return None
+
+ root_manifest_path = None
+
+ if manifest_scope == "root":
+ # Generate a single manifest at the root
+ keys, data = _extract_entity_metadata_for_file(all_files=all_files)
+ manifest_path = _manifest_filename(path)
+ _write_manifest_data(manifest_path, keys, data)
+ root_manifest_path = manifest_path
+ syn.logger.info(
+ f"[{self.id}:{self.name}]: Created manifest at {manifest_path}"
+ )
+ elif manifest_scope == "all":
+ # Generate a manifest at each directory level
+ directory_map = self.map_directory_to_all_contained_files(root_path=path)
+
+ for directory_path, files_in_directory in directory_map.items():
+ if files_in_directory:
+ keys, data = _extract_entity_metadata_for_file(
+ all_files=files_in_directory
+ )
+ manifest_path = _manifest_filename(directory_path)
+ _write_manifest_data(manifest_path, keys, data)
+
+ # Track the root manifest path
+ if directory_path == path:
+ root_manifest_path = manifest_path
+
+ syn.logger.info(
+ f"[{self.id}:{self.name}]: Created manifest at {manifest_path}"
+ )
+
+ return root_manifest_path
+
+ @otel_trace_method(
+ method_to_trace_name=lambda self, **kwargs: f"{self.__class__.__name__}_get_manifest_data: {self.id}"
+ )
+ async def get_manifest_data_async(
+ self,
+ *,
+ synapse_client: Optional[Synapse] = None,
+ ) -> Tuple[List[str], List[Dict[str, str]]]:
+ """
+ Get manifest data for all files in this container.
+
+ This method extracts metadata from all files that have been synced
+ to this container. The data can be used to generate a manifest file
+ or for other purposes.
+
+ Arguments:
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ Tuple of (keys, data) where keys is a list of column headers
+ and data is a list of dictionaries, one per file, containing
+ the file metadata.
+
+ Raises:
+ ValueError: If the container has not been synced from Synapse.
+
+ Example: Get manifest data
+ Get manifest data for all files in a project:
+
+ from synapseclient.models import Project
+
+ import synapseclient
+ synapseclient.login()
+
+ project = Project(id="syn123").sync_from_synapse(
+ path="/path/to/download"
+ )
+ keys, data = project.get_manifest_data()
+ for row in data:
+ print(f"File: {row['name']} at {row['path']}")
+ """
+ if not self._synced_from_synapse:
+ raise ValueError(
+ "Container has not been synced from Synapse. "
+ "Call sync_from_synapse() before getting manifest data."
+ )
+
+ all_files = self.flatten_file_list()
+ return _extract_entity_metadata_for_file(all_files=all_files)
+
+ @classmethod
+ @otel_trace_method(
+ method_to_trace_name=lambda cls, **kwargs: f"{cls.__name__}_from_manifest"
+ )
+ async def from_manifest_async(
+ cls,
+ manifest_path: str,
+ parent_id: str,
+ dry_run: bool = False,
+ merge_existing_annotations: bool = True,
+ associate_activity_to_new_version: bool = False,
+ *,
+ synapse_client: Optional[Synapse] = None,
+ ) -> List["File"]:
+ """
+ Upload files to Synapse from a manifest TSV file.
+
+ This method reads a manifest TSV file and uploads all files defined in it
+ to Synapse. The manifest file must contain at minimum the 'path' and 'parent'
+ columns.
+
+ Arguments:
+ manifest_path: Path to the manifest TSV file.
+ parent_id: The Synapse ID of the parent container (Project or Folder)
+ where files will be uploaded if not specified in the manifest.
+ dry_run: If True, validate the manifest but do not upload.
+ merge_existing_annotations: If True, merge annotations with existing
+ annotations on the file. If False, replace existing annotations.
+ associate_activity_to_new_version: If True, copy the activity
+ (provenance) from the previous version to the new version.
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ List of File objects that were uploaded.
+
+ Raises:
+ ValueError: If the manifest file does not exist.
+ ValueError: If the manifest file is missing required fields.
+ IOError: If a file path in the manifest does not exist.
+
+ Example: Upload files from a manifest
+ Upload files from a manifest TSV file:
+
+ from synapseclient.models import Project
+
+ import synapseclient
+ synapseclient.login()
+
+ files = Project.from_manifest(
+ manifest_path="/path/to/manifest.tsv",
+ parent_id="syn123"
+ )
+ for file in files:
+ print(f"Uploaded: {file.name} ({file.id})")
+
+ Example: Dry run validation
+ Validate a manifest without uploading:
+
+ from synapseclient.models import Project
+
+ import synapseclient
+ synapseclient.login()
+
+ files = Project.from_manifest(
+ manifest_path="/path/to/manifest.tsv",
+ parent_id="syn123",
+ dry_run=True
+ )
+ print("Manifest is valid, ready for upload")
+ """
+ from synapseclient.models import Activity, File
+
+ syn = Synapse.get_client(synapse_client=synapse_client)
+
+ # Validate the manifest
+ is_valid, errors = _validate_manifest_required_fields(manifest_path)
+ if not is_valid:
+ raise ValueError(
+ "Invalid manifest file:\n" + "\n".join(f" - {e}" for e in errors)
+ )
+
+ # Read the manifest
+ rows = []
+ with io.open(manifest_path, "r", encoding="utf8") as fp:
+ reader = csv.DictReader(fp, delimiter="\t")
+ for row in reader:
+ rows.append(row)
+
+ if dry_run:
+ syn.logger.info(
+ f"Dry run: {len(rows)} files would be uploaded from manifest"
+ )
+ return []
+
+ # Build dependency graph for provenance ordering
+ path_to_row = {}
+ upload_order = {}
+
+ for row in rows:
+ path = row.get("path", "")
+            if path and not is_url(path):
+                path = os.path.abspath(os.path.expandvars(os.path.expanduser(path)))
+            # Track every row by its (normalized) path, including URL-based rows,
+            # so each manifest row is picked up by the upload loop below
+            path_to_row[path] = row
+
+ # Collect provenance references
+ all_refs = []
+ used = row.get("used", "")
+ if used and used.strip():
+ for item in used.split(";"):
+ item = item.strip()
+ if item:
+ if os.path.isfile(
+ os.path.abspath(
+ os.path.expandvars(os.path.expanduser(item))
+ )
+ ):
+ all_refs.append(
+ os.path.abspath(
+ os.path.expandvars(os.path.expanduser(item))
+ )
+ )
+
+ executed = row.get("executed", "")
+ if executed and executed.strip():
+ for item in executed.split(";"):
+ item = item.strip()
+ if item:
+ if os.path.isfile(
+ os.path.abspath(
+ os.path.expandvars(os.path.expanduser(item))
+ )
+ ):
+ all_refs.append(
+ os.path.abspath(
+ os.path.expandvars(os.path.expanduser(item))
+ )
+ )
+
+ upload_order[path] = all_refs
+
+ # Topologically sort based on provenance dependencies
+ sorted_paths = topolgical_sort(upload_order)
+ sorted_paths = [p[0] for p in sorted_paths]
+
+ # Track uploaded files for provenance resolution
+ path_to_synapse_id: Dict[str, str] = {}
+ uploaded_files: List["File"] = []
+
+ for path in sorted_paths:
+ row = path_to_row[path]
+
+ # Get parent - use manifest value or fall back to provided parent_id
+ file_parent = row.get("parent", "").strip() or parent_id
+
+ # Build the File object
+ file = File(
+ path=path,
+ parent_id=file_parent,
+ name=row.get("name", "").strip() or None,
+ id=row.get("id", "").strip() or None,
+ synapse_store=(
+ row.get("synapseStore", "").strip().lower() != "false"
+ if row.get("synapseStore", "").strip()
+ else True
+ ),
+ content_type=row.get("contentType", "").strip() or None,
+ merge_existing_annotations=merge_existing_annotations,
+ associate_activity_to_new_version=associate_activity_to_new_version,
+ )
+
+ # Build annotations from extra columns
+ annotations = {}
+ skip_keys = set(
+ REQUIRED_FIELDS
+ + FILE_CONSTRUCTOR_FIELDS
+ + STORE_FUNCTION_FIELDS
+ + PROVENANCE_FIELDS
+ )
+ for key, value in row.items():
+ if key not in skip_keys and value and value.strip():
+ annotations[key] = _parse_manifest_value(value.strip())
+ if annotations:
+ file.annotations = annotations
+
+ # Build provenance/activity
+ used_items = []
+ executed_items = []
+
+ used_str = row.get("used", "")
+ if used_str and used_str.strip():
+ for item in used_str.split(";"):
+ item = item.strip()
+ if item:
+ used_items.append(
+ _resolve_provenance_item(item, path_to_synapse_id)
+ )
+
+ executed_str = row.get("executed", "")
+ if executed_str and executed_str.strip():
+ for item in executed_str.split(";"):
+ item = item.strip()
+ if item:
+ executed_items.append(
+ _resolve_provenance_item(item, path_to_synapse_id)
+ )
+
+ if used_items or executed_items:
+ activity = Activity(
+ name=row.get("activityName", "").strip() or None,
+ description=row.get("activityDescription", "").strip() or None,
+ used=used_items,
+ executed=executed_items,
+ )
+ file.activity = activity
+
+ # Upload the file
+ file = await file.store_async(synapse_client=syn)
+
+ # Track for provenance resolution
+ path_to_synapse_id[path] = file.id
+ uploaded_files.append(file)
+
+ syn.logger.info(f"Uploaded: {file.name} ({file.id})")
+
+ return uploaded_files
+
+ @staticmethod
+ @otel_trace_method(method_to_trace_name=lambda **kwargs: "validate_manifest")
+ async def validate_manifest_async(
+ manifest_path: str,
+ *,
+ synapse_client: Optional[Synapse] = None,
+ ) -> Tuple[bool, List[str]]:
+ """
+ Validate a manifest TSV file without uploading.
+
+ This method validates a manifest file to ensure it is properly formatted
+ and all paths exist.
+
+ Arguments:
+ manifest_path: Path to the manifest TSV file.
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ Tuple of (is_valid, list_of_error_messages). If the manifest is valid,
+ is_valid will be True and the list will be empty.
+
+ Example: Validate a manifest file
+ Validate a manifest file before uploading:
+
+ from synapseclient.models import Project
+
+ is_valid, errors = Project.validate_manifest(
+ manifest_path="/path/to/manifest.tsv"
+ )
+ if is_valid:
+ print("Manifest is valid")
+ else:
+ for error in errors:
+ print(f"Error: {error}")
+ """
+ return _validate_manifest_required_fields(manifest_path)
+
+ @staticmethod
+ async def generate_download_list_manifest_async(
+ download_path: str,
+ csv_separator: str = ",",
+ include_header: bool = True,
+ timeout: int = 120,
+ *,
+ synapse_client: Optional[Synapse] = None,
+ ) -> str:
+ """
+ Generate a manifest file from the current user's download list using the
+ Synapse REST API.
+
+ This method creates a CSV manifest containing metadata about all files in
+ the user's download list. The manifest is generated server-side by Synapse
+ and then downloaded to the specified path.
+
+ This is interoperable with the Synapse download list feature and provides
+ a way to export the download list as a manifest file that can be used for
+ bulk operations.
+
+ Arguments:
+ download_path: The local directory path where the manifest will be saved.
+ csv_separator: The delimiter character for the CSV file.
+ Defaults to "," for comma-separated values. Use "\t" for tab-separated.
+ include_header: Whether to include column headers in the first row.
+ Defaults to True.
+ timeout: The number of seconds to wait for the job to complete.
+ Defaults to 120 seconds.
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ The full path to the downloaded manifest file.
+
+ Example: Generate manifest from download list
+ Generate a manifest from your Synapse download list:
+
+ from synapseclient.models import Project
+
+ import synapseclient
+ synapseclient.login()
+
+ # Generate manifest from download list
+ manifest_path = Project.generate_download_list_manifest(
+ download_path="/path/to/download"
+ )
+ print(f"Manifest downloaded to: {manifest_path}")
+
+ Example: Generate tab-separated manifest
+ Generate a TSV manifest from your download list:
+
+ from synapseclient.models import Project
+
+ import synapseclient
+ synapseclient.login()
+
+ manifest_path = Project.generate_download_list_manifest(
+ download_path="/path/to/download",
+ csv_separator="\t"
+ )
+
+ See Also:
+ - `DownloadListManifestRequest`: The underlying request class for more
+ fine-grained control over the manifest generation process.
+ """
+ from synapseclient.models.download_list import DownloadListManifestRequest
+ from synapseclient.models.table_components import CsvTableDescriptor
+
+ # Create the request with CSV formatting options
+ request = DownloadListManifestRequest(
+ csv_table_descriptor=CsvTableDescriptor(
+ separator=csv_separator,
+ is_first_line_header=include_header,
+ )
+ )
+
+ # Send the job and wait for completion
+ await request.send_job_and_wait_async(
+ timeout=timeout,
+ synapse_client=synapse_client,
+ )
+
+ # Download the manifest
+ manifest_file_path = await request.download_manifest_async(
+ download_path=download_path,
+ synapse_client=synapse_client,
+ )
+
+ return manifest_file_path
+
+
+def _resolve_provenance_item(
+ item: str,
+ path_to_synapse_id: Dict[str, str],
+) -> Any:
+ """
+ Resolve a provenance item to a UsedEntity or UsedURL.
+
+ Args:
+ item: The provenance item string (could be a path, Synapse ID, or URL).
+ path_to_synapse_id: Mapping of local file paths to their Synapse IDs.
+
+ Returns:
+ UsedEntity or UsedURL object.
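+
+    Example: Examples
+        Illustrative resolutions (assuming the local path was uploaded earlier
+        in the same manifest run):
+
+            # A URL resolves to a UsedURL
+            _resolve_provenance_item("https://example.org/script.py", {})
+
+            # A Synapse ID resolves to a UsedEntity
+            _resolve_provenance_item("syn456", {})
+
+            # A previously uploaded local file resolves to its new Synapse ID
+            _resolve_provenance_item(
+                "/data/input.csv", {"/data/input.csv": "syn789"}
+            )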
+ """
+ from synapseclient.models import UsedEntity, UsedURL
+
+ # Check if it's a local file path that was uploaded
+ expanded_path = os.path.abspath(os.path.expandvars(os.path.expanduser(item)))
+ if expanded_path in path_to_synapse_id:
+ return UsedEntity(target_id=path_to_synapse_id[expanded_path])
+
+ # Check if it's a URL
+ if is_url(item):
+ return UsedURL(url=item)
+
+ # Check if it's a Synapse ID
+ if is_synapse_id_str(item):
+ return UsedEntity(target_id=item)
+
+    # Fall back to treating the item as an entity reference
+    return UsedEntity(target_id=item)
+
+
+def _parse_manifest_value(value: str) -> Any:
+ """
+ Parse a manifest cell value into an appropriate Python type.
+
+ Handles:
+ - List syntax: [a,b,c] -> ['a', 'b', 'c']
+ - Boolean strings: 'true', 'false' -> True, False
+ - Numeric strings: '123' -> 123, '1.5' -> 1.5
+ - Everything else: returned as string
+
+ Args:
+ value: The string value from the manifest.
+
+ Returns:
+ The parsed value.
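+
+    Example: Examples
+        Illustrative conversions:
+
+        >>> _parse_manifest_value("[a,b,c]")
+        ['a', 'b', 'c']
+        >>> _parse_manifest_value("true")
+        True
+        >>> _parse_manifest_value("42")
+        42
+        >>> _parse_manifest_value("1.5")
+        1.5
+        >>> _parse_manifest_value("hello")
+        'hello'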
+ """
+ # Check for list syntax
+ if ARRAY_BRACKET_PATTERN.match(value):
+ # Remove brackets
+ inner = value[1:-1]
+ # Split on commas outside quotes
+ items = COMMAS_OUTSIDE_DOUBLE_QUOTES_PATTERN.split(inner)
+ result = []
+ for item in items:
+ item = item.strip()
+ # Remove surrounding quotes if present
+ if item.startswith('"') and item.endswith('"'):
+ item = item[1:-1]
+ result.append(item)
+ return result
+
+ # Check for boolean
+ if value.lower() == "true":
+ return True
+ if value.lower() == "false":
+ return False
+
+ # Check for integer
+ try:
+ return int(value)
+ except ValueError:
+ pass
+
+ # Check for float
+ try:
+ return float(value)
+ except ValueError:
+ pass
+
+ # Return as string
+ return value
diff --git a/synapseclient/models/mixins/storable_container.py b/synapseclient/models/mixins/storable_container.py
index 25432a6b9..1a2d557f2 100644
--- a/synapseclient/models/mixins/storable_container.py
+++ b/synapseclient/models/mixins/storable_container.py
@@ -159,6 +159,7 @@ async def sync_from_synapse_async(
link_hops: int = 1,
queue: asyncio.Queue = None,
include_types: Optional[List[str]] = None,
+ generate_manifest: str = "suppress",
*,
synapse_client: Optional[Synapse] = None,
) -> Self:
@@ -170,9 +171,8 @@ async def sync_from_synapse_async(
If you only want to retrieve the full tree of metadata about your
container specify `download_file` as False.
- This works similar to [synapseutils.syncFromSynapse][], however, this does not
- currently support the writing of data to a manifest TSV file. This will be a
- future enhancement.
+    This works similarly to [synapseutils.syncFromSynapse][] and supports
+    generating a manifest TSV file with file metadata.
Supports syncing Files, Folders, Tables, EntityViews, SubmissionViews, Datasets,
DatasetCollections, MaterializedViews, and VirtualTables from Synapse. The
@@ -208,6 +208,13 @@ async def sync_from_synapse_async(
`["folder", "file", "table", "entityview", "dockerrepo",
"submissionview", "dataset", "datasetcollection", "materializedview",
"virtualtable"]`.
+ generate_manifest: Controls manifest file generation. Options:
+
+ - "all": Create a manifest in each directory level
+ - "root": Create a single manifest at the root path only
+ - "suppress": (Default) Do not create any manifest files
+
+ A path must be specified for manifest generation.
synapse_client: If not passed in and caching was not disabled by
`Synapse.allow_client_caching(False)` this will use the last created
instance from the Synapse class constructor.
@@ -386,7 +393,7 @@ async def my_function():
file_size=1, synapse_client=syn, custom_message=custom_message
):
self._synced_from_synapse = True
- return await self._sync_from_synapse_async(
+ await self._sync_from_synapse_async(
path=path,
recursive=recursive,
download_file=download_file,
@@ -400,6 +407,19 @@ async def my_function():
synapse_client=syn,
)
+ # Generate manifest if requested and path is provided
+ if generate_manifest != "suppress" and path:
+ # The manifest generation is handled by ManifestGeneratable mixin
+ # which provides generate_manifest_async method
+ if hasattr(self, "generate_manifest_async"):
+ await self.generate_manifest_async(
+ path=path,
+ manifest_scope=generate_manifest,
+ synapse_client=syn,
+ )
+
+ return self
+
async def _sync_from_synapse_async(
self: Self,
path: Optional[str] = None,
diff --git a/synapseclient/models/mixins/storage_location_mixin.py b/synapseclient/models/mixins/storage_location_mixin.py
new file mode 100644
index 000000000..db3c509a8
--- /dev/null
+++ b/synapseclient/models/mixins/storage_location_mixin.py
@@ -0,0 +1,450 @@
+"""Mixin for entities that can have their storage location configured."""
+
+import asyncio
+from typing import Any, Dict, List, Optional, Union
+
+from synapseclient import Synapse
+from synapseclient.api.storage_location_services import (
+ create_project_setting,
+ delete_project_setting,
+ get_project_setting,
+ update_project_setting,
+)
+from synapseclient.core.async_utils import async_to_sync, otel_trace_method
+from synapseclient.core.constants import concrete_types
+from synapseclient.models.protocols.storage_location_mixin_protocol import (
+ StorageLocationConfigurableSynchronousProtocol,
+)
+from synapseclient.models.services.migration import (
+ index_files_for_migration_async as _index_files_for_migration_async,
+)
+from synapseclient.models.services.migration import (
+ migrate_indexed_files_async as _migrate_indexed_files_async,
+)
+from synapseclient.models.services.migration_types import MigrationResult
+
+# Default storage location ID used by Synapse
+DEFAULT_STORAGE_LOCATION_ID = 1
+
+
+@async_to_sync
+class StorageLocationConfigurable(StorageLocationConfigurableSynchronousProtocol):
+ """Mixin for objects that can have their storage location configured.
+
+ In order to use this mixin, the class must have an `id` attribute.
+
+ This mixin provides methods for:
+ - Setting and getting the upload storage location for an entity
+ - Getting STS (AWS Security Token Service) credentials for direct S3 access
+ - Migrating files to a new storage location
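+
+    Example: Using the mixin through a model (illustrative sketch)
+        `Project` and `Folder` inherit this mixin, so the methods below are
+        available directly on those models. A minimal sketch, assuming a
+        hypothetical folder `syn123` and storage location `12345`:
+
+            import asyncio
+
+            from synapseclient import Synapse
+            from synapseclient.models import Folder
+
+            syn = Synapse()
+            syn.login()
+
+            async def main():
+                folder = await Folder(id="syn123").get_async()
+                await folder.set_storage_location_async(
+                    storage_location_id=12345
+                )
+
+            asyncio.run(main())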
+ """
+
+ id: Optional[str] = None
+ """The unique immutable ID for this entity."""
+
+ @otel_trace_method(
+ method_to_trace_name=lambda self, **kwargs: f"Entity_SetStorageLocation: {self.id}"
+ )
+ async def set_storage_location_async(
+ self,
+ storage_location_id: Optional[Union[int, List[int]]] = None,
+ *,
+ synapse_client: Optional[Synapse] = None,
+ ) -> Dict[str, Any]:
+ """Set the upload storage location for this entity. This configures where
+ files uploaded to this entity will be stored.
+
+ Arguments:
+ storage_location_id: The storage location ID(s) to set. Can be a single
+ ID, a list of IDs (first is default, max 10), or None to use
+ Synapse default storage.
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ The project setting dict returned from Synapse.
+
+ Raises:
+ ValueError: If the entity does not have an id set.
+
+ Example: Using this function
+ Set storage location on a folder:
+
+ import asyncio
+ from synapseclient import Synapse
+ from synapseclient.models import Folder
+
+ syn = Synapse()
+ syn.login()
+
+ async def main():
+ folder = await Folder(id="syn123").get_async()
+ setting = await folder.set_storage_location_async(
+ storage_location_id=12345
+ )
+ print(setting)
+
+ asyncio.run(main())
+ """
+ if not self.id:
+ raise ValueError("The entity must have an id set.")
+
+ if storage_location_id is None:
+ storage_location_id = DEFAULT_STORAGE_LOCATION_ID
+
+ locations = (
+ storage_location_id
+ if isinstance(storage_location_id, list)
+ else [storage_location_id]
+ )
+
+ existing_setting = await get_project_setting(
+ project_id=self.id,
+ setting_type="upload",
+ synapse_client=synapse_client,
+ )
+
+ if existing_setting is not None:
+ existing_setting["locations"] = locations
+ await update_project_setting(
+ body=existing_setting,
+ synapse_client=synapse_client,
+ )
+ return await get_project_setting(
+ project_id=self.id,
+ setting_type="upload",
+ synapse_client=synapse_client,
+ )
+ else:
+ project_destination = {
+ "concreteType": concrete_types.UPLOAD_DESTINATION_LIST_SETTING,
+ "settingsType": "upload",
+ "locations": locations,
+ "projectId": self.id,
+ }
+ return await create_project_setting(
+ body=project_destination,
+ synapse_client=synapse_client,
+ )
+
+ @otel_trace_method(
+ method_to_trace_name=lambda self, **kwargs: f"Entity_GetProjectSetting: {self.id}"
+ )
+ async def get_project_setting_async(
+ self,
+ setting_type: str = "upload",
+ *,
+ synapse_client: Optional[Synapse] = None,
+ ) -> Optional[Dict[str, Any]]:
+ """Get the project setting for this entity.
+
+ Arguments:
+ setting_type: The type of setting to retrieve. One of:
+ 'upload', 'external_sync', 'requester_pays'. Default: 'upload'.
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ The project setting as a dictionary, or None if no setting exists.
+
+ Raises:
+ ValueError: If the entity does not have an id set.
+
+ Example: Using this function
+ Get the upload settings for a folder:
+
+ import asyncio
+ from synapseclient import Synapse
+ from synapseclient.models import Folder
+
+ syn = Synapse()
+ syn.login()
+
+ async def main():
+ folder = await Folder(id="syn123").get_async()
+ setting = await folder.get_project_setting_async(setting_type="upload")
+ if setting:
+ print(f"Storage locations: {setting.get('locations')}")
+
+ asyncio.run(main())
+ """
+ if not self.id:
+ raise ValueError("The entity must have an id set.")
+
+ if setting_type not in {"upload", "external_sync", "requester_pays"}:
+ raise ValueError(f"Invalid setting_type: {setting_type}")
+
+ return await get_project_setting(
+ project_id=self.id,
+ setting_type=setting_type,
+ synapse_client=synapse_client,
+ )
+
+ @otel_trace_method(
+ method_to_trace_name=lambda self, **kwargs: f"Entity_DeleteProjectSetting: {self.id}"
+ )
+ async def delete_project_setting_async(
+ self,
+ setting_id: str,
+ *,
+ synapse_client: Optional[Synapse] = None,
+ ) -> None:
+ """Delete a project setting by its setting ID.
+
+ Arguments:
+ setting_id: The ID of the project setting to delete.
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ None
+
+ Raises:
+ ValueError: If the entity does not have an id set.
+
+ Example: Using this function
+ Delete the upload settings for a folder:
+
+ import asyncio
+ from synapseclient import Synapse
+ from synapseclient.models import Folder
+
+ syn = Synapse()
+ syn.login()
+
+ async def main():
+ folder = await Folder(id="syn123").get_async()
+ setting = await folder.get_project_setting_async(setting_type="upload")
+ if setting:
+ await folder.delete_project_setting_async(setting_id=setting['id'])
+
+ asyncio.run(main())
+ """
+ if not self.id:
+ raise ValueError("The entity must have an id set.")
+
+ await delete_project_setting(
+ setting_id=setting_id,
+ synapse_client=synapse_client,
+ )
+
+ @otel_trace_method(
+ method_to_trace_name=lambda self, **kwargs: f"Entity_GetStsStorageToken: {self.id}"
+ )
+ async def get_sts_storage_token_async(
+ self,
+ permission: str,
+ *,
+ output_format: str = "json",
+ min_remaining_life: Optional[int] = None,
+ synapse_client: Optional[Synapse] = None,
+ ) -> Any:
+ """Get STS (AWS Security Token Service) credentials for direct access to
+ the storage location backing this entity. These credentials can be used
+ with AWS tools like awscli and boto3.
+
+ Arguments:
+ permission: The permission level for the token. Must be 'read_only'
+ or 'read_write'.
+ output_format: The output format for the credentials. Options:
+ 'json' (default), 'boto', 'shell', 'bash', 'cmd', 'powershell'.
+ min_remaining_life: The minimum remaining life (in seconds) for a
+ cached token before a new one is fetched.
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ The STS credentials in the requested format.
+
+ Raises:
+ ValueError: If the entity does not have an id set.
+
+ Example: Using credentials with boto3
+ Get STS credentials for an STS-enabled folder and use with boto3:
+
+ import asyncio
+ import boto3
+ from synapseclient import Synapse
+ from synapseclient.models import Folder
+
+ syn = Synapse()
+ syn.login()
+
+ async def main():
+ folder = await Folder(id="syn123").get_async()
+ credentials = await folder.get_sts_storage_token_async(
+ permission="read_write",
+ output_format="boto",
+ )
+ s3_client = boto3.client('s3', **credentials)
+
+ asyncio.run(main())
+ """
+ if not self.id:
+ raise ValueError("The entity must have an id set.")
+
+ from synapseclient.core import sts_transfer
+
+ client = Synapse.get_client(synapse_client=synapse_client)
+
+ return await asyncio.to_thread(
+ sts_transfer.get_sts_credentials,
+ client,
+ self.id,
+ permission,
+ output_format=output_format,
+ min_remaining_life=min_remaining_life,
+ )
+
+ @otel_trace_method(
+ method_to_trace_name=lambda self, **kwargs: f"Entity_IndexFilesForMigration: {self.id}"
+ )
+ async def index_files_for_migration_async(
+ self,
+ dest_storage_location_id: int,
+ db_path: Optional[str] = None,
+ *,
+ source_storage_location_ids: Optional[List[int]] = None,
+ file_version_strategy: str = "new",
+ include_table_files: bool = False,
+ continue_on_error: bool = False,
+ synapse_client: Optional[Synapse] = None,
+ ) -> MigrationResult:
+ """Index files in this entity for migration to a new storage location.
+
+ This is the first step in migrating files to a new storage location.
+ After indexing, use `migrate_indexed_files` to perform the actual migration.
+
+ Arguments:
+ dest_storage_location_id: The destination storage location ID.
+ db_path: Path to the SQLite database file for tracking migration state.
+ If not provided, a temporary directory will be used. The path
+ can be retrieved from the returned MigrationResult.db_path.
+ source_storage_location_ids: Optional list of source storage location IDs
+ to filter which files to migrate. If None, all files are indexed.
+            file_version_strategy: Strategy for handling file versions. Options:
+                'new' (default) - migrate by creating a new version of each file,
+                'all' - migrate all existing versions, 'latest' - migrate only the
+                latest version, 'skip' - do not migrate file entities at all (only
+                useful together with include_table_files).
+ include_table_files: Whether to include files attached to tables.
+ continue_on_error: Whether to continue indexing if an error occurs.
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ A MigrationResult object containing indexing statistics and the database
+ path (accessible via result.db_path).
+
+ Example: Indexing files for migration
+ Index files in a project for migration:
+
+ import asyncio
+ from synapseclient import Synapse
+ from synapseclient.models import Project
+
+ syn = Synapse()
+ syn.login()
+
+ async def main():
+ project = await Project(id="syn123").get_async()
+ result = await project.index_files_for_migration_async(
+ dest_storage_location_id=12345,
+ )
+ print(f"Database path: {result.db_path}")
+ print(f"Indexed {result.counts_by_status}")
+
+ asyncio.run(main())
+ """
+ if not self.id:
+ raise ValueError("The entity must have an id set.")
+
+ return await _index_files_for_migration_async(
+ entity_id=self.id,
+ dest_storage_location_id=str(dest_storage_location_id),
+ db_path=db_path,
+ source_storage_location_ids=(
+ [str(s) for s in source_storage_location_ids]
+ if source_storage_location_ids
+ else None
+ ),
+ file_version_strategy=file_version_strategy,
+ include_table_files=include_table_files,
+ continue_on_error=continue_on_error,
+ synapse_client=synapse_client,
+ )
+
+ @otel_trace_method(
+ method_to_trace_name=lambda self, **kwargs: f"Entity_MigrateIndexedFiles: {self.id}"
+ )
+ async def migrate_indexed_files_async(
+ self,
+ db_path: str,
+ *,
+ create_table_snapshots: bool = True,
+ continue_on_error: bool = False,
+ force: bool = False,
+ synapse_client: Optional[Synapse] = None,
+ ) -> Optional[MigrationResult]:
+ """Migrate files that have been indexed with `index_files_for_migration`.
+
+ This is the second step in migrating files to a new storage location.
+ Files must first be indexed using `index_files_for_migration`.
+
+ Arguments:
+ db_path: Path to the SQLite database file created by
+ `index_files_for_migration`. You can get this from the
+ MigrationResult.db_path returned by index_files_for_migration.
+ create_table_snapshots: Whether to create table snapshots before
+ migrating table files.
+ continue_on_error: Whether to continue migration if an error occurs.
+ force: Whether to force migration of files that have already been
+ migrated. Also bypasses interactive confirmation.
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ A MigrationResult object containing migration statistics, or None
+ if the user declined the confirmation prompt.
+
+ Example: Migrating indexed files
+ Migrate previously indexed files:
+
+ import asyncio
+ from synapseclient import Synapse
+ from synapseclient.models import Project
+
+ syn = Synapse()
+ syn.login()
+
+ async def main():
+ project = await Project(id="syn123").get_async()
+
+ # Index first
+ index_result = await project.index_files_for_migration_async(
+ dest_storage_location_id=12345,
+ )
+
+ # Then migrate using the db_path from index result
+ result = await project.migrate_indexed_files_async(
+ db_path=index_result.db_path,
+ force=True, # Skip interactive confirmation
+ )
+ print(f"Migrated {result.counts_by_status}")
+
+ asyncio.run(main())
+ """
+ if not self.id:
+ raise ValueError("The entity must have an id set.")
+
+ return await _migrate_indexed_files_async(
+ db_path=db_path,
+ create_table_snapshots=create_table_snapshots,
+ continue_on_error=continue_on_error,
+ force=force,
+ synapse_client=synapse_client,
+ )
diff --git a/synapseclient/models/project.py b/synapseclient/models/project.py
index a1a6a1c21..6686c8ac5 100644
--- a/synapseclient/models/project.py
+++ b/synapseclient/models/project.py
@@ -18,6 +18,10 @@
ContainerEntityJSONSchema,
StorableContainer,
)
+from synapseclient.models.mixins.manifest import ManifestGeneratable
+from synapseclient.models.mixins.storage_location_mixin import (
+ StorageLocationConfigurable,
+)
from synapseclient.models.protocols.project_protocol import ProjectSynchronousProtocol
from synapseclient.models.services.search import get_id
from synapseclient.models.services.storable_entity import store_entity
@@ -46,6 +50,8 @@ class Project(
AccessControllable,
StorableContainer,
ContainerEntityJSONSchema,
+ StorageLocationConfigurable,
+ ManifestGeneratable,
):
"""A Project is a top-level container for organizing data in Synapse.
diff --git a/synapseclient/models/protocols/download_list_protocol.py b/synapseclient/models/protocols/download_list_protocol.py
new file mode 100644
index 000000000..7152d4bf1
--- /dev/null
+++ b/synapseclient/models/protocols/download_list_protocol.py
@@ -0,0 +1,97 @@
+"""Protocol for the specific methods of download list classes that have synchronous counterparts
+generated at runtime."""
+
+from typing import Any, Dict, Optional, Protocol
+
+from typing_extensions import Self
+
+from synapseclient import Synapse
+
+
+class DownloadListManifestRequestSynchronousProtocol(Protocol):
+ """
+ The protocol for methods that are asynchronous but also
+ have a synchronous counterpart that may also be called.
+ """
+
+ def send_job_and_wait(
+ self,
+ post_exchange_args: Optional[Dict[str, Any]] = None,
+ timeout: int = 120,
+ *,
+ synapse_client: Optional[Synapse] = None,
+ ) -> Self:
+ """Send the job to the Asynchronous Job service and wait for it to complete.
+
+ This method sends the manifest generation request to Synapse and waits
+ for the job to complete. After completion, the `result_file_handle_id`
+ attribute will be populated.
+
+ Arguments:
+ post_exchange_args: Additional arguments to pass to the request.
+ timeout: The number of seconds to wait for the job to complete or progress
+ before raising a SynapseTimeoutError. Defaults to 120.
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ This instance with `result_file_handle_id` populated.
+
+ Raises:
+ SynapseTimeoutError: If the job does not complete within the timeout.
+ SynapseError: If the job fails.
+
+ Example: Generate a manifest
+ Generate a manifest from the download list:
+
+ from synapseclient.models import DownloadListManifestRequest
+ import synapseclient
+
+ synapseclient.login()
+
+ request = DownloadListManifestRequest()
+ request.send_job_and_wait()
+ print(f"Manifest file handle: {request.result_file_handle_id}")
+ """
+ return self
+
+ def download_manifest(
+ self,
+ download_path: str,
+ *,
+ synapse_client: Optional[Synapse] = None,
+ ) -> str:
+ """
+ Download the generated manifest file to a local path.
+
+ This method should be called after `send_job_and_wait()` has completed
+ successfully and `result_file_handle_id` is populated.
+
+ Arguments:
+ download_path: The local directory path where the manifest will be saved.
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ The full path to the downloaded manifest file.
+
+ Raises:
+ ValueError: If the manifest has not been generated yet (no result_file_handle_id).
+
+ Example: Download the manifest after generation
+ Generate and download a manifest:
+
+ from synapseclient.models import DownloadListManifestRequest
+ import synapseclient
+
+ synapseclient.login()
+
+ request = DownloadListManifestRequest()
+ request.send_job_and_wait()
+
+ manifest_path = request.download_manifest(download_path="/path/to/download")
+ print(f"Manifest downloaded to: {manifest_path}")
+ """
+ return ""
diff --git a/synapseclient/models/protocols/manifest_protocol.py b/synapseclient/models/protocols/manifest_protocol.py
new file mode 100644
index 000000000..1da447da0
--- /dev/null
+++ b/synapseclient/models/protocols/manifest_protocol.py
@@ -0,0 +1,240 @@
+"""Protocol for the specific methods of ManifestGeneratable mixin that have
+synchronous counterparts generated at runtime."""
+
+from typing import Dict, List, Optional, Protocol, Tuple
+
+from synapseclient import Synapse
+
+
+class ManifestGeneratableSynchronousProtocol(Protocol):
+ """
+ The protocol for methods that are asynchronous but also
+ have a synchronous counterpart that may also be called.
+ """
+
+ def generate_manifest(
+ self,
+ path: str,
+ manifest_scope: str = "all",
+ *,
+ synapse_client: Optional[Synapse] = None,
+ ) -> Optional[str]:
+ """Generate a manifest TSV file for all files in this container.
+
+ This method should be called after `sync_from_synapse()` to generate
+ a manifest of all downloaded files with their metadata.
+
+ Arguments:
+ path: The directory where the manifest file(s) will be written.
+ manifest_scope: Controls manifest file generation:
+
+ - "all": Create a manifest in each directory level
+ - "root": Create a single manifest at the root path only
+ - "suppress": Do not create any manifest files
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ The path to the root manifest file if created, or None if suppressed.
+
+ Raises:
+ ValueError: If the container has not been synced from Synapse.
+ ValueError: If manifest_scope is not one of 'all', 'root', 'suppress'.
+
+ Example: Generate manifest after sync
+ Generate a manifest file after syncing from Synapse:
+
+ from synapseclient.models import Project
+
+ import synapseclient
+ synapseclient.login()
+
+ project = Project(id="syn123").sync_from_synapse(
+ path="/path/to/download"
+ )
+ manifest_path = project.generate_manifest(
+ path="/path/to/download",
+ manifest_scope="root"
+ )
+ print(f"Manifest created at: {manifest_path}")
+ """
+ return None
+
+ @classmethod
+ def from_manifest(
+ cls,
+ manifest_path: str,
+ parent_id: str,
+ dry_run: bool = False,
+ merge_existing_annotations: bool = True,
+ associate_activity_to_new_version: bool = False,
+ *,
+ synapse_client: Optional[Synapse] = None,
+ ) -> List:
+ """Upload files to Synapse from a manifest TSV file.
+
+ This method reads a manifest TSV file and uploads all files defined in it
+ to Synapse. The manifest file must contain at minimum the 'path' and 'parent'
+ columns.
+
+ Arguments:
+ manifest_path: Path to the manifest TSV file.
+ parent_id: The Synapse ID of the parent container (Project or Folder)
+ where files will be uploaded if not specified in the manifest.
+ dry_run: If True, validate the manifest but do not upload.
+ merge_existing_annotations: If True, merge annotations with existing
+ annotations on the file. If False, replace existing annotations.
+ associate_activity_to_new_version: If True, copy the activity
+ (provenance) from the previous version to the new version.
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ List of File objects that were uploaded.
+
+ Example: Upload files from a manifest
+ Upload files from a manifest TSV file:
+
+ from synapseclient.models import Project
+
+ import synapseclient
+ synapseclient.login()
+
+ files = Project.from_manifest(
+ manifest_path="/path/to/manifest.tsv",
+ parent_id="syn123"
+ )
+ for file in files:
+ print(f"Uploaded: {file.name} ({file.id})")
+ """
+ return []
+
+ @staticmethod
+ def validate_manifest(
+ manifest_path: str,
+ *,
+ synapse_client: Optional[Synapse] = None,
+ ) -> Tuple[bool, List[str]]:
+ """Validate a manifest TSV file without uploading.
+
+ This method validates a manifest file to ensure it is properly formatted
+ and all paths exist.
+
+ Arguments:
+ manifest_path: Path to the manifest TSV file.
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ Tuple of (is_valid, list_of_error_messages). If the manifest is valid,
+ is_valid will be True and the list will be empty.
+
+ Example: Validate a manifest file
+ Validate a manifest file before uploading:
+
+ from synapseclient.models import Project
+
+ is_valid, errors = Project.validate_manifest(
+ manifest_path="/path/to/manifest.tsv"
+ )
+ if is_valid:
+ print("Manifest is valid")
+ else:
+ for error in errors:
+ print(f"Error: {error}")
+ """
+ return (True, [])
+
+ def get_manifest_data(
+ self,
+ *,
+ synapse_client: Optional[Synapse] = None,
+ ) -> Tuple[List[str], List[Dict[str, str]]]:
+ """Get manifest data for all files in this container.
+
+ This method extracts metadata from all files that have been synced
+ to this container. The data can be used to generate a manifest file
+ or for other purposes.
+
+ Arguments:
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ Tuple of (keys, data) where keys is a list of column headers
+ and data is a list of dictionaries, one per file, containing
+ the file metadata.
+
+ Raises:
+ ValueError: If the container has not been synced from Synapse.
+
+ Example: Get manifest data
+ Get manifest data for all files in a project:
+
+ from synapseclient.models import Project
+
+ import synapseclient
+ synapseclient.login()
+
+ project = Project(id="syn123").sync_from_synapse(
+ path="/path/to/download"
+ )
+ keys, data = project.get_manifest_data()
+ for row in data:
+ print(f"File: {row['name']} at {row['path']}")
+ """
+ return ([], [])
+
+ @staticmethod
+ def generate_download_list_manifest(
+ download_path: str,
+ csv_separator: str = ",",
+ include_header: bool = True,
+ timeout: int = 120,
+ *,
+ synapse_client: Optional[Synapse] = None,
+ ) -> str:
+ """Generate a manifest file from the current user's download list.
+
+ This method creates a CSV manifest containing metadata about all files in
+ the user's download list. The manifest is generated server-side by Synapse
+ and then downloaded to the specified path.
+
+ This is interoperable with the Synapse download list feature and provides
+ a way to export the download list as a manifest file that can be used for
+ bulk operations.
+
+ Arguments:
+ download_path: The local directory path where the manifest will be saved.
+ csv_separator: The delimiter character for the CSV file.
+ Defaults to "," for comma-separated values. Use "\t" for tab-separated.
+ include_header: Whether to include column headers in the first row.
+ Defaults to True.
+ timeout: The number of seconds to wait for the job to complete.
+ Defaults to 120 seconds.
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ The full path to the downloaded manifest file.
+
+ Example: Generate manifest from download list
+ Generate a manifest from your Synapse download list:
+
+ from synapseclient.models import Project
+
+ import synapseclient
+ synapseclient.login()
+
+ # Generate manifest from download list
+ manifest_path = Project.generate_download_list_manifest(
+ download_path="/path/to/download"
+ )
+ print(f"Manifest downloaded to: {manifest_path}")
+ """
+ return ""
diff --git a/synapseclient/models/protocols/storable_container_protocol.py b/synapseclient/models/protocols/storable_container_protocol.py
index 0352132d1..245836adf 100644
--- a/synapseclient/models/protocols/storable_container_protocol.py
+++ b/synapseclient/models/protocols/storable_container_protocol.py
@@ -29,6 +29,7 @@ def sync_from_synapse(
link_hops: int = 1,
queue: asyncio.Queue = None,
include_types: Optional[List[str]] = None,
+ generate_manifest: str = "suppress",
*,
synapse_client: Optional[Synapse] = None,
) -> Self:
@@ -40,9 +41,8 @@ def sync_from_synapse(
If you only want to retrieve the full tree of metadata about your
container specify `download_file` as False.
- This works similar to [synapseutils.syncFromSynapse][], however, this does not
- currently support the writing of data to a manifest TSV file. This will be a
- future enhancement.
+    This works similarly to [synapseutils.syncFromSynapse][] and supports
+ generating a manifest TSV file with file metadata.
Supports syncing Files, Folders, Tables, EntityViews, SubmissionViews, Datasets,
DatasetCollections, MaterializedViews, and VirtualTables from Synapse. The
@@ -74,6 +74,13 @@ def sync_from_synapse(
include_types: Must be a list of entity types (ie. ["folder","file"]) which
can be found
[here](https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/model/EntityType.html)
+ generate_manifest: Controls manifest file generation. Options:
+
+ - "all": Create a manifest in each directory level
+ - "root": Create a single manifest at the root path only
+ - "suppress": (Default) Do not create any manifest files
+
+            The `path` argument must be specified for manifest generation.
synapse_client: If not passed in and caching was not disabled by
`Synapse.allow_client_caching(False)` this will use the last created
instance from the Synapse class constructor.
diff --git a/synapseclient/models/protocols/storage_location_mixin_protocol.py b/synapseclient/models/protocols/storage_location_mixin_protocol.py
new file mode 100644
index 000000000..7403972a6
--- /dev/null
+++ b/synapseclient/models/protocols/storage_location_mixin_protocol.py
@@ -0,0 +1,279 @@
+"""Protocol for the specific methods of StorageLocationConfigurable mixin that have
+synchronous counterparts generated at runtime."""
+
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Protocol, Union
+
+from synapseclient import Synapse
+
+if TYPE_CHECKING:
+ from synapseclient.models.services.migration_types import MigrationResult
+
+
+class StorageLocationConfigurableSynchronousProtocol(Protocol):
+ """
+ The protocol for methods that are asynchronous but also
+ have a synchronous counterpart that may also be called.
+ """
+
+ def set_storage_location(
+ self,
+ storage_location_id: Optional[Union[int, List[int]]] = None,
+ *,
+ synapse_client: Optional[Synapse] = None,
+ ) -> Dict[str, Any]:
+ """Set the upload storage location for this entity. This configures where
+ files uploaded to this entity will be stored.
+
+ Arguments:
+ storage_location_id: The storage location ID(s) to set. Can be a single
+ ID, a list of IDs (first is default, max 10), or None to use
+ Synapse default storage.
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ The project setting dict returned from Synapse.
+
+ Raises:
+ ValueError: If the entity does not have an id set.
+
+ Example: Setting storage location on a folder
+ Set storage location on a folder:
+
+ from synapseclient.models import Folder
+
+ import synapseclient
+ synapseclient.login()
+
+ folder = Folder(id="syn123").get()
+ setting = folder.set_storage_location(storage_location_id=12345)
+ print(setting)
+ """
+ return {}
+
+ def get_project_setting(
+ self,
+ setting_type: str = "upload",
+ *,
+ synapse_client: Optional[Synapse] = None,
+ ) -> Optional[Dict[str, Any]]:
+ """Get the project setting for this entity.
+
+ Arguments:
+ setting_type: The type of setting to retrieve. One of:
+ 'upload', 'external_sync', 'requester_pays'. Default: 'upload'.
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ The project setting as a dictionary, or None if no setting exists.
+
+ Raises:
+ ValueError: If the entity does not have an id set.
+
+ Example: Getting project settings
+ Get the upload settings for a folder:
+
+ from synapseclient.models import Folder
+
+ import synapseclient
+ synapseclient.login()
+
+ folder = Folder(id="syn123").get()
+ setting = folder.get_project_setting(setting_type="upload")
+ if setting:
+ print(f"Storage locations: {setting.get('locations')}")
+ """
+ return {}
+
+ def delete_project_setting(
+ self,
+ setting_id: str,
+ *,
+ synapse_client: Optional[Synapse] = None,
+ ) -> None:
+ """Delete a project setting by its setting ID.
+
+ Arguments:
+ setting_id: The ID of the project setting to delete.
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ None
+
+ Raises:
+ ValueError: If the entity does not have an id set.
+
+ Example: Deleting a project setting
+ Delete the upload settings for a folder:
+
+ from synapseclient.models import Folder
+
+ import synapseclient
+ synapseclient.login()
+
+ folder = Folder(id="syn123").get()
+ setting = folder.get_project_setting(setting_type="upload")
+ if setting:
+ folder.delete_project_setting(setting_id=setting['id'])
+ """
+ return None
+
+ def get_sts_storage_token(
+ self,
+ permission: str,
+ *,
+ output_format: str = "json",
+ min_remaining_life: Optional[int] = None,
+ synapse_client: Optional[Synapse] = None,
+ ) -> Any:
+ """Get STS (AWS Security Token Service) credentials for direct access to
+ the storage location backing this entity. These credentials can be used
+ with AWS tools like awscli and boto3.
+
+ Arguments:
+ permission: The permission level for the token. Must be 'read_only'
+ or 'read_write'.
+ output_format: The output format for the credentials. Options:
+ 'json' (default), 'boto', 'shell', 'bash', 'cmd', 'powershell'.
+ min_remaining_life: The minimum remaining life (in seconds) for a
+ cached token before a new one is fetched.
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ The STS credentials in the requested format.
+
+ Raises:
+ ValueError: If the entity does not have an id set.
+
+ Example: Using credentials with boto3
+ Get STS credentials for an STS-enabled folder and use with boto3:
+
+ import boto3
+ from synapseclient.models import Folder
+
+ import synapseclient
+ synapseclient.login()
+
+ folder = Folder(id="syn123").get()
+ credentials = folder.get_sts_storage_token(
+ permission="read_write",
+ output_format="boto",
+ )
+ s3_client = boto3.client('s3', **credentials)
+ """
+ return {}
+
+ def index_files_for_migration(
+ self,
+ dest_storage_location_id: int,
+ db_path: Optional[str] = None,
+ *,
+ source_storage_location_ids: Optional[List[int]] = None,
+ file_version_strategy: str = "new",
+ include_table_files: bool = False,
+ continue_on_error: bool = False,
+ synapse_client: Optional[Synapse] = None,
+ ) -> "MigrationResult":
+ """Index files in this entity for migration to a new storage location.
+
+ This is the first step in migrating files to a new storage location.
+ After indexing, use `migrate_indexed_files` to perform the actual migration.
+
+ Arguments:
+ dest_storage_location_id: The destination storage location ID.
+ db_path: Path to the SQLite database file for tracking migration state.
+ If not provided, a temporary directory will be used. The path
+ can be retrieved from the returned MigrationResult.db_path.
+ source_storage_location_ids: Optional list of source storage location IDs
+ to filter which files to migrate. If None, all files are indexed.
+            file_version_strategy: Strategy for handling file versions. Options:
+                'new' (default) - migrate by creating a new version of each file,
+                'all' - migrate all existing versions, 'latest' - migrate only the
+                latest version, 'skip' - do not migrate file entities at all (only
+                useful together with include_table_files).
+ include_table_files: Whether to include files attached to tables.
+ continue_on_error: Whether to continue indexing if an error occurs.
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ A MigrationResult object containing indexing statistics and the database
+ path (accessible via result.db_path).
+
+ Example: Indexing files for migration
+ Index files in a project for migration:
+
+ from synapseclient.models import Project
+
+ import synapseclient
+ synapseclient.login()
+
+ project = Project(id="syn123").get()
+ result = project.index_files_for_migration(
+ dest_storage_location_id=12345,
+ )
+ print(f"Database path: {result.db_path}")
+ print(f"Indexed {result.counts_by_status}")
+ """
+ return None
+
+ def migrate_indexed_files(
+ self,
+ db_path: str,
+ *,
+ create_table_snapshots: bool = True,
+ continue_on_error: bool = False,
+ force: bool = False,
+ synapse_client: Optional[Synapse] = None,
+ ) -> Optional["MigrationResult"]:
+ """Migrate files that have been indexed with `index_files_for_migration`.
+
+ This is the second step in migrating files to a new storage location.
+ Files must first be indexed using `index_files_for_migration`.
+
+ Arguments:
+ db_path: Path to the SQLite database file created by
+ `index_files_for_migration`. You can get this from the
+ MigrationResult.db_path returned by index_files_for_migration.
+ create_table_snapshots: Whether to create table snapshots before
+ migrating table files.
+ continue_on_error: Whether to continue migration if an error occurs.
+ force: Whether to force migration of files that have already been
+ migrated. Also bypasses interactive confirmation.
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ A MigrationResult object containing migration statistics, or None
+ if the user declined the confirmation prompt.
+
+ Example: Migrating indexed files
+ Migrate previously indexed files:
+
+ from synapseclient.models import Project
+
+ import synapseclient
+ synapseclient.login()
+
+ project = Project(id="syn123").get()
+
+ # Index first
+ index_result = project.index_files_for_migration(
+ dest_storage_location_id=12345,
+ )
+
+ # Then migrate using the db_path from index result
+ result = project.migrate_indexed_files(
+ db_path=index_result.db_path,
+ force=True, # Skip interactive confirmation
+ )
+ print(f"Migrated {result.counts_by_status}")
+ """
+ return None
diff --git a/synapseclient/models/protocols/storage_location_protocol.py b/synapseclient/models/protocols/storage_location_protocol.py
new file mode 100644
index 000000000..e602daaa6
--- /dev/null
+++ b/synapseclient/models/protocols/storage_location_protocol.py
@@ -0,0 +1,159 @@
+"""Protocol for the specific methods of StorageLocation that have synchronous counterparts
+generated at runtime."""
+
+from typing import TYPE_CHECKING, Optional, Protocol, Tuple
+
+from synapseclient import Synapse
+
+if TYPE_CHECKING:
+ from synapseclient.models import Folder
+ from synapseclient.models.storage_location import StorageLocation
+
+
+class StorageLocationSynchronousProtocol(Protocol):
+ """
+ The protocol for methods that are asynchronous but also
+ have a synchronous counterpart that may also be called.
+ """
+
+ def store(
+ self,
+ *,
+ synapse_client: Optional[Synapse] = None,
+ ) -> "StorageLocation":
+ """Create this storage location in Synapse. Storage locations are immutable;
+ this always creates a new one. If a storage location with identical properties
+ already exists for this user, the existing one is returned (idempotent).
+
+ Arguments:
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ The StorageLocation object with server-assigned fields populated.
+
+ Raises:
+ ValueError: If `storage_type` is not set.
+
+ Example: Creating an external S3 storage location
+ Create a storage location backed by your own S3 bucket:
+
+ from synapseclient.models import StorageLocation, StorageLocationType
+
+ import synapseclient
+ synapseclient.login()
+
+ storage = StorageLocation(
+ storage_type=StorageLocationType.EXTERNAL_S3,
+ bucket="my-external-synapse-bucket",
+ base_key="path/within/bucket",
+ ).store()
+
+ print(f"Storage location ID: {storage.storage_location_id}")
+ """
+ return self
+
+ def get(
+ self,
+ *,
+ synapse_client: Optional[Synapse] = None,
+ ) -> "StorageLocation":
+ """Retrieve this storage location from Synapse by its ID. Only the creator of
+ a StorageLocationSetting can retrieve it by its id.
+
+ Arguments:
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ The StorageLocation object populated with data from Synapse.
+
+ Raises:
+ ValueError: If `storage_location_id` is not set.
+
+ Example: Retrieving a storage location
+ Retrieve a storage location by ID:
+
+ from synapseclient.models import StorageLocation
+
+ import synapseclient
+ synapseclient.login()
+
+ storage = StorageLocation(storage_location_id=12345).get()
+ print(f"Type: {storage.storage_type}, Bucket: {storage.bucket}")
+ """
+ return self
+
+ @classmethod
+ def setup_s3(
+ cls,
+ *,
+ parent: str,
+ folder_name: Optional[str] = None,
+ folder: Optional["Folder"] = None,
+ bucket_name: Optional[str] = None,
+ base_key: Optional[str] = None,
+ sts_enabled: bool = False,
+ synapse_client: Optional[Synapse] = None,
+ ) -> Tuple["Folder", "StorageLocation"]:
+ """Convenience method to create a folder backed by S3 storage. This will:
+
+ 1. Create or retrieve the folder
+ 2. Create the storage location setting
+ 3. Apply the storage location to the folder via project settings
+
+ Arguments:
+ parent: The parent project or folder ID (e.g., "syn123").
+ folder_name: Name for a new folder. Either `folder_name` or `folder`
+ must be provided.
+ folder: An existing Folder object or Synapse ID. Either `folder_name`
+ or `folder` must be provided.
+ bucket_name: The S3 bucket name. If None, uses Synapse default storage.
+ base_key: The base key (prefix) within the bucket. Optional.
+ sts_enabled: Whether to enable STS credentials for this storage location.
+ Default: False.
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ A tuple of (Folder, StorageLocation).
+
+ Raises:
+ ValueError: If neither `folder_name` nor `folder` is provided, or if both
+ are provided.
+
+ Example: Creating an STS-enabled folder with external S3 storage
+ Create a folder with STS-enabled storage:
+
+ from synapseclient.models import StorageLocation
+
+ import synapseclient
+ synapseclient.login()
+
+ folder, storage = StorageLocation.setup_s3(
+ folder_name="my-sts-folder",
+ parent="syn123",
+ bucket_name="my-external-synapse-bucket",
+ base_key="path/within/bucket",
+ sts_enabled=True,
+ )
+ print(f"Folder: {folder.id}, Storage: {storage.storage_location_id}")
+
+ Example: Using an existing folder
+ Apply S3 storage to an existing folder:
+
+ from synapseclient.models import StorageLocation, Folder
+
+ import synapseclient
+ synapseclient.login()
+
+ existing_folder = Folder(id="syn456").get()
+            folder, storage = StorageLocation.setup_s3(
+                parent="syn123",
+                folder=existing_folder,
+                bucket_name="my-bucket",
+            )
+ """
+ return None
diff --git a/synapseclient/models/services/__init__.py b/synapseclient/models/services/__init__.py
index d1e7227ca..fea05d199 100644
--- a/synapseclient/models/services/__init__.py
+++ b/synapseclient/models/services/__init__.py
@@ -1,3 +1,16 @@
+from synapseclient.models.services.migration import (
+ index_files_for_migration_async,
+ migrate_indexed_files_async,
+)
+from synapseclient.models.services.migration_types import (
+ MigrationEntry,
+ MigrationError,
+ MigrationKey,
+ MigrationResult,
+ MigrationSettings,
+ MigrationStatus,
+ MigrationType,
+)
from synapseclient.models.services.search import get_id
from synapseclient.models.services.storable_entity import store_entity
from synapseclient.models.services.storable_entity_components import (
@@ -5,4 +18,18 @@
store_entity_components,
)
-__all__ = ["store_entity_components", "store_entity", "FailureStrategy", "get_id"]
+__all__ = [
+ "store_entity_components",
+ "store_entity",
+ "FailureStrategy",
+ "get_id",
+ "index_files_for_migration_async",
+ "migrate_indexed_files_async",
+ "MigrationResult",
+ "MigrationStatus",
+ "MigrationType",
+ "MigrationKey",
+ "MigrationEntry",
+ "MigrationSettings",
+ "MigrationError",
+]
diff --git a/synapseclient/models/services/migration.py b/synapseclient/models/services/migration.py
new file mode 100644
index 000000000..0186e8b77
--- /dev/null
+++ b/synapseclient/models/services/migration.py
@@ -0,0 +1,1650 @@
+"""
+Async migration service for migrating files between storage locations.
+
+This module provides native async implementations of the migration functionality,
+replacing the threading-based approach in synapseutils.migrate_functions.
+"""
+
+import asyncio
+import collections.abc
+import json
+import logging
+import os
+import sys
+import tempfile
+import traceback
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ AsyncGenerator,
+ Dict,
+ List,
+ Optional,
+ Set,
+ Tuple,
+ Union,
+)
+
+from synapseclient.api.entity_services import get_children
+from synapseclient.api.file_services import get_file_handle_for_download_async
+from synapseclient.api.table_services import create_table_snapshot, get_columns
+from synapseclient.core import utils
+from synapseclient.core.constants import concrete_types
+from synapseclient.core.upload.multipart_upload import (
+ MAX_NUMBER_OF_PARTS,
+ multipart_copy,
+)
+from synapseclient.models.table_components import (
+ AppendableRowSetRequest,
+ PartialRow,
+ PartialRowSet,
+ TableUpdateTransaction,
+)
+
+from .migration_types import (
+ IndexingError,
+ MigrationError,
+ MigrationKey,
+ MigrationResult,
+ MigrationSettings,
+ MigrationStatus,
+ MigrationType,
+)
+
+if TYPE_CHECKING:
+ from synapseclient import Synapse
+
+# Default part size for multipart copy (100 MB)
+DEFAULT_PART_SIZE = 100 * utils.MB
+
+# Batch size for database operations
+BATCH_SIZE = 500
+
+logger = logging.getLogger(__name__)
+
+
+# =============================================================================
+# Temp Directory Helpers
+# =============================================================================
+
+
+def _get_default_db_path(entity_id: str) -> str:
+ """Generate a default temp database path for migration tracking.
+
+ Arguments:
+ entity_id: The Synapse entity ID being migrated.
+
+ Returns:
+ Path to a SQLite database file in a temp directory.
+ """
+ temp_dir = tempfile.mkdtemp(prefix="synapse_migration_")
+ return os.path.join(temp_dir, f"migration_{entity_id}.db")
+
+
+# =============================================================================
+# Column Name Helpers (replaces legacy synapseclient.table functions)
+# =============================================================================
+
+
+def _escape_column_name(column: Union[str, collections.abc.Mapping]) -> str:
+ """Escape a column name for use in a Synapse table query statement.
+
+ Arguments:
+ column: A string column name or a dictionary with a 'name' key.
+
+ Returns:
+ Escaped column name wrapped in double quotes.
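+
+    Example:
+        `a"b` is escaped to `"a""b"` - embedded double quotes are doubled and the
+        whole name is wrapped in double quotes.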
+ """
+ col_name = (
+ column["name"] if isinstance(column, collections.abc.Mapping) else str(column)
+ )
+ escaped_name = col_name.replace('"', '""')
+ return f'"{escaped_name}"'
+
+
+def _join_column_names(columns: List[Any]) -> str:
+ """Join column names into a comma-delimited list for table queries.
+
+ Arguments:
+ columns: A list of column names or column objects with 'name' keys.
+
+ Returns:
+ Comma-separated string of escaped column names.
+ """
+ return ",".join(_escape_column_name(c) for c in columns)
+
+
+# =============================================================================
+# Database Helper Functions (Synchronous - wrapped with asyncio.to_thread)
+# =============================================================================
+
+
+def _ensure_schema(cursor) -> None:
+ """Ensure the SQLite database has the required schema."""
+ # Settings table - stores JSON configuration
+ cursor.execute(
+ "CREATE TABLE IF NOT EXISTS migration_settings (settings TEXT NOT NULL)"
+ )
+
+ # Main migrations table
+ cursor.execute(
+ """
+ CREATE TABLE IF NOT EXISTS migrations (
+ id TEXT NOT NULL,
+ type INTEGER NOT NULL,
+ version INTEGER NULL,
+ row_id INTEGER NULL,
+ col_id INTEGER NULL,
+ parent_id NULL,
+ status INTEGER NOT NULL,
+ exception TEXT NULL,
+ from_storage_location_id NULL,
+ from_file_handle_id TEXT NULL,
+ to_file_handle_id TEXT NULL,
+ file_size INTEGER NULL,
+ PRIMARY KEY (id, type, row_id, col_id, version)
+ )
+ """
+ )
+
+ # Indexes for common queries
+ cursor.execute("CREATE INDEX IF NOT EXISTS ix_status ON migrations(status)")
+ cursor.execute(
+ "CREATE INDEX IF NOT EXISTS ix_file_handle_ids "
+ "ON migrations(from_file_handle_id, to_file_handle_id)"
+ )
+
+
+def _initialize_database(
+ db_path: str,
+ root_id: str,
+ dest_storage_location_id: str,
+ source_storage_location_ids: List[str],
+ file_version_strategy: str,
+ include_table_files: bool,
+) -> None:
+ """Initialize the migration database with schema and settings.
+
+ Arguments:
+ db_path: Path to the SQLite database file.
+ root_id: The root entity ID being migrated.
+ dest_storage_location_id: Destination storage location ID.
+ source_storage_location_ids: List of source storage location IDs to filter.
+ file_version_strategy: Strategy for handling file versions.
+ include_table_files: Whether to include table-attached files.
+ """
+ import sqlite3
+
+ with sqlite3.connect(db_path) as conn:
+ cursor = conn.cursor()
+ _ensure_schema(cursor)
+
+ # Check if settings already exist
+ existing = cursor.execute("SELECT settings FROM migration_settings").fetchone()
+
+ settings = MigrationSettings(
+ root_id=root_id,
+ dest_storage_location_id=dest_storage_location_id,
+ source_storage_location_ids=source_storage_location_ids,
+ file_version_strategy=file_version_strategy,
+ include_table_files=include_table_files,
+ )
+
+ if existing:
+ # Verify settings match
+ existing_settings = json.loads(existing[0])
+ if existing_settings.get("root_id") != root_id:
+ raise ValueError(
+ f"Root entity ID mismatch: database has {existing_settings.get('root_id')}, "
+ f"but {root_id} was provided"
+ )
+ if (
+ existing_settings.get("dest_storage_location_id")
+ != dest_storage_location_id
+ ):
+ raise ValueError(
+ f"Destination storage location mismatch: database has "
+ f"{existing_settings.get('dest_storage_location_id')}, "
+ f"but {dest_storage_location_id} was provided"
+ )
+ else:
+ # Insert new settings
+ settings_json = json.dumps(
+ {
+ "root_id": settings.root_id,
+ "dest_storage_location_id": settings.dest_storage_location_id,
+ "source_storage_location_ids": settings.source_storage_location_ids,
+ "file_version_strategy": settings.file_version_strategy,
+ "include_table_files": settings.include_table_files,
+ }
+ )
+ cursor.execute(
+ "INSERT INTO migration_settings (settings) VALUES (?)",
+ (settings_json,),
+ )
+
+ conn.commit()
+
+
+def _retrieve_index_settings(db_path: str) -> Optional[Dict[str, Any]]:
+ """Retrieve index settings from the database.
+
+ Arguments:
+ db_path: Path to the SQLite database file.
+
+ Returns:
+ Dictionary of settings or None if not found.
+ """
+ import sqlite3
+
+ with sqlite3.connect(db_path) as conn:
+ cursor = conn.cursor()
+ _ensure_schema(cursor)
+
+ row = cursor.execute("SELECT settings FROM migration_settings").fetchone()
+ if row:
+ return json.loads(row[0])
+ return None
+
+
+def _check_indexed(db_path: str, entity_id: str) -> bool:
+ """Check if an entity has already been indexed.
+
+ Arguments:
+ db_path: Path to the SQLite database file.
+ entity_id: The entity ID to check.
+
+ Returns:
+ True if the entity is already indexed, False otherwise.
+ """
+ import sqlite3
+
+ with sqlite3.connect(db_path) as conn:
+ cursor = conn.cursor()
+ row = cursor.execute(
+ "SELECT 1 FROM migrations WHERE id = ? LIMIT 1",
+ (entity_id,),
+ ).fetchone()
+ return row is not None
+
+
+def _mark_container_indexed(
+ db_path: str,
+ entity_id: str,
+ parent_id: Optional[str],
+ migration_type: MigrationType,
+) -> None:
+ """Mark a container (Project or Folder) as indexed.
+
+ Arguments:
+ db_path: Path to the SQLite database file.
+ entity_id: The entity ID.
+ parent_id: The parent entity ID.
+ migration_type: The type of container.
+ """
+ import sqlite3
+
+ with sqlite3.connect(db_path) as conn:
+ cursor = conn.cursor()
+ cursor.execute(
+ """
+ INSERT OR IGNORE INTO migrations (id, type, parent_id, status)
+ VALUES (?, ?, ?, ?)
+ """,
+ (
+ entity_id,
+ migration_type.value,
+ parent_id,
+ MigrationStatus.INDEXED.value,
+ ),
+ )
+ conn.commit()
+
+
+def _insert_file_migration(
+ db_path: str,
+ entity_id: str,
+ version: Optional[int],
+ parent_id: Optional[str],
+ from_storage_location_id: int,
+ from_file_handle_id: str,
+ file_size: int,
+ status: MigrationStatus,
+) -> None:
+ """Insert a file migration entry.
+
+ Arguments:
+ db_path: Path to the SQLite database file.
+ entity_id: The file entity ID.
+ version: The file version (None for new version).
+ parent_id: The parent entity ID.
+ from_storage_location_id: Source storage location ID.
+ from_file_handle_id: Source file handle ID.
+ file_size: File size in bytes.
+ status: Migration status.
+ """
+ import sqlite3
+
+ with sqlite3.connect(db_path) as conn:
+ cursor = conn.cursor()
+ cursor.execute(
+ """
+ INSERT OR IGNORE INTO migrations (
+ id, type, version, parent_id,
+ from_storage_location_id, from_file_handle_id,
+ file_size, status
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+ """,
+ (
+ entity_id,
+ MigrationType.FILE.value,
+ version,
+ parent_id,
+ from_storage_location_id,
+ from_file_handle_id,
+ file_size,
+ status.value,
+ ),
+ )
+ conn.commit()
+
+
+def _insert_table_file_migration(
+ db_path: str,
+ entity_id: str,
+ row_id: int,
+ col_id: int,
+ row_version: int,
+ parent_id: Optional[str],
+ from_storage_location_id: int,
+ from_file_handle_id: str,
+ file_size: int,
+ status: MigrationStatus,
+) -> None:
+ """Insert a table-attached file migration entry.
+
+ Arguments:
+ db_path: Path to the SQLite database file.
+ entity_id: The table entity ID.
+ row_id: The table row ID.
+ col_id: The table column ID.
+ row_version: The row version.
+ parent_id: The parent entity ID.
+ from_storage_location_id: Source storage location ID.
+ from_file_handle_id: Source file handle ID.
+ file_size: File size in bytes.
+ status: Migration status.
+ """
+ import sqlite3
+
+ with sqlite3.connect(db_path) as conn:
+ cursor = conn.cursor()
+ cursor.execute(
+ """
+ INSERT OR IGNORE INTO migrations (
+ id, type, row_id, col_id, version, parent_id,
+ from_storage_location_id, from_file_handle_id,
+ file_size, status
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+ """,
+ (
+ entity_id,
+ MigrationType.TABLE_ATTACHED_FILE.value,
+ row_id,
+ col_id,
+ row_version,
+ parent_id,
+ from_storage_location_id,
+ from_file_handle_id,
+ file_size,
+ status.value,
+ ),
+ )
+ conn.commit()
+
+
+def _record_indexing_error(
+ db_path: str,
+ entity_id: str,
+ parent_id: Optional[str],
+ exception: Exception,
+) -> None:
+ """Record an indexing error in the database.
+
+ Arguments:
+ db_path: Path to the SQLite database file.
+ entity_id: The entity ID that failed.
+ parent_id: The parent entity ID.
+ exception: The exception that occurred.
+ """
+ import sqlite3
+
+ tb_str = "".join(
+ traceback.format_exception(type(exception), exception, exception.__traceback__)
+ )
+
+ with sqlite3.connect(db_path) as conn:
+ cursor = conn.cursor()
+ cursor.execute(
+ """
+ INSERT OR IGNORE INTO migrations (
+ id, type, parent_id, status, exception
+ ) VALUES (?, ?, ?, ?, ?)
+ """,
+ (
+ entity_id,
+ MigrationType.FILE.value, # Default type for errors
+ parent_id,
+ MigrationStatus.ERRORED.value,
+ tb_str,
+ ),
+ )
+ conn.commit()
+
+
+def _check_file_handle_exists(db_path: str, from_file_handle_id: str) -> Optional[str]:
+ """Check if a file handle has already been copied.
+
+ Arguments:
+ db_path: Path to the SQLite database file.
+ from_file_handle_id: The source file handle ID.
+
+ Returns:
+ The destination file handle ID if found, None otherwise.
+ """
+ import sqlite3
+
+ with sqlite3.connect(db_path) as conn:
+ cursor = conn.cursor()
+ row = cursor.execute(
+ """
+ SELECT to_file_handle_id FROM migrations
+ WHERE from_file_handle_id = ? AND to_file_handle_id IS NOT NULL
+ """,
+ (from_file_handle_id,),
+ ).fetchone()
+ return row[0] if row else None
+
+
+def _query_migration_batch(
+ db_path: str,
+ last_id: str,
+ last_version: int,
+ last_row_id: int,
+ last_col_id: int,
+ pending_file_handles: Set[str],
+ completed_file_handles: Set[str],
+ limit: int,
+) -> List[Dict[str, Any]]:
+ """Query the next batch of items to migrate.
+
+ This matches the original synapseutils query logic:
+ - Forward progress through entities ordered by id, type, row_id, col_id, version
+ - Backtracking to pick up files with completed file handles that were skipped
+
+ Arguments:
+ db_path: Path to the SQLite database file.
+ last_id: Last processed entity ID.
+ last_version: Last processed version.
+ last_row_id: Last processed row ID.
+ last_col_id: Last processed column ID.
+ pending_file_handles: Set of file handles currently being processed.
+ completed_file_handles: Set of file handles already completed.
+ limit: Maximum number of items to return.
+
+ Returns:
+ List of migration entries as dictionaries.
+ """
+ import sqlite3
+
+ if limit <= 0:
+ return []
+
+ with sqlite3.connect(db_path) as conn:
+ cursor = conn.cursor()
+
+ file_type = MigrationType.FILE.value
+ table_type = MigrationType.TABLE_ATTACHED_FILE.value
+ indexed_status = MigrationStatus.INDEXED.value
+
+ # Build the IN clauses for file handles
+ # We use string formatting for the IN clause since sqlite3 doesn't support array parameters
+ pending_in = (
+ "('" + "','".join(pending_file_handles) + "')"
+ if pending_file_handles
+ else "('')"
+ )
+ completed_in = (
+ "('" + "','".join(completed_file_handles) + "')"
+ if completed_file_handles
+ else "('')"
+ )
+
+ # Match the original synapseutils query structure exactly
+ # This handles:
+ # 1. Forward progress: entities after the current position
+ # 2. Backtracking: entities before current position that share completed file handles
+ query = f"""
+ SELECT
+ id,
+ type,
+ version,
+ row_id,
+ col_id,
+ from_file_handle_id,
+ file_size
+ FROM migrations
+ WHERE
+ status = :indexed_status
+ AND (
+ (
+ ((id > :id AND type IN (:file_type, :table_type))
+ OR (id = :id AND type = :file_type AND version IS NOT NULL AND version > :version)
+ OR (id = :id AND type = :table_type AND (row_id > :row_id OR (row_id = :row_id AND col_id > :col_id))))
+ AND from_file_handle_id NOT IN {pending_in}
+ ) OR
+ (
+ id <= :id
+ AND from_file_handle_id IN {completed_in}
+ )
+ )
+ ORDER BY
+ id,
+ type,
+ row_id,
+ col_id,
+ version
+ LIMIT :limit
+ """
+
+ params = {
+ "indexed_status": indexed_status,
+ "id": last_id,
+ "file_type": file_type,
+ "table_type": table_type,
+ "version": last_version,
+ "row_id": last_row_id,
+ "col_id": last_col_id,
+ "limit": limit,
+ }
+
+ results = cursor.execute(query, params)
+
+ batch = []
+ for row in results:
+ batch.append(
+ {
+ "id": row[0],
+ "type": MigrationType(row[1]),
+ "version": row[2],
+ "row_id": row[3],
+ "col_id": row[4],
+ "from_file_handle_id": row[5],
+ "file_size": row[6],
+ }
+ )
+ return batch
+
+
+def _update_migration_success(
+ db_path: str,
+ key: MigrationKey,
+ to_file_handle_id: str,
+) -> None:
+ """Update a migration entry as successful.
+
+ Arguments:
+ db_path: Path to the SQLite database file.
+ key: The migration key.
+ to_file_handle_id: The destination file handle ID.
+ """
+ import sqlite3
+
+ with sqlite3.connect(db_path) as conn:
+ cursor = conn.cursor()
+
+ update_sql = """
+ UPDATE migrations SET status = ?, to_file_handle_id = ?
+ WHERE id = ? AND type = ?
+ """
+ params = [
+ MigrationStatus.MIGRATED.value,
+ to_file_handle_id,
+ key.id,
+ key.type.value,
+ ]
+
+ if key.version is not None:
+ update_sql += " AND version = ?"
+ params.append(key.version)
+ else:
+ update_sql += " AND version IS NULL"
+
+ if key.row_id is not None:
+ update_sql += " AND row_id = ?"
+ params.append(key.row_id)
+
+ if key.col_id is not None:
+ update_sql += " AND col_id = ?"
+ params.append(key.col_id)
+
+ cursor.execute(update_sql, tuple(params))
+ conn.commit()
+
+
+def _update_migration_error(
+ db_path: str,
+ key: MigrationKey,
+ exception: Exception,
+) -> None:
+ """Update a migration entry with an error.
+
+ Arguments:
+ db_path: Path to the SQLite database file.
+ key: The migration key.
+ exception: The exception that occurred.
+ """
+ import sqlite3
+
+ tb_str = "".join(
+ traceback.format_exception(type(exception), exception, exception.__traceback__)
+ )
+
+ with sqlite3.connect(db_path) as conn:
+ cursor = conn.cursor()
+
+ update_sql = """
+ UPDATE migrations SET status = ?, exception = ?
+ WHERE id = ? AND type = ?
+ """
+ params = [MigrationStatus.ERRORED.value, tb_str, key.id, key.type.value]
+
+ if key.version is not None:
+ update_sql += " AND version = ?"
+ params.append(key.version)
+ else:
+ update_sql += " AND version IS NULL"
+
+ if key.row_id is not None:
+ update_sql += " AND row_id = ?"
+ params.append(key.row_id)
+
+ if key.col_id is not None:
+ update_sql += " AND col_id = ?"
+ params.append(key.col_id)
+
+ cursor.execute(update_sql, tuple(params))
+ conn.commit()
+
+
+def _confirm_migration(
+ db_path: str, dest_storage_location_id: str, force: bool
+) -> bool:
+ """Confirm migration with user if in interactive mode.
+
+ Arguments:
+ db_path: Path to the SQLite database file.
+ dest_storage_location_id: Destination storage location ID.
+ force: Whether to skip confirmation.
+
+ Returns:
+ True if migration should proceed, False otherwise.
+ """
+ import sqlite3
+
+ if force:
+ return True
+
+ with sqlite3.connect(db_path) as conn:
+ cursor = conn.cursor()
+ count = cursor.execute(
+ "SELECT count(*) FROM migrations WHERE status = ?",
+ (MigrationStatus.INDEXED.value,),
+ ).fetchone()[0]
+
+ if count == 0:
+ logger.info("No items for migration.")
+ return False
+
+ if sys.stdout.isatty():
+ user_input = input(
+ f"{count} items for migration to {dest_storage_location_id}. Proceed? (y/n)? "
+ )
+ return user_input.strip().lower() == "y"
+ else:
+ logger.info(
+ "%s items for migration. "
+ "force option not used, and console input not available to confirm migration, aborting. "
+ "Use the force option or run from an interactive shell to proceed with migration.",
+ count,
+ )
+ return False
+
+
+def _get_part_size(file_size: int) -> int:
+ """Calculate the part size for multipart copy.
+
+ Arguments:
+ file_size: The file size in bytes.
+
+ Returns:
+ The part size in bytes.
+ """
+ import math
+
+ # Ensure we don't exceed max parts
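+    # For example, if file_size / MAX_NUMBER_OF_PARTS works out to ~150 MB for a
+    # very large file, that larger value is used instead of the 100 MB default so
+    # the copy still fits within the allowed number of parts.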
+ min_part_size = math.ceil(file_size / MAX_NUMBER_OF_PARTS)
+ return max(DEFAULT_PART_SIZE, min_part_size)
+
+
+# =============================================================================
+# Storage Location Validation
+# =============================================================================
+
+
+async def _verify_storage_location_ownership_async(
+ storage_location_id: str,
+ *,
+ synapse_client: "Synapse",
+) -> None:
+ """Verify the user owns the destination storage location.
+
+ Arguments:
+ storage_location_id: The storage location ID to verify.
+ synapse_client: The Synapse client.
+
+ Raises:
+ ValueError: If the user does not own the storage location.
+ """
+ try:
+ await synapse_client.rest_get_async(f"/storageLocation/{storage_location_id}")
+ except Exception as ex:
+ raise ValueError(
+ f"Unable to verify ownership of storage location {storage_location_id}. "
+ f"You must be the creator of the destination storage location. Error: {ex}"
+ ) from ex
+
+
+def _include_file_in_migration(
+ file_handle: Dict[str, Any],
+ source_storage_location_ids: List[str],
+ dest_storage_location_id: str,
+) -> Optional[MigrationStatus]:
+ """Determine if a file should be included in migration.
+
+ Only S3 file handles can be migrated. External URLs and other file handle types
+ are skipped.
+
+ Arguments:
+ file_handle: The file handle metadata.
+ source_storage_location_ids: List of source storage locations to filter.
+ dest_storage_location_id: Destination storage location ID.
+
+ Returns:
+ MigrationStatus if file should be included, None otherwise.
+ """
+ # Only S3 file handles can be migrated
+ if file_handle.get("concreteType") != concrete_types.S3_FILE_HANDLE:
+ return None
+
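+    # A file handle without a storageLocationId is treated as being in Synapse's
+    # default storage location (id 1).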
+ from_storage_location_id = str(file_handle.get("storageLocationId", 1))
+
+ # Check if file matches the migration criteria:
+ # - If source_storage_location_ids is specified, from_storage_location must be in it
+ # OR already at the destination
+ # - If not specified, include all files not already at destination
+ if source_storage_location_ids:
+ if (
+ from_storage_location_id not in source_storage_location_ids
+ and from_storage_location_id != dest_storage_location_id
+ ):
+ return None
+
+ # Already at destination - mark as already migrated
+ if from_storage_location_id == dest_storage_location_id:
+ return MigrationStatus.ALREADY_MIGRATED
+
+ return MigrationStatus.INDEXED
+
+
+# =============================================================================
+# Public API Functions
+# =============================================================================
+
+
+async def index_files_for_migration_async(
+ entity_id: str,
+ dest_storage_location_id: str,
+ db_path: Optional[str] = None,
+ *,
+ source_storage_location_ids: Optional[List[str]] = None,
+ file_version_strategy: str = "new",
+ include_table_files: bool = False,
+ continue_on_error: bool = False,
+ synapse_client: Optional["Synapse"] = None,
+) -> MigrationResult:
+ """Index files for migration to a new storage location.
+
+ This is the first step in migrating files to a new storage location.
+ After indexing, use `migrate_indexed_files_async` to perform the actual migration.
+
+ Arguments:
+ entity_id: The Synapse entity ID to migrate (Project, Folder, File, or Table).
+ dest_storage_location_id: The destination storage location ID.
+ db_path: Path to create SQLite database. If None, uses temp directory.
+ source_storage_location_ids: Optional list of source storage locations to filter.
+ file_version_strategy: Strategy for file versions: "new", "all", "latest", "skip".
+ include_table_files: Whether to include files attached to tables.
+ continue_on_error: Whether to continue on individual errors.
+ synapse_client: Optional Synapse client instance.
+
+ Returns:
+ MigrationResult object for inspecting the index.
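+
+    Example: Indexing a project for migration
+        A minimal sketch, assuming an authenticated session and that the caller
+        owns the destination storage location (the IDs below are illustrative):
+
+            import asyncio
+
+            from synapseclient import Synapse
+            from synapseclient.models.services import index_files_for_migration_async
+
+            syn = Synapse()
+            syn.login()
+
+            async def main():
+                result = await index_files_for_migration_async(
+                    entity_id="syn123",
+                    dest_storage_location_id="12345",
+                    synapse_client=syn,
+                )
+                print(f"Index database: {result.db_path}")
+
+            asyncio.run(main())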
+ """
+ from synapseclient import Synapse
+
+ client = Synapse.get_client(synapse_client=synapse_client)
+
+ # Validate parameters
+ valid_strategies = {"new", "all", "latest", "skip"}
+ if file_version_strategy not in valid_strategies:
+ raise ValueError(
+ f"Invalid file_version_strategy: {file_version_strategy}, "
+ f"must be one of {valid_strategies}"
+ )
+
+ if file_version_strategy == "skip" and not include_table_files:
+ raise ValueError(
+ "Skipping both file entities and table attached files, nothing to migrate"
+ )
+
+ # Convert to strings
+ dest_storage_location_id = str(dest_storage_location_id)
+ source_storage_location_ids = [str(s) for s in (source_storage_location_ids or [])]
+
+ # Verify ownership
+ await _verify_storage_location_ownership_async(
+ storage_location_id=dest_storage_location_id,
+ synapse_client=client,
+ )
+
+ # Create database path if not provided
+ if db_path is None:
+ db_path = _get_default_db_path(entity_id)
+
+ # Initialize database
+ await asyncio.to_thread(
+ _initialize_database,
+ db_path,
+ entity_id,
+ dest_storage_location_id,
+ source_storage_location_ids,
+ file_version_strategy,
+ include_table_files,
+ )
+
+ # Get entity and start indexing
+ entity = await client.get_async(entity_id, downloadFile=False)
+
+ try:
+ await _index_entity_async(
+ entity=entity,
+ parent_id=None,
+ db_path=db_path,
+ dest_storage_location_id=dest_storage_location_id,
+ source_storage_location_ids=source_storage_location_ids,
+ file_version_strategy=file_version_strategy,
+ include_table_files=include_table_files,
+ continue_on_error=continue_on_error,
+ synapse_client=client,
+ )
+ except IndexingError as ex:
+ logger.exception(
+ "Aborted due to failure to index entity %s of type %s. "
+ "Use continue_on_error=True to skip individual failures.",
+ ex.entity_id,
+ ex.concrete_type,
+ )
+ raise ex
+
+ return MigrationResult(db_path=db_path, synapse_client=client)
+
+
+async def migrate_indexed_files_async(
+ db_path: str,
+ *,
+ create_table_snapshots: bool = True,
+ continue_on_error: bool = False,
+ force: bool = False,
+ max_concurrent_copies: Optional[int] = None,
+ synapse_client: Optional["Synapse"] = None,
+) -> Optional[MigrationResult]:
+ """Migrate files that have been indexed.
+
+ This is the second step in migrating files to a new storage location.
+ Files must first be indexed using `index_files_for_migration_async`.
+
+ Arguments:
+ db_path: Path to SQLite database created by index_files_for_migration_async.
+ create_table_snapshots: Whether to create table snapshots before migrating.
+ continue_on_error: Whether to continue on individual migration errors.
+ force: Whether to skip interactive confirmation.
+        max_concurrent_copies: Maximum concurrent file copy operations. Defaults to
+            half of the client's configured max_threads (minimum 1).
+ synapse_client: Optional Synapse client instance.
+
+ Returns:
+ MigrationResult object or None if migration was aborted.
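+
+    Example: Migrating a previously created index (illustrative sketch)
+        A minimal usage sketch. The database path is a hypothetical placeholder
+        and the import path of this function is an assumption:
+
+            import asyncio
+
+            from synapseclient import Synapse
+            # Assumed import path for this function; adjust as needed.
+            from synapseclient.models.services import migrate_indexed_files_async
+
+            syn = Synapse()
+            syn.login()
+
+            async def main():
+                result = await migrate_indexed_files_async(
+                    db_path="/tmp/migration_index.db",
+                    force=True,
+                )
+                if result is not None:
+                    print(await result.get_counts_by_status_async())
+
+            asyncio.run(main())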
+ """
+ from synapseclient import Synapse
+
+ client = Synapse.get_client(synapse_client=synapse_client)
+
+ # Retrieve settings
+ settings = await asyncio.to_thread(_retrieve_index_settings, db_path)
+ if settings is None:
+ raise ValueError(
+ f"Unable to retrieve existing index settings from '{db_path}'. "
+ "Either this path does not represent a previously created migration index "
+ "or the file is corrupt."
+ )
+
+ dest_storage_location_id = settings["dest_storage_location_id"]
+
+ # Confirm migration
+ confirmed = await asyncio.to_thread(
+ _confirm_migration, db_path, dest_storage_location_id, force
+ )
+ if not confirmed:
+ logger.info("Migration aborted.")
+ return None
+
+ # Determine concurrency
+ max_concurrent = max_concurrent_copies or max(client.max_threads // 2, 1)
+
+ # Execute migration
+ await _execute_migration_async(
+ db_path=db_path,
+ dest_storage_location_id=dest_storage_location_id,
+ create_table_snapshots=create_table_snapshots,
+ continue_on_error=continue_on_error,
+ max_concurrent=max_concurrent,
+ synapse_client=client,
+ )
+
+ return MigrationResult(db_path=db_path, synapse_client=client)
+
+
+# =============================================================================
+# Indexing Implementation
+# =============================================================================
+
+
+async def _index_entity_async(
+ entity: Any,
+ parent_id: Optional[str],
+ db_path: str,
+ dest_storage_location_id: str,
+ source_storage_location_ids: List[str],
+ file_version_strategy: str,
+ include_table_files: bool,
+ continue_on_error: bool,
+ *,
+ synapse_client: "Synapse",
+) -> None:
+ """Recursively index an entity and its children.
+
+ Arguments:
+ entity: The Synapse entity object.
+ parent_id: The parent entity ID.
+ db_path: Path to the SQLite database.
+ dest_storage_location_id: Destination storage location ID.
+ source_storage_location_ids: List of source storage locations to filter.
+ file_version_strategy: Strategy for file versions.
+ include_table_files: Whether to include table-attached files.
+ continue_on_error: Whether to continue on errors.
+ synapse_client: The Synapse client.
+ """
+ entity_id = utils.id_of(entity)
+ concrete_type = utils.concrete_type_of(entity)
+
+ # Check if already indexed
+ is_indexed = await asyncio.to_thread(_check_indexed, db_path, entity_id)
+ if is_indexed:
+ return
+
+ try:
+ if concrete_type == concrete_types.FILE_ENTITY:
+ if file_version_strategy != "skip":
+ await _index_file_entity_async(
+ entity_id=entity_id,
+ parent_id=parent_id,
+ db_path=db_path,
+ dest_storage_location_id=dest_storage_location_id,
+ source_storage_location_ids=source_storage_location_ids,
+ file_version_strategy=file_version_strategy,
+ synapse_client=synapse_client,
+ )
+
+ elif concrete_type == concrete_types.TABLE_ENTITY:
+ if include_table_files:
+ await _index_table_entity_async(
+ entity_id=entity_id,
+ parent_id=parent_id,
+ db_path=db_path,
+ dest_storage_location_id=dest_storage_location_id,
+ source_storage_location_ids=source_storage_location_ids,
+ synapse_client=synapse_client,
+ )
+
+ elif concrete_type in (
+ concrete_types.FOLDER_ENTITY,
+ concrete_types.PROJECT_ENTITY,
+ ):
+ await _index_container_async(
+ entity_id=entity_id,
+ parent_id=parent_id,
+ db_path=db_path,
+ concrete_type=concrete_type,
+ dest_storage_location_id=dest_storage_location_id,
+ source_storage_location_ids=source_storage_location_ids,
+ file_version_strategy=file_version_strategy,
+ include_table_files=include_table_files,
+ continue_on_error=continue_on_error,
+ synapse_client=synapse_client,
+ )
+
+ except IndexingError:
+ raise
+ except Exception as ex:
+ if continue_on_error:
+ logger.warning("Error indexing entity %s: %s", entity_id, ex)
+ await asyncio.to_thread(
+ _record_indexing_error, db_path, entity_id, parent_id, ex
+ )
+ else:
+ raise IndexingError(entity_id, concrete_type) from ex
+
+
+async def _index_file_entity_async(
+ entity_id: str,
+ parent_id: Optional[str],
+ db_path: str,
+ dest_storage_location_id: str,
+ source_storage_location_ids: List[str],
+ file_version_strategy: str,
+ *,
+ synapse_client: "Synapse",
+) -> None:
+ """Index a file entity for migration.
+
+ Arguments:
+ entity_id: The file entity ID.
+ parent_id: The parent entity ID.
+ db_path: Path to the SQLite database.
+ dest_storage_location_id: Destination storage location ID.
+ source_storage_location_ids: List of source storage locations to filter.
+ file_version_strategy: Strategy for file versions.
+ synapse_client: The Synapse client.
+ """
+ logger.info("Indexing file entity %s", entity_id)
+
+ entity_versions: List[Tuple[Any, Optional[int]]] = []
+
+ if file_version_strategy == "new":
+ entity = await synapse_client.get_async(entity_id, downloadFile=False)
+ entity_versions.append((entity, None))
+
+ elif file_version_strategy == "all":
+ # Get all versions
+ async for version in _get_version_numbers_async(entity_id, synapse_client):
+ entity = await synapse_client.get_async(
+ entity_id, version=version, downloadFile=False
+ )
+ entity_versions.append((entity, version))
+
+ elif file_version_strategy == "latest":
+ entity = await synapse_client.get_async(entity_id, downloadFile=False)
+ entity_versions.append((entity, entity.versionNumber))
+
+ for entity, version in entity_versions:
+ file_handle = entity._file_handle
+ status = _include_file_in_migration(
+ file_handle, source_storage_location_ids, dest_storage_location_id
+ )
+ if status:
+ await asyncio.to_thread(
+ _insert_file_migration,
+ db_path,
+ entity_id,
+ version,
+ parent_id,
+ file_handle["storageLocationId"],
+ entity.dataFileHandleId,
+ file_handle["contentSize"],
+ status,
+ )
+
+
+async def _get_version_numbers_async(
+ entity_id: str,
+ synapse_client: "Synapse",
+) -> AsyncGenerator[int, None]:
+ """Get all version numbers for an entity.
+
+ Arguments:
+ entity_id: The entity ID.
+ synapse_client: The Synapse client.
+
+ Yields:
+ Version numbers.
+ """
+ offset = 0
+ limit = 100
+
+ while True:
+ response = await synapse_client.rest_get_async(
+ f"/entity/{entity_id}/version?offset={offset}&limit={limit}"
+ )
+ results = response.get("results", [])
+
+ for version_info in results:
+ yield version_info["versionNumber"]
+
+ if len(results) < limit:
+ break
+ offset += limit
+
+
+async def _index_table_entity_async(
+ entity_id: str,
+ parent_id: Optional[str],
+ db_path: str,
+ dest_storage_location_id: str,
+ source_storage_location_ids: List[str],
+ *,
+ synapse_client: "Synapse",
+) -> None:
+ """Index a table entity's file attachments for migration.
+
+ Arguments:
+ entity_id: The table entity ID.
+ parent_id: The parent entity ID.
+ db_path: Path to the SQLite database.
+ dest_storage_location_id: Destination storage location ID.
+ source_storage_location_ids: List of source storage locations to filter.
+ synapse_client: The Synapse client.
+ """
+ logger.info("Indexing table entity %s", entity_id)
+
+ # Get file handle columns using the async API
+ columns = await get_columns(table_id=entity_id, synapse_client=synapse_client)
+ file_handle_columns = [c for c in columns if c.column_type == "FILEHANDLEID"]
+
+ if not file_handle_columns:
+ return
+
+ # Query table for file handles using local helper
+ file_column_select = _join_column_names(file_handle_columns)
+
+ # tableQuery is still a synchronous method on the Synapse client
+ results = await asyncio.to_thread(
+ synapse_client.tableQuery,
+ f"SELECT {file_column_select} FROM {entity_id}",
+ )
+
+ for row in results:
+ row_id, row_version = row[:2]
+ file_handle_ids = row[2:]
+
+ for i, file_handle_id in enumerate(file_handle_ids):
+ if not file_handle_id:
+ continue
+
+ col_id = file_handle_columns[i].id
+
+ # Get file handle metadata using the async API
+ fh_response = await get_file_handle_for_download_async(
+ file_handle_id=str(file_handle_id),
+ synapse_id=entity_id,
+ entity_type="TableEntity",
+ synapse_client=synapse_client,
+ )
+ file_handle = fh_response["fileHandle"]
+
+ status = _include_file_in_migration(
+ file_handle, source_storage_location_ids, dest_storage_location_id
+ )
+ if status:
+ await asyncio.to_thread(
+ _insert_table_file_migration,
+ db_path,
+ entity_id,
+ row_id,
+ int(col_id),
+ row_version,
+ parent_id,
+ file_handle["storageLocationId"],
+ file_handle_id,
+ file_handle["contentSize"],
+ status,
+ )
+
+
+async def _index_container_async(
+ entity_id: str,
+ parent_id: Optional[str],
+ db_path: str,
+ concrete_type: str,
+ dest_storage_location_id: str,
+ source_storage_location_ids: List[str],
+ file_version_strategy: str,
+ include_table_files: bool,
+ continue_on_error: bool,
+ *,
+ synapse_client: "Synapse",
+) -> None:
+ """Index a container (Project or Folder) and its children.
+
+ Arguments:
+ entity_id: The container entity ID.
+ parent_id: The parent entity ID.
+ db_path: Path to the SQLite database.
+ concrete_type: The concrete type of the container.
+ dest_storage_location_id: Destination storage location ID.
+ source_storage_location_ids: List of source storage locations to filter.
+ file_version_strategy: Strategy for file versions.
+ include_table_files: Whether to include table-attached files.
+ continue_on_error: Whether to continue on errors.
+ synapse_client: The Synapse client.
+ """
+ logger.info("Indexing container %s", entity_id)
+
+ # Determine included types
+ include_types = []
+ if file_version_strategy != "skip":
+ include_types.extend(["folder", "file"])
+ if include_table_files:
+ include_types.append("table")
+
+ # Get children using the async API
+ children = []
+ async for child in get_children(
+ parent=entity_id,
+ include_types=include_types,
+ synapse_client=synapse_client,
+ ):
+ children.append(child)
+
+ # Use bounded concurrency for indexing children
+ semaphore = asyncio.Semaphore(10)
+
+ async def index_child(child: Dict[str, Any]) -> None:
+ async with semaphore:
+ child_entity = await synapse_client.get_async(
+ child["id"], downloadFile=False
+ )
+ await _index_entity_async(
+ entity=child_entity,
+ parent_id=entity_id,
+ db_path=db_path,
+ dest_storage_location_id=dest_storage_location_id,
+ source_storage_location_ids=source_storage_location_ids,
+ file_version_strategy=file_version_strategy,
+ include_table_files=include_table_files,
+ continue_on_error=continue_on_error,
+ synapse_client=synapse_client,
+ )
+
+    # Await children as they complete so any failure surfaces as soon as possible
+ tasks = [asyncio.create_task(index_child(child)) for child in children]
+ for task in asyncio.as_completed(tasks):
+ await task
+
+ # Mark container as indexed
+ migration_type = (
+ MigrationType.PROJECT
+ if concrete_type == concrete_types.PROJECT_ENTITY
+ else MigrationType.FOLDER
+ )
+ await asyncio.to_thread(
+ _mark_container_indexed, db_path, entity_id, parent_id, migration_type
+ )
+
+
+# =============================================================================
+# Migration Execution
+# =============================================================================
+
+
+async def _execute_migration_async(
+ db_path: str,
+ dest_storage_location_id: str,
+ create_table_snapshots: bool,
+ continue_on_error: bool,
+ max_concurrent: int,
+ *,
+ synapse_client: "Synapse",
+) -> None:
+ """Execute the actual file migration.
+
+ Arguments:
+ db_path: Path to the SQLite database.
+ dest_storage_location_id: Destination storage location ID.
+ create_table_snapshots: Whether to create table snapshots.
+ continue_on_error: Whether to continue on errors.
+ max_concurrent: Maximum concurrent operations.
+ synapse_client: The Synapse client.
+ """
+ pending_file_handles: Set[str] = set()
+ completed_file_handles: Set[str] = set()
+ pending_keys: Set[MigrationKey] = set()
+ table_snapshots_created: Set[str] = set()
+
+ semaphore = asyncio.Semaphore(max_concurrent)
+ active_tasks: Set[asyncio.Task] = set()
+
+ last_id = ""
+ last_version = -1
+ last_row_id = -1
+ last_col_id = -1
+
+ while True:
+ # Query next batch
+ batch = await asyncio.to_thread(
+ _query_migration_batch,
+ db_path,
+ last_id,
+ last_version,
+ last_row_id,
+ last_col_id,
+ pending_file_handles,
+ completed_file_handles,
+ min(BATCH_SIZE, max_concurrent - len(active_tasks)),
+ )
+
+ if not batch and not active_tasks:
+ break
+
+ # Process batch items
+ for item in batch:
+ key = MigrationKey(
+ id=item["id"],
+ type=item["type"],
+ version=item["version"],
+ row_id=item["row_id"],
+ col_id=item["col_id"],
+ )
+
+ if key in pending_keys:
+ continue
+
+ pending_keys.add(key)
+ from_file_handle_id = item["from_file_handle_id"]
+
+ # Check for existing copy
+ to_file_handle_id = await asyncio.to_thread(
+ _check_file_handle_exists, db_path, from_file_handle_id
+ )
+
+ if not to_file_handle_id:
+ pending_file_handles.add(from_file_handle_id)
+
+ # Create table snapshot if needed using the async API
+ if (
+ item["type"] == MigrationType.TABLE_ATTACHED_FILE
+ and create_table_snapshots
+ and item["id"] not in table_snapshots_created
+ ):
+ await create_table_snapshot(
+ table_id=item["id"],
+ synapse_client=synapse_client,
+ )
+ table_snapshots_created.add(item["id"])
+
+ # Create migration task
+ task = asyncio.create_task(
+ _migrate_item_async(
+ key=key,
+ from_file_handle_id=from_file_handle_id,
+ to_file_handle_id=to_file_handle_id,
+ file_size=item["file_size"] or 0,
+ dest_storage_location_id=dest_storage_location_id,
+ semaphore=semaphore,
+ synapse_client=synapse_client,
+ )
+ )
+ active_tasks.add(task)
+
+ # Update tracking for next batch
+ last_id = item["id"]
+ last_version = item["version"] if item["version"] is not None else -1
+ last_row_id = item["row_id"] if item["row_id"] is not None else -1
+ last_col_id = item["col_id"] if item["col_id"] is not None else -1
+
+ # Wait for tasks if at capacity or end of batch
+ if active_tasks and (
+ len(active_tasks) >= max_concurrent or len(batch) < BATCH_SIZE
+ ):
+ done, active_tasks = await asyncio.wait(
+ active_tasks,
+ return_when=asyncio.FIRST_COMPLETED,
+ )
+
+ for completed_task in done:
+ try:
+ result = completed_task.result()
+ key = result["key"]
+ from_fh_id = result["from_file_handle_id"]
+ to_fh_id = result["to_file_handle_id"]
+
+ # Update database
+ await asyncio.to_thread(
+ _update_migration_success, db_path, key, to_fh_id
+ )
+
+ completed_file_handles.add(from_fh_id)
+ pending_file_handles.discard(from_fh_id)
+ pending_keys.discard(key)
+
+ except Exception as ex:
+ if hasattr(ex, "key"):
+ key = ex.key
+ await asyncio.to_thread(
+ _update_migration_error, db_path, key, ex.__cause__ or ex
+ )
+ pending_keys.discard(key)
+
+                        if not continue_on_error:
+                            # Cancel remaining tasks
+                            for task in active_tasks:
+                                task.cancel()
+                            raise
+
+ # Wait for any remaining tasks
+ if active_tasks:
+ done, _ = await asyncio.wait(active_tasks)
+ for completed_task in done:
+ try:
+ result = completed_task.result()
+ await asyncio.to_thread(
+ _update_migration_success,
+ db_path,
+ result["key"],
+ result["to_file_handle_id"],
+ )
+ except Exception as ex:
+ if hasattr(ex, "key"):
+ await asyncio.to_thread(
+ _update_migration_error, db_path, ex.key, ex.__cause__ or ex
+ )
+ if not continue_on_error:
+ raise
+
+
+async def _migrate_item_async(
+ key: MigrationKey,
+ from_file_handle_id: str,
+ to_file_handle_id: Optional[str],
+ file_size: int,
+ dest_storage_location_id: str,
+ semaphore: asyncio.Semaphore,
+ *,
+ synapse_client: "Synapse",
+) -> Dict[str, Any]:
+ """Migrate a single item.
+
+ Arguments:
+ key: The migration key.
+ from_file_handle_id: Source file handle ID.
+ to_file_handle_id: Destination file handle ID (if already copied).
+ file_size: File size in bytes.
+ dest_storage_location_id: Destination storage location ID.
+ semaphore: Concurrency semaphore.
+ synapse_client: The Synapse client.
+
+ Returns:
+ Dictionary with key, from_file_handle_id, to_file_handle_id.
+ """
+ async with semaphore:
+ try:
+ # Copy file handle if needed
+ if not to_file_handle_id:
+ source_association = {
+ "fileHandleId": from_file_handle_id,
+ "associateObjectId": key.id,
+ "associateObjectType": (
+ "FileEntity"
+ if key.type == MigrationType.FILE
+ else "TableEntity"
+ ),
+ }
+
+ # Use thread for multipart_copy (it uses threading internally)
+ to_file_handle_id = await asyncio.to_thread(
+ multipart_copy,
+ synapse_client,
+ source_association,
+ dest_storage_location_id,
+ part_size=_get_part_size(file_size),
+ )
+
+ # Update entity with new file handle
+ if key.type == MigrationType.FILE:
+ if key.version is None:
+ await _create_new_file_version_async(
+ entity_id=key.id,
+ to_file_handle_id=to_file_handle_id,
+ synapse_client=synapse_client,
+ )
+ else:
+ await _update_file_version_async(
+ entity_id=key.id,
+ version=key.version,
+ from_file_handle_id=from_file_handle_id,
+ to_file_handle_id=to_file_handle_id,
+ synapse_client=synapse_client,
+ )
+ elif key.type == MigrationType.TABLE_ATTACHED_FILE:
+ await _update_table_file_async(
+ entity_id=key.id,
+ row_id=key.row_id,
+ col_id=key.col_id,
+ to_file_handle_id=to_file_handle_id,
+ synapse_client=synapse_client,
+ )
+
+ return {
+ "key": key,
+ "from_file_handle_id": from_file_handle_id,
+ "to_file_handle_id": to_file_handle_id,
+ }
+
+        except Exception as ex:
+            raise MigrationError(key, from_file_handle_id, to_file_handle_id) from ex
+
+
+async def _create_new_file_version_async(
+ entity_id: str,
+ to_file_handle_id: str,
+ *,
+ synapse_client: "Synapse",
+) -> None:
+ """Create a new version of a file entity with the new file handle.
+
+ Arguments:
+ entity_id: The file entity ID.
+ to_file_handle_id: The new file handle ID.
+ synapse_client: The Synapse client.
+ """
+ entity = await synapse_client.get_async(entity_id, downloadFile=False)
+ entity.dataFileHandleId = to_file_handle_id
+ await synapse_client.store_async(entity)
+
+
+async def _update_file_version_async(
+ entity_id: str,
+ version: int,
+ from_file_handle_id: str,
+ to_file_handle_id: str,
+ *,
+ synapse_client: "Synapse",
+) -> None:
+ """Update an existing file version's file handle.
+
+ Arguments:
+ entity_id: The file entity ID.
+ version: The version number.
+ from_file_handle_id: The original file handle ID.
+ to_file_handle_id: The new file handle ID.
+ synapse_client: The Synapse client.
+ """
+ await synapse_client.rest_put_async(
+ f"/entity/{entity_id}/version/{version}/filehandle",
+ body=json.dumps(
+ {
+ "oldFileHandleId": from_file_handle_id,
+ "newFileHandleId": to_file_handle_id,
+ }
+ ),
+ )
+
+
+async def _update_table_file_async(
+ entity_id: str,
+ row_id: int,
+ col_id: int,
+ to_file_handle_id: str,
+ *,
+ synapse_client: "Synapse",
+) -> None:
+ """Update a table cell with a new file handle.
+
+ Arguments:
+ entity_id: The table entity ID.
+ row_id: The row ID.
+ col_id: The column ID.
+ to_file_handle_id: The new file handle ID.
+ synapse_client: The Synapse client.
+ """
+ # Create the partial row update using new OOP models
+ partial_row = PartialRow(
+ row_id=str(row_id),
+ values=[{"key": str(col_id), "value": to_file_handle_id}],
+ )
+ partial_row_set = PartialRowSet(
+ table_id=entity_id,
+ rows=[partial_row],
+ )
+ appendable_request = AppendableRowSetRequest(
+ entity_id=entity_id,
+ to_append=partial_row_set,
+ )
+
+ # Execute the update using TableUpdateTransaction
+ transaction = TableUpdateTransaction(
+ entity_id=entity_id,
+ changes=[appendable_request],
+ )
+ await transaction.send_job_and_wait_async(synapse_client=synapse_client)
diff --git a/synapseclient/models/services/migration_types.py b/synapseclient/models/services/migration_types.py
new file mode 100644
index 000000000..a20cc008d
--- /dev/null
+++ b/synapseclient/models/services/migration_types.py
@@ -0,0 +1,371 @@
+"""
+Data classes and enums for the async migration service.
+
+These types are used to track the state of file migrations between storage locations.
+"""
+
+import asyncio
+import csv
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional
+
+from synapseclient.core.constants import concrete_types
+
+if TYPE_CHECKING:
+ from synapseclient import Synapse
+
+
+class MigrationStatus(Enum):
+ """Status of a migration entry in the tracking database."""
+
+ INDEXED = 1
+ """The file has been indexed and is ready to be migrated."""
+
+ MIGRATED = 2
+ """The file has been successfully migrated to the new storage location."""
+
+ ALREADY_MIGRATED = 3
+ """The file was already at the destination storage location."""
+
+ ERRORED = 4
+ """An error occurred during indexing or migration."""
+
+
+class MigrationType(Enum):
+ """Type of entity being tracked in the migration database."""
+
+ PROJECT = 1
+ """A project container (used for tracking indexed containers)."""
+
+ FOLDER = 2
+ """A folder container (used for tracking indexed containers)."""
+
+ FILE = 3
+ """A file entity."""
+
+ TABLE_ATTACHED_FILE = 4
+ """A file attached to a table column."""
+
+ @classmethod
+ def from_concrete_type(cls, concrete_type: str) -> "MigrationType":
+ """Convert a Synapse concrete type string to a MigrationType.
+
+ Arguments:
+ concrete_type: The concrete type string from Synapse API.
+
+ Returns:
+ The corresponding MigrationType enum value.
+
+ Raises:
+ ValueError: If the concrete type is not recognized.
+ """
+ if concrete_type == concrete_types.PROJECT_ENTITY:
+ return cls.PROJECT
+ elif concrete_type == concrete_types.FOLDER_ENTITY:
+ return cls.FOLDER
+ elif concrete_type == concrete_types.FILE_ENTITY:
+ return cls.FILE
+ elif concrete_type == concrete_types.TABLE_ENTITY:
+ return cls.TABLE_ATTACHED_FILE
+
+ raise ValueError(f"Unhandled concrete type: {concrete_type}")
+
+
+@dataclass
+class MigrationKey:
+ """Unique identifier for a migration entry in the tracking database.
+
+ Attributes:
+ id: The Synapse entity ID.
+ type: The type of entity being migrated.
+ version: The file version number (None for new versions or containers).
+ row_id: The table row ID (for table attached files).
+ col_id: The table column ID (for table attached files).
+ """
+
+ id: str
+ type: MigrationType
+ version: Optional[int] = None
+ row_id: Optional[int] = None
+ col_id: Optional[int] = None
+
+ def __hash__(self) -> int:
+ return hash((self.id, self.type, self.version, self.row_id, self.col_id))
+
+ def __eq__(self, other: object) -> bool:
+ if not isinstance(other, MigrationKey):
+ return False
+ return (
+ self.id == other.id
+ and self.type == other.type
+ and self.version == other.version
+ and self.row_id == other.row_id
+ and self.col_id == other.col_id
+ )
+
+
+@dataclass
+class MigrationEntry:
+ """A single migration entry with full details.
+
+ Attributes:
+ key: The unique identifier for this migration entry.
+ parent_id: The parent entity ID.
+ from_storage_location_id: The original storage location ID.
+ from_file_handle_id: The original file handle ID.
+ to_file_handle_id: The new file handle ID after migration.
+ file_size: The file size in bytes.
+ status: The current migration status.
+ exception: Stack trace if an error occurred.
+ """
+
+ key: MigrationKey
+ parent_id: Optional[str] = None
+ from_storage_location_id: Optional[int] = None
+ from_file_handle_id: Optional[str] = None
+ to_file_handle_id: Optional[str] = None
+ file_size: Optional[int] = None
+ status: MigrationStatus = MigrationStatus.INDEXED
+ exception: Optional[str] = None
+
+
+@dataclass
+class MigrationSettings:
+ """Settings for a migration index stored in the database.
+
+ Attributes:
+ root_id: The root entity ID being migrated.
+ dest_storage_location_id: The destination storage location ID.
+ source_storage_location_ids: List of source storage location IDs to filter.
+ file_version_strategy: Strategy for handling file versions.
+ include_table_files: Whether to include files attached to tables.
+ """
+
+ root_id: str
+ dest_storage_location_id: str
+ source_storage_location_ids: List[str] = field(default_factory=list)
+ file_version_strategy: str = "new"
+ include_table_files: bool = False
+
+
+@dataclass
+class MigrationResult:
+ """Result of a migration operation - proxy to the SQLite tracking database.
+
+ This class provides methods to query the migration database for status counts,
+ individual migration entries, and CSV export.
+
+ Attributes:
+ db_path: Path to the SQLite database file.
+ synapse_client: Optional Synapse client for column name lookups.
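+
+    Example: Inspecting migration results (illustrative sketch)
+        A minimal sketch, assuming a tracking database already exists at the
+        hypothetical path below:
+
+            result = MigrationResult(db_path="/tmp/migration_index.db")
+
+            # Summarize entries by status (INDEXED, MIGRATED, ...).
+            print(result.get_counts_by_status())
+
+            # Export the per-file detail to a CSV for review.
+            result.as_csv("/tmp/migration_report.csv")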
+ """
+
+ db_path: str
+ synapse_client: Optional["Synapse"] = None
+
+ @property
+ def counts_by_status(self) -> Dict[str, int]:
+ """Get counts by migration status (synchronous).
+
+ Returns:
+ Dictionary mapping status names to counts.
+ """
+ return self.get_counts_by_status()
+
+ def get_counts_by_status(self) -> Dict[str, int]:
+ """Get counts by migration status (synchronous).
+
+ Returns:
+ Dictionary mapping status names to counts.
+ """
+ import sqlite3
+
+ with sqlite3.connect(self.db_path) as conn:
+ cursor = conn.cursor()
+
+ # Only count FILE and TABLE_ATTACHED_FILE entries
+ result = cursor.execute(
+ "SELECT status, count(*) FROM migrations "
+ "WHERE type IN (?, ?) GROUP BY status",
+ (MigrationType.FILE.value, MigrationType.TABLE_ATTACHED_FILE.value),
+ )
+
+ counts = {status.name: 0 for status in MigrationStatus}
+ for row in result:
+ status_value = row[0]
+ count = row[1]
+ counts[MigrationStatus(status_value).name] = count
+
+ return counts
+
+ async def get_counts_by_status_async(self) -> Dict[str, int]:
+ """Get counts by migration status (asynchronous).
+
+ Returns:
+ Dictionary mapping status names to counts.
+ """
+ return await asyncio.to_thread(self.get_counts_by_status)
+
+ def get_migrations(self) -> Iterator[Dict[str, Any]]:
+ """Iterate over all migration entries (synchronous).
+
+ Yields:
+ Dictionary for each migration entry with keys:
+ id, type, version, row_id, col_name, from_storage_location_id,
+ from_file_handle_id, to_file_handle_id, file_size, status, exception.
+ """
+ import sqlite3
+
+ with sqlite3.connect(self.db_path) as conn:
+ cursor = conn.cursor()
+
+ batch_size = 500
+ rowid = -1
+ column_names_cache: Dict[int, str] = {}
+
+ while True:
+ results = cursor.execute(
+ """
+ SELECT
+ rowid,
+ id,
+ type,
+ version,
+ row_id,
+ col_id,
+ from_storage_location_id,
+ from_file_handle_id,
+ to_file_handle_id,
+ file_size,
+ status,
+ exception
+ FROM migrations
+ WHERE
+ rowid > ?
+ AND type IN (?, ?)
+ ORDER BY rowid
+ LIMIT ?
+ """,
+ (
+ rowid,
+ MigrationType.FILE.value,
+ MigrationType.TABLE_ATTACHED_FILE.value,
+ batch_size,
+ ),
+ )
+
+ rows = results.fetchall()
+ if not rows:
+ break
+
+ for row in rows:
+ rowid = row[0]
+ col_id = row[5]
+
+ # Resolve column name if needed
+ col_name = None
+ if col_id is not None and self.synapse_client:
+ if col_id not in column_names_cache:
+ try:
+ col_info = self.synapse_client.restGET(
+ f"/column/{col_id}"
+ )
+ column_names_cache[col_id] = col_info.get("name", "")
+ except Exception:
+ column_names_cache[col_id] = ""
+ col_name = column_names_cache[col_id]
+
+ yield {
+ "id": row[1],
+ "type": (
+ "file" if row[2] == MigrationType.FILE.value else "table"
+ ),
+ "version": row[3],
+ "row_id": row[4],
+ "col_name": col_name,
+ "from_storage_location_id": row[6],
+ "from_file_handle_id": row[7],
+ "to_file_handle_id": row[8],
+ "file_size": row[9],
+ "status": MigrationStatus(row[10]).name,
+ "exception": row[11],
+ }
+
+ async def get_migrations_async(self) -> List[Dict[str, Any]]:
+ """Get all migration entries (asynchronous).
+
+ Returns:
+ List of dictionaries for each migration entry.
+ """
+        # Materialize the generator inside the worker thread so the SQLite reads
+        # do not run on the event loop when the result is iterated.
+ return await asyncio.to_thread(lambda: list(self.get_migrations()))
+
+ def as_csv(self, path: str) -> None:
+ """Export migration results to a CSV file (synchronous).
+
+ Arguments:
+ path: Path to write the CSV file.
+ """
+ fieldnames = [
+ "id",
+ "type",
+ "version",
+ "row_id",
+ "col_name",
+ "from_storage_location_id",
+ "from_file_handle_id",
+ "to_file_handle_id",
+ "file_size",
+ "status",
+ "exception",
+ ]
+
+ with open(path, "w", newline="") as csvfile:
+ writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+ writer.writeheader()
+ for migration in self.get_migrations():
+ writer.writerow(migration)
+
+ async def as_csv_async(self, path: str) -> None:
+ """Export migration results to a CSV file (asynchronous).
+
+ Arguments:
+ path: Path to write the CSV file.
+ """
+ await asyncio.to_thread(self.as_csv, path)
+
+
+class MigrationError(Exception):
+ """Error during a migration operation.
+
+ Attributes:
+ key: The migration key that failed.
+ from_file_handle_id: The source file handle ID.
+ to_file_handle_id: The destination file handle ID (if partially complete).
+ """
+
+ def __init__(
+ self,
+ key: MigrationKey,
+ from_file_handle_id: str,
+ to_file_handle_id: Optional[str] = None,
+ ):
+ self.key = key
+ self.from_file_handle_id = from_file_handle_id
+ self.to_file_handle_id = to_file_handle_id
+ super().__init__(f"Migration failed for {key.id}")
+
+
+class IndexingError(Exception):
+ """Error during an indexing operation.
+
+ Attributes:
+ entity_id: The entity ID that failed to index.
+ concrete_type: The concrete type of the entity.
+ """
+
+ def __init__(self, entity_id: str, concrete_type: str):
+ self.entity_id = entity_id
+ self.concrete_type = concrete_type
+ super().__init__(f"Indexing failed for {entity_id} ({concrete_type})")
diff --git a/synapseclient/models/storage_location.py b/synapseclient/models/storage_location.py
new file mode 100644
index 000000000..664276855
--- /dev/null
+++ b/synapseclient/models/storage_location.py
@@ -0,0 +1,600 @@
+"""StorageLocation model for managing storage location settings in Synapse."""
+
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
+
+from synapseclient import Synapse
+from synapseclient.api.storage_location_services import (
+ create_storage_location_setting,
+ get_storage_location_setting,
+)
+from synapseclient.core.async_utils import async_to_sync, otel_trace_method
+from synapseclient.models.protocols.storage_location_protocol import (
+ StorageLocationSynchronousProtocol,
+)
+
+if TYPE_CHECKING:
+ from synapseclient.models import Folder
+
+
+class StorageLocationType(str, Enum):
+ """Enumeration of storage location types supported by Synapse.
+
+ Each type maps to a specific concreteType suffix in the REST API.
+
+ Attributes:
+ SYNAPSE_S3: Synapse-managed S3 storage (default).
+ EXTERNAL_S3: User-owned Amazon S3 bucket accessed by Synapse.
+ EXTERNAL_GOOGLE_CLOUD: User-owned Google Cloud Storage bucket.
+ EXTERNAL_SFTP: External SFTP server not accessed by Synapse.
+ EXTERNAL_OBJECT_STORE: S3-like bucket (e.g., AWS S3 or OpenStack) not
+ accessed by Synapse.
+ PROXY: A proxy server that controls access to storage.
+ """
+
+ SYNAPSE_S3 = "S3StorageLocationSetting"
+ EXTERNAL_S3 = "ExternalS3StorageLocationSetting"
+ EXTERNAL_GOOGLE_CLOUD = "ExternalGoogleCloudStorageLocationSetting"
+ EXTERNAL_SFTP = "ExternalStorageLocationSetting"
+ EXTERNAL_OBJECT_STORE = "ExternalObjectStorageLocationSetting"
+ PROXY = "ProxyStorageLocationSettings"
+
+
+class UploadType(str, Enum):
+ """Enumeration of upload types for storage locations.
+
+ Attributes:
+ S3: Amazon S3 compatible upload.
+ GOOGLE_CLOUD_STORAGE: Google Cloud Storage upload.
+ SFTP: SFTP upload.
+ HTTPS: HTTPS upload (typically used with proxy storage).
+ NONE: No upload type specified.
+ """
+
+ S3 = "S3"
+ GOOGLE_CLOUD_STORAGE = "GOOGLECLOUDSTORAGE"
+ SFTP = "SFTP"
+ HTTPS = "HTTPS"
+ NONE = "NONE"
+
+
+# Mapping from StorageLocationType to default UploadType
+_STORAGE_TYPE_TO_UPLOAD_TYPE: Dict[StorageLocationType, UploadType] = {
+ StorageLocationType.SYNAPSE_S3: UploadType.S3,
+ StorageLocationType.EXTERNAL_S3: UploadType.S3,
+ StorageLocationType.EXTERNAL_GOOGLE_CLOUD: UploadType.GOOGLE_CLOUD_STORAGE,
+ StorageLocationType.EXTERNAL_SFTP: UploadType.SFTP,
+ StorageLocationType.EXTERNAL_OBJECT_STORE: UploadType.S3,
+ StorageLocationType.PROXY: UploadType.HTTPS,
+}
+
+# Mapping from concreteType suffix to StorageLocationType
+_CONCRETE_TYPE_TO_STORAGE_TYPE: Dict[str, StorageLocationType] = {
+ storage_type.value: storage_type for storage_type in StorageLocationType
+}
+
+
+@dataclass()
+@async_to_sync
+class StorageLocation(StorageLocationSynchronousProtocol):
+ """A storage location setting describes where files are uploaded to and
+ downloaded from via Synapse. Storage location settings may be created for
+ external locations, such as user-owned Amazon S3 buckets, Google Cloud
+ Storage buckets, SFTP servers, or proxy storage.
+
+ Attributes:
+ storage_location_id: (Read Only) The unique ID for this storage location,
+ assigned by the server on creation.
+ storage_type: The type of storage location. Required when creating a new
+ storage location via `store()`. Determines the `concreteType` sent to
+ the Synapse REST API.
+ banner: The banner text to display to a user every time a file is uploaded.
+ This field is optional.
+ description: A description of the storage location. This description is
+ shown when a user has to choose which upload destination to use.
+
+ Attributes:
+ bucket: The name of the S3 or Google Cloud Storage bucket. Applicable to
+ SYNAPSE_S3, EXTERNAL_S3, EXTERNAL_GOOGLE_CLOUD, and
+ EXTERNAL_OBJECT_STORE types.
+ base_key: The optional base key (prefix/folder) within the bucket.
+ Applicable to SYNAPSE_S3, EXTERNAL_S3, and EXTERNAL_GOOGLE_CLOUD types.
+ sts_enabled: Whether STS (AWS Security Token Service) is enabled on this
+ storage location. Applicable to SYNAPSE_S3 and EXTERNAL_S3 types.
+ endpoint_url: The endpoint URL of the S3 service. Applicable to
+ EXTERNAL_S3 (default: https://s3.amazonaws.com) and
+ EXTERNAL_OBJECT_STORE types.
+
+ Attributes:
+ url: The base URL for uploading to the external destination. Applicable to
+ EXTERNAL_SFTP type.
+ supports_subfolders: Whether the destination supports creating subfolders
+ under the base url. Applicable to EXTERNAL_SFTP type. Default: False.
+
+ Attributes:
+ proxy_url: The HTTPS URL of the proxy used for upload and download.
+ Applicable to PROXY type.
+ secret_key: The encryption key used to sign all pre-signed URLs used to
+ communicate with the proxy. Applicable to PROXY type.
+ benefactor_id: An Entity ID (such as a Project ID). When set, any user with
+ the 'create' permission on the given benefactorId will be allowed to
+ create ProxyFileHandle using its storage location ID. Applicable to
+ PROXY type.
+
+ Attributes:
+ upload_type: (Read Only) The upload type for this storage location.
+ Automatically derived from `storage_type`.
+ etag: (Read Only) Synapse employs an Optimistic Concurrency Control (OCC)
+ scheme. The E-Tag changes every time the setting is updated.
+ created_on: (Read Only) The date this storage location setting was created.
+ created_by: (Read Only) The ID of the user that created this storage
+ location setting.
+
+ Example: Creating an external S3 storage location
+ Create a storage location backed by your own S3 bucket:
+
+ from synapseclient.models import StorageLocation, StorageLocationType
+
+ import synapseclient
+ synapseclient.login()
+
+ storage = StorageLocation(
+ storage_type=StorageLocationType.EXTERNAL_S3,
+ bucket="my-external-synapse-bucket",
+ base_key="path/within/bucket",
+ ).store()
+
+ print(f"Storage location ID: {storage.storage_location_id}")
+
+ Example: Creating an STS-enabled S3 storage location with a folder
+ Use the convenience classmethod to create a folder with STS-enabled
+ storage:
+
+ from synapseclient.models import StorageLocation
+
+ import synapseclient
+ synapseclient.login()
+
+ folder, storage = StorageLocation.setup_s3(
+ folder_name="my-sts-folder",
+ parent="syn123",
+ bucket_name="my-external-synapse-bucket",
+ base_key="path/within/bucket",
+ sts_enabled=True,
+ )
+ print(f"Folder: {folder.id}, Storage: {storage.storage_location_id}")
+
+ Example: Creating a Google Cloud storage location
+ Create a storage location backed by your own GCS bucket:
+
+ from synapseclient.models import StorageLocation, StorageLocationType
+
+ import synapseclient
+ synapseclient.login()
+
+ storage = StorageLocation(
+ storage_type=StorageLocationType.EXTERNAL_GOOGLE_CLOUD,
+ bucket="my-gcs-bucket",
+ base_key="path/within/bucket",
+ ).store()
+ """
+
+ # Core fields - present on all storage locations
+ storage_location_id: Optional[int] = None
+ """(Read Only) The unique ID for this storage location, assigned by the server
+ on creation."""
+
+ storage_type: Optional[StorageLocationType] = None
+ """The type of storage location. Required when creating a new storage location
+ via `store()`. Determines the `concreteType` sent to the Synapse REST API."""
+
+ banner: Optional[str] = None
+ """The banner text to display to a user every time a file is uploaded."""
+
+ description: Optional[str] = None
+ """A description of the storage location. This description is shown when a user
+ has to choose which upload destination to use."""
+
+ # S3/GCS specific fields
+ bucket: Optional[str] = None
+ """The name of the S3 or Google Cloud Storage bucket. Applicable to SYNAPSE_S3,
+ EXTERNAL_S3, EXTERNAL_GOOGLE_CLOUD, and EXTERNAL_OBJECT_STORE types."""
+
+ base_key: Optional[str] = None
+ """The optional base key (prefix/folder) within the bucket. Applicable to
+ SYNAPSE_S3, EXTERNAL_S3, and EXTERNAL_GOOGLE_CLOUD types."""
+
+ sts_enabled: Optional[bool] = None
+ """Whether STS (AWS Security Token Service) is enabled on this storage location.
+ Applicable to SYNAPSE_S3 and EXTERNAL_S3 types."""
+
+ endpoint_url: Optional[str] = None
+ """The endpoint URL of the S3 service. Applicable to EXTERNAL_S3
+ (default: https://s3.amazonaws.com) and EXTERNAL_OBJECT_STORE types."""
+
+ # SFTP specific fields
+ url: Optional[str] = None
+ """The base URL for uploading to the external destination. Applicable to
+ EXTERNAL_SFTP type."""
+
+ supports_subfolders: Optional[bool] = None
+ """Whether the destination supports creating subfolders under the base url.
+ Applicable to EXTERNAL_SFTP type. Default: False."""
+
+ # Proxy specific fields
+ proxy_url: Optional[str] = None
+ """The HTTPS URL of the proxy used for upload and download. Applicable to
+ PROXY type."""
+
+ secret_key: Optional[str] = None
+ """The encryption key used to sign all pre-signed URLs used to communicate
+ with the proxy. Applicable to PROXY type."""
+
+ benefactor_id: Optional[str] = None
+ """An Entity ID (such as a Project ID). When set, any user with the 'create'
+ permission on the given benefactorId will be allowed to create ProxyFileHandle
+ using its storage location ID. Applicable to PROXY type."""
+
+ # Read-only fields
+ upload_type: Optional[UploadType] = field(default=None, repr=False, compare=False)
+ """(Read Only) The upload type for this storage location. Automatically derived
+ from `storage_type`."""
+
+ etag: Optional[str] = field(default=None, compare=False)
+ """(Read Only) Synapse employs an Optimistic Concurrency Control (OCC) scheme.
+ The E-Tag changes every time the setting is updated."""
+
+ created_on: Optional[str] = field(default=None, compare=False)
+ """(Read Only) The date this storage location setting was created."""
+
+ created_by: Optional[int] = field(default=None, compare=False)
+ """(Read Only) The ID of the user that created this storage location setting."""
+
+ def fill_from_dict(self, synapse_response: Dict[str, Any]) -> "StorageLocation":
+ """Converts a response from the REST API into this dataclass.
+
+ Arguments:
+ synapse_response: The response from the REST API.
+
+ Returns:
+ The StorageLocation object.
+ """
+ self.storage_location_id = synapse_response.get("storageLocationId", None)
+ self.banner = synapse_response.get("banner", None)
+ self.description = synapse_response.get("description", None)
+ self.etag = synapse_response.get("etag", None)
+ self.created_on = synapse_response.get("createdOn", None)
+ self.created_by = synapse_response.get("createdBy", None)
+
+ # Parse upload type
+ upload_type_str = synapse_response.get("uploadType", None)
+ if upload_type_str:
+ try:
+ self.upload_type = UploadType(upload_type_str)
+ except ValueError:
+ self.upload_type = None
+
+ # Parse storage type from concreteType
+ concrete_type = synapse_response.get("concreteType", "")
+ if concrete_type:
+            # Extract the setting class name (the suffix after the last dot)
+            type_suffix = concrete_type.split(".")[-1]
+ if type_suffix in _CONCRETE_TYPE_TO_STORAGE_TYPE:
+ self.storage_type = _CONCRETE_TYPE_TO_STORAGE_TYPE[type_suffix]
+
+ # S3/GCS fields
+ self.bucket = synapse_response.get("bucket", None)
+ self.base_key = synapse_response.get("baseKey", None)
+ self.sts_enabled = synapse_response.get("stsEnabled", None)
+ self.endpoint_url = synapse_response.get("endpointUrl", None)
+
+ # SFTP fields
+ self.url = synapse_response.get("url", None)
+ self.supports_subfolders = synapse_response.get("supportsSubfolders", None)
+
+ # Proxy fields
+ self.proxy_url = synapse_response.get("proxyUrl", None)
+ self.secret_key = synapse_response.get("secretKey", None)
+ self.benefactor_id = synapse_response.get("benefactorId", None)
+
+ return self
+
+ def _to_synapse_request(self) -> Dict[str, Any]:
+ """Convert this dataclass to a request body for the REST API.
+
+ Returns:
+ A dictionary suitable for the REST API.
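+
+        Example: Generated body for an external S3 location (illustrative)
+            For a StorageLocation with storage_type=EXTERNAL_S3, a bucket, a base
+            key, and STS enabled, the body looks like the following (the bucket
+            and base key values are placeholders):
+
+                {
+                    "concreteType": "org.sagebionetworks.repo.model.project.ExternalS3StorageLocationSetting",
+                    "uploadType": "S3",
+                    "bucket": "my-bucket",
+                    "baseKey": "my/prefix",
+                    "stsEnabled": True,
+                }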
+ """
+ if not self.storage_type:
+ raise ValueError(
+ "storage_type is required when creating a storage location"
+ )
+
+ # Build the concrete type
+ concrete_type = (
+ f"org.sagebionetworks.repo.model.project.{self.storage_type.value}"
+ )
+
+ # Determine upload type
+ upload_type = self.upload_type or _STORAGE_TYPE_TO_UPLOAD_TYPE.get(
+ self.storage_type, UploadType.S3
+ )
+
+ body: Dict[str, Any] = {
+ "concreteType": concrete_type,
+ "uploadType": upload_type.value,
+ }
+
+ # Add optional common fields
+ if self.banner is not None:
+ body["banner"] = self.banner
+ if self.description is not None:
+ body["description"] = self.description
+
+ # Add type-specific fields
+ if self.storage_type in (
+ StorageLocationType.SYNAPSE_S3,
+ StorageLocationType.EXTERNAL_S3,
+ StorageLocationType.EXTERNAL_GOOGLE_CLOUD,
+ StorageLocationType.EXTERNAL_OBJECT_STORE,
+ ):
+ if self.bucket is not None:
+ body["bucket"] = self.bucket
+ if self.base_key is not None:
+ body["baseKey"] = self.base_key
+
+ if self.storage_type in (
+ StorageLocationType.SYNAPSE_S3,
+ StorageLocationType.EXTERNAL_S3,
+ ):
+ if self.sts_enabled is not None:
+ body["stsEnabled"] = self.sts_enabled
+
+ if self.storage_type in (
+ StorageLocationType.EXTERNAL_S3,
+ StorageLocationType.EXTERNAL_OBJECT_STORE,
+ ):
+ if self.endpoint_url is not None:
+ body["endpointUrl"] = self.endpoint_url
+
+ if self.storage_type == StorageLocationType.EXTERNAL_SFTP:
+ if self.url is not None:
+ body["url"] = self.url
+ if self.supports_subfolders is not None:
+ body["supportsSubfolders"] = self.supports_subfolders
+
+ if self.storage_type == StorageLocationType.PROXY:
+ if self.proxy_url is not None:
+ body["proxyUrl"] = self.proxy_url
+ if self.secret_key is not None:
+ body["secretKey"] = self.secret_key
+ if self.benefactor_id is not None:
+ body["benefactorId"] = self.benefactor_id
+
+ return body
+
+ @otel_trace_method(
+ method_to_trace_name=lambda self, **kwargs: f"StorageLocation_Store: {self.storage_type}"
+ )
+ async def store_async(
+ self,
+ *,
+ synapse_client: Optional[Synapse] = None,
+ ) -> "StorageLocation":
+ """Create this storage location in Synapse. Storage locations are immutable;
+ this always creates a new one. If a storage location with identical properties
+ already exists for this user, the existing one is returned (idempotent).
+
+ Arguments:
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ The StorageLocation object with server-assigned fields populated.
+
+ Raises:
+ ValueError: If `storage_type` is not set.
+
+ Example: Using this function
+ Create an external S3 storage location:
+
+ import asyncio
+ from synapseclient import Synapse
+ from synapseclient.models import StorageLocation, StorageLocationType
+
+ syn = Synapse()
+ syn.login()
+
+ async def main():
+ storage = await StorageLocation(
+ storage_type=StorageLocationType.EXTERNAL_S3,
+ bucket="my-bucket",
+ base_key="my/prefix",
+ ).store_async()
+ print(f"Created storage location: {storage.storage_location_id}")
+
+ asyncio.run(main())
+ """
+ body = self._to_synapse_request()
+ response = await create_storage_location_setting(
+ body=body,
+ synapse_client=synapse_client,
+ )
+ self.fill_from_dict(response)
+ return self
+
+ @otel_trace_method(
+ method_to_trace_name=lambda self, **kwargs: f"StorageLocation_Get: {self.storage_location_id}"
+ )
+ async def get_async(
+ self,
+ *,
+ synapse_client: Optional[Synapse] = None,
+ ) -> "StorageLocation":
+ """Retrieve this storage location from Synapse by its ID. Only the creator of
+ a StorageLocationSetting can retrieve it by its id.
+
+ Arguments:
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ The StorageLocation object populated with data from Synapse.
+
+ Raises:
+ ValueError: If `storage_location_id` is not set.
+
+ Example: Using this function
+ Retrieve a storage location by ID:
+
+ import asyncio
+ from synapseclient import Synapse
+ from synapseclient.models import StorageLocation
+
+ syn = Synapse()
+ syn.login()
+
+ async def main():
+ storage = await StorageLocation(storage_location_id=12345).get_async()
+ print(f"Type: {storage.storage_type}, Bucket: {storage.bucket}")
+
+ asyncio.run(main())
+ """
+ if not self.storage_location_id:
+ raise ValueError(
+ "storage_location_id is required to retrieve a storage location"
+ )
+
+ response = await get_storage_location_setting(
+ storage_location_id=self.storage_location_id,
+ synapse_client=synapse_client,
+ )
+ self.fill_from_dict(response)
+ return self
+
+ @classmethod
+ async def setup_s3_async(
+ cls,
+ *,
+ parent: str,
+ folder_name: Optional[str] = None,
+ folder: Optional[Union["Folder", str]] = None,
+ bucket_name: Optional[str] = None,
+ base_key: Optional[str] = None,
+ sts_enabled: bool = False,
+ synapse_client: Optional[Synapse] = None,
+ ) -> Tuple["Folder", "StorageLocation"]:
+ """Convenience method to create a folder backed by S3 storage. This will:
+
+ 1. Create or retrieve the folder
+ 2. Create the storage location setting
+ 3. Apply the storage location to the folder via project settings
+
+ Arguments:
+ parent: The parent project or folder ID (e.g., "syn123").
+ folder_name: Name for a new folder. Either `folder_name` or `folder`
+ must be provided.
+ folder: An existing Folder object or Synapse ID. Either `folder_name`
+ or `folder` must be provided.
+ bucket_name: The S3 bucket name. If None, uses Synapse default storage.
+ base_key: The base key (prefix) within the bucket. Optional.
+ sts_enabled: Whether to enable STS credentials for this storage location.
+ Default: False.
+ synapse_client: If not passed in and caching was not disabled by
+ `Synapse.allow_client_caching(False)` this will use the last created
+ instance from the Synapse class constructor.
+
+ Returns:
+ A tuple of (Folder, StorageLocation).
+
+ Raises:
+ ValueError: If neither `folder_name` nor `folder` is provided, or if both
+ are provided.
+
+ Example: Using this function
+ Create an STS-enabled folder with external S3 storage:
+
+ import asyncio
+ from synapseclient import Synapse
+ from synapseclient.models import StorageLocation
+
+ syn = Synapse()
+ syn.login()
+
+ async def main():
+ folder, storage = await StorageLocation.setup_s3_async(
+ folder_name="my-sts-folder",
+ parent="syn123",
+ bucket_name="my-external-synapse-bucket",
+ base_key="path/within/bucket",
+ sts_enabled=True,
+ )
+ print(f"Folder: {folder.id}, Storage: {storage.storage_location_id}")
+
+ asyncio.run(main())
+
+ Example: Using existing folder
+ Apply S3 storage to an existing folder:
+
+ import asyncio
+ from synapseclient import Synapse
+ from synapseclient.models import StorageLocation
+
+ syn = Synapse()
+ syn.login()
+
+ async def main():
+ folder, storage = await StorageLocation.setup_s3_async(
+ folder="syn456",
+ bucket_name="my-bucket",
+ )
+
+ asyncio.run(main())
+ """
+ # Import here to avoid circular imports
+ from synapseclient.models import Folder as FolderModel
+
+ # Validate parameters
+ if folder_name and folder:
+ raise ValueError(
+ "folder and folder_name are mutually exclusive, only one should be passed"
+ )
+ if not folder_name and not folder:
+ raise ValueError("Either folder or folder_name is required")
+
+ # Create or get the folder
+ if folder_name:
+ target_folder = await FolderModel(
+ name=folder_name, parent_id=parent
+ ).store_async(synapse_client=synapse_client)
+ elif isinstance(folder, str):
+ target_folder = await FolderModel(id=folder).get_async(
+ synapse_client=synapse_client
+ )
+ else:
+ target_folder = folder
+
+ # Determine storage type
+ if bucket_name:
+ storage_type = StorageLocationType.EXTERNAL_S3
+ else:
+ storage_type = StorageLocationType.SYNAPSE_S3
+
+ # Create the storage location
+ storage_location = await cls(
+ storage_type=storage_type,
+ bucket=bucket_name,
+ base_key=base_key,
+ sts_enabled=sts_enabled,
+ ).store_async(synapse_client=synapse_client)
+
+ # Apply the storage location to the folder
+ await target_folder.set_storage_location_async(
+ storage_location_id=storage_location.storage_location_id,
+ synapse_client=synapse_client,
+ )
+
+ return target_folder, storage_location
diff --git a/tests/unit/synapseclient/api/unit_test_storage_location_services.py b/tests/unit/synapseclient/api/unit_test_storage_location_services.py
new file mode 100644
index 000000000..bebc80d50
--- /dev/null
+++ b/tests/unit/synapseclient/api/unit_test_storage_location_services.py
@@ -0,0 +1,215 @@
+"""Unit tests for storage_location_services utility functions."""
+
+from unittest.mock import AsyncMock, patch
+
+import pytest
+
+import synapseclient.api.storage_location_services as storage_location_services
+
+
+class TestCreateStorageLocationSetting:
+ """Tests for create_storage_location_setting function."""
+
+ @pytest.mark.asyncio
+ @patch("synapseclient.Synapse")
+ async def test_create_storage_location_setting(self, mock_synapse):
+ """Test create_storage_location_setting creates a storage location."""
+ # GIVEN a mock client that returns a storage location
+ mock_client = AsyncMock()
+ mock_synapse.get_client.return_value = mock_client
+ mock_client.rest_post_async.return_value = {
+ "storageLocationId": 12345,
+ "concreteType": "org.sagebionetworks.repo.model.project.ExternalS3StorageLocationSetting",
+ "uploadType": "S3",
+ "bucket": "my-bucket",
+ }
+
+ # WHEN I call create_storage_location_setting
+ body = {
+ "concreteType": "org.sagebionetworks.repo.model.project.ExternalS3StorageLocationSetting",
+ "uploadType": "S3",
+ "bucket": "my-bucket",
+ }
+ result = await storage_location_services.create_storage_location_setting(
+ body=body,
+ synapse_client=None,
+ )
+
+ # THEN I expect the storage location to be returned
+ assert result["storageLocationId"] == 12345
+ assert result["bucket"] == "my-bucket"
+ mock_client.rest_post_async.assert_awaited_once()
+
+
+class TestGetStorageLocationSetting:
+ """Tests for get_storage_location_setting function."""
+
+ @pytest.mark.asyncio
+ @patch("synapseclient.Synapse")
+ async def test_get_storage_location_setting(self, mock_synapse):
+ """Test get_storage_location_setting retrieves a storage location."""
+ # GIVEN a mock client that returns a storage location
+ mock_client = AsyncMock()
+ mock_synapse.get_client.return_value = mock_client
+ mock_client.rest_get_async.return_value = {
+ "storageLocationId": 12345,
+ "concreteType": "org.sagebionetworks.repo.model.project.ExternalS3StorageLocationSetting",
+ "uploadType": "S3",
+ "bucket": "my-bucket",
+ }
+
+ # WHEN I call get_storage_location_setting
+ result = await storage_location_services.get_storage_location_setting(
+ storage_location_id=12345,
+ synapse_client=None,
+ )
+
+ # THEN I expect the storage location to be returned
+ assert result["storageLocationId"] == 12345
+ assert result["bucket"] == "my-bucket"
+ mock_client.rest_get_async.assert_awaited_once_with(
+ uri="/storageLocation/12345",
+ )
+
+
+class TestGetProjectSetting:
+ """Tests for get_project_setting function."""
+
+ @pytest.mark.asyncio
+ @patch("synapseclient.Synapse")
+ async def test_get_project_setting_exists(self, mock_synapse):
+ """Test get_project_setting when setting exists."""
+ # GIVEN a mock client that returns a project setting
+ mock_client = AsyncMock()
+ mock_synapse.get_client.return_value = mock_client
+ mock_client.rest_get_async.return_value = {
+ "id": "setting123",
+ "projectId": "syn456",
+ "settingsType": "upload",
+ "locations": [12345],
+ }
+
+ # WHEN I call get_project_setting
+ result = await storage_location_services.get_project_setting(
+ project_id="syn456",
+ setting_type="upload",
+ synapse_client=None,
+ )
+
+ # THEN I expect the project setting to be returned
+ assert result["id"] == "setting123"
+ assert result["locations"] == [12345]
+ mock_client.rest_get_async.assert_awaited_once_with(
+ uri="/projectSettings/syn456/type/upload",
+ )
+
+ @pytest.mark.asyncio
+ @patch("synapseclient.Synapse")
+ async def test_get_project_setting_not_exists(self, mock_synapse):
+ """Test get_project_setting when setting does not exist."""
+ # GIVEN a mock client that returns empty response
+ mock_client = AsyncMock()
+ mock_synapse.get_client.return_value = mock_client
+ mock_client.rest_get_async.return_value = ""
+
+ # WHEN I call get_project_setting
+ result = await storage_location_services.get_project_setting(
+ project_id="syn456",
+ setting_type="upload",
+ synapse_client=None,
+ )
+
+ # THEN I expect None to be returned
+ assert result is None
+
+
+class TestCreateProjectSetting:
+ """Tests for create_project_setting function."""
+
+ @pytest.mark.asyncio
+ @patch("synapseclient.Synapse")
+ async def test_create_project_setting(self, mock_synapse):
+ """Test create_project_setting creates a project setting."""
+ # GIVEN a mock client that returns a project setting
+ mock_client = AsyncMock()
+ mock_synapse.get_client.return_value = mock_client
+ mock_client.rest_post_async.return_value = {
+ "id": "setting123",
+ "projectId": "syn456",
+ "settingsType": "upload",
+ "locations": [12345],
+ }
+
+ # WHEN I call create_project_setting
+ body = {
+ "concreteType": "org.sagebionetworks.repo.model.project.UploadDestinationListSetting",
+ "settingsType": "upload",
+ "locations": [12345],
+ "projectId": "syn456",
+ }
+ result = await storage_location_services.create_project_setting(
+ body=body,
+ synapse_client=None,
+ )
+
+ # THEN I expect the project setting to be returned
+ assert result["id"] == "setting123"
+ mock_client.rest_post_async.assert_awaited_once()
+
+
+class TestUpdateProjectSetting:
+ """Tests for update_project_setting function."""
+
+ @pytest.mark.asyncio
+ @patch("synapseclient.Synapse")
+ async def test_update_project_setting(self, mock_synapse):
+ """Test update_project_setting updates a project setting."""
+ # GIVEN a mock client that returns an updated project setting
+ mock_client = AsyncMock()
+ mock_synapse.get_client.return_value = mock_client
+ mock_client.rest_put_async.return_value = {
+ "id": "setting123",
+ "projectId": "syn456",
+ "settingsType": "upload",
+ "locations": [12345, 67890],
+ }
+
+ # WHEN I call update_project_setting
+ body = {
+ "id": "setting123",
+ "projectId": "syn456",
+ "settingsType": "upload",
+ "locations": [12345, 67890],
+ }
+ result = await storage_location_services.update_project_setting(
+ body=body,
+ synapse_client=None,
+ )
+
+ # THEN I expect the updated project setting to be returned
+ assert result["locations"] == [12345, 67890]
+ mock_client.rest_put_async.assert_awaited_once()
+
+
+class TestDeleteProjectSetting:
+ """Tests for delete_project_setting function."""
+
+ @pytest.mark.asyncio
+ @patch("synapseclient.Synapse")
+ async def test_delete_project_setting(self, mock_synapse):
+ """Test delete_project_setting deletes a project setting."""
+ # GIVEN a mock client
+ mock_client = AsyncMock()
+ mock_synapse.get_client.return_value = mock_client
+ mock_client.rest_delete_async.return_value = None
+
+ # WHEN I call delete_project_setting
+ await storage_location_services.delete_project_setting(
+ setting_id="setting123",
+ synapse_client=None,
+ )
+
+ # THEN I expect the delete to be called
+ mock_client.rest_delete_async.assert_awaited_once_with(
+ uri="/projectSettings/setting123",
+ )
diff --git a/tests/unit/synapseclient/models/unit_test_manifest.py b/tests/unit/synapseclient/models/unit_test_manifest.py
new file mode 100644
index 000000000..4c65ac7c3
--- /dev/null
+++ b/tests/unit/synapseclient/models/unit_test_manifest.py
@@ -0,0 +1,499 @@
+"""Unit tests for the synapseclient.models.mixins.manifest module."""
+
+import datetime
+import os
+import tempfile
+
+from synapseclient.models.mixins.manifest import (
+ DEFAULT_GENERATED_MANIFEST_KEYS,
+ MANIFEST_FILENAME,
+ _convert_manifest_data_items_to_string_list,
+ _convert_manifest_data_row_to_dict,
+ _extract_entity_metadata_for_file,
+ _get_entity_provenance_dict_for_file,
+ _manifest_filename,
+ _parse_manifest_value,
+ _validate_manifest_required_fields,
+ _write_manifest_data,
+)
+
+
+class TestManifestConstants:
+ """Tests for manifest constants."""
+
+ def test_manifest_filename_constant(self):
+ """Test the MANIFEST_FILENAME constant."""
+ assert MANIFEST_FILENAME == "SYNAPSE_METADATA_MANIFEST.tsv"
+
+ def test_default_manifest_keys(self):
+ """Test the DEFAULT_GENERATED_MANIFEST_KEYS constant."""
+ expected_keys = [
+ "path",
+ "parent",
+ "name",
+ "id",
+ "synapseStore",
+ "contentType",
+ "used",
+ "executed",
+ "activityName",
+ "activityDescription",
+ ]
+ assert DEFAULT_GENERATED_MANIFEST_KEYS == expected_keys
+
+
+class TestManifestFilename:
+ """Tests for _manifest_filename function."""
+
+ def test_manifest_filename(self):
+ """Test generating manifest filename."""
+ # GIVEN a path
+ path = "/path/to/directory"
+
+ # WHEN we generate the manifest filename
+ result = _manifest_filename(path)
+
+ # THEN it should be the path joined with MANIFEST_FILENAME
+ assert result == os.path.join(path, MANIFEST_FILENAME)
+
+
+class TestConvertManifestDataItemsToStringList:
+ """Tests for _convert_manifest_data_items_to_string_list function."""
+
+ def test_single_string(self):
+ """Test converting a single string."""
+ # GIVEN a list with a single string
+ items = ["hello"]
+
+ # WHEN we convert to string
+ result = _convert_manifest_data_items_to_string_list(items)
+
+ # THEN it should return the string directly
+ assert result == "hello"
+
+ def test_multiple_strings(self):
+ """Test converting multiple strings."""
+ # GIVEN a list with multiple strings
+ items = ["a", "b", "c"]
+
+ # WHEN we convert to string
+ result = _convert_manifest_data_items_to_string_list(items)
+
+ # THEN it should return a bracketed list
+ assert result == "[a,b,c]"
+
+ def test_string_with_comma(self):
+ """Test converting a string with comma."""
+ # GIVEN a single item with comma (no quotes needed for single item)
+ items = ["hello,world"]
+
+ # WHEN we convert to string
+ result = _convert_manifest_data_items_to_string_list(items)
+
+ # THEN it should return the string directly
+ assert result == "hello,world"
+
+ def test_multiple_strings_with_comma(self):
+ """Test converting multiple strings where one has a comma."""
+ # GIVEN multiple strings where one contains commas
+ items = ["string,with,commas", "string without commas"]
+
+ # WHEN we convert to string
+ result = _convert_manifest_data_items_to_string_list(items)
+
+ # THEN the comma-containing string should be quoted
+ assert result == '["string,with,commas",string without commas]'
+
+ def test_datetime(self):
+ """Test converting a datetime."""
+ # GIVEN a datetime value
+ dt = datetime.datetime(2020, 1, 1, 0, 0, 0, 0, tzinfo=datetime.timezone.utc)
+
+ # WHEN we convert to string
+ result = _convert_manifest_data_items_to_string_list([dt])
+
+ # THEN it should return ISO format
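+        # (the helper is expected to render UTC datetimes with a trailing "Z"
+        # rather than an explicit "+00:00" offset)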
+ assert result == "2020-01-01T00:00:00Z"
+
+ def test_multiple_datetimes(self):
+ """Test converting multiple datetimes."""
+ # GIVEN multiple datetime values
+ dt1 = datetime.datetime(2020, 1, 1, 0, 0, 0, 0, tzinfo=datetime.timezone.utc)
+ dt2 = datetime.datetime(2021, 1, 1, 0, 0, 0, 0, tzinfo=datetime.timezone.utc)
+
+ # WHEN we convert to string
+ result = _convert_manifest_data_items_to_string_list([dt1, dt2])
+
+ # THEN it should return a bracketed list of ISO dates
+ assert result == "[2020-01-01T00:00:00Z,2021-01-01T00:00:00Z]"
+
+ def test_boolean_true(self):
+ """Test converting True."""
+ # GIVEN a True value
+ items = [True]
+
+ # WHEN we convert to string
+ result = _convert_manifest_data_items_to_string_list(items)
+
+ # THEN it should return "True"
+ assert result == "True"
+
+ def test_boolean_false(self):
+ """Test converting False."""
+ # GIVEN a False value
+ items = [False]
+
+ # WHEN we convert to string
+ result = _convert_manifest_data_items_to_string_list(items)
+
+ # THEN it should return "False"
+ assert result == "False"
+
+ def test_integer(self):
+ """Test converting an integer."""
+ # GIVEN an integer value
+ items = [1]
+
+ # WHEN we convert to string
+ result = _convert_manifest_data_items_to_string_list(items)
+
+ # THEN it should return the string representation
+ assert result == "1"
+
+ def test_float(self):
+ """Test converting a float."""
+ # GIVEN a float value
+ items = [1.5]
+
+ # WHEN we convert to string
+ result = _convert_manifest_data_items_to_string_list(items)
+
+ # THEN it should return the string representation
+ assert result == "1.5"
+
+ def test_empty_list(self):
+ """Test converting an empty list."""
+ # GIVEN an empty list
+ items = []
+
+ # WHEN we convert to string
+ result = _convert_manifest_data_items_to_string_list(items)
+
+ # THEN it should return an empty string
+ assert result == ""
+
+
+class TestConvertManifestDataRowToDict:
+ """Tests for _convert_manifest_data_row_to_dict function."""
+
+ def test_simple_row(self):
+ """Test converting a simple row."""
+ # GIVEN a row with simple values
+ row = {"path": "/path/to/file", "name": "file.txt"}
+ keys = ["path", "name"]
+
+ # WHEN we convert it
+ result = _convert_manifest_data_row_to_dict(row, keys)
+
+ # THEN it should return the same values
+ assert result == {"path": "/path/to/file", "name": "file.txt"}
+
+ def test_row_with_list(self):
+ """Test converting a row with a list value."""
+ # GIVEN a row with a list value
+ row = {"annotations": ["a", "b", "c"]}
+ keys = ["annotations"]
+
+ # WHEN we convert it
+ result = _convert_manifest_data_row_to_dict(row, keys)
+
+ # THEN the list should be converted to a string
+ assert result == {"annotations": "[a,b,c]"}
+
+ def test_missing_key(self):
+ """Test converting a row with a missing key."""
+ # GIVEN a row missing a key
+ row = {"path": "/path/to/file"}
+ keys = ["path", "name"]
+
+ # WHEN we convert it
+ result = _convert_manifest_data_row_to_dict(row, keys)
+
+        # THEN the missing key should map to an empty string
+ assert result == {"path": "/path/to/file", "name": ""}
+
+
+class TestParseManifestValue:
+ """Tests for _parse_manifest_value function."""
+
+ def test_simple_string(self):
+ """Test parsing a simple string."""
+ assert _parse_manifest_value("hello") == "hello"
+
+ def test_list_syntax(self):
+ """Test parsing list syntax."""
+ assert _parse_manifest_value("[a,b,c]") == ["a", "b", "c"]
+
+ def test_list_with_quoted_string(self):
+ """Test parsing list with quoted string containing comma."""
+ result = _parse_manifest_value('["hello,world",other]')
+ assert result == ["hello,world", "other"]
+
+ def test_boolean_true(self):
+ """Test parsing 'true' string."""
+ assert _parse_manifest_value("true") is True
+ assert _parse_manifest_value("True") is True
+ assert _parse_manifest_value("TRUE") is True
+
+ def test_boolean_false(self):
+ """Test parsing 'false' string."""
+ assert _parse_manifest_value("false") is False
+ assert _parse_manifest_value("False") is False
+ assert _parse_manifest_value("FALSE") is False
+
+ def test_integer(self):
+ """Test parsing an integer string."""
+ assert _parse_manifest_value("123") == 123
+
+ def test_float(self):
+ """Test parsing a float string."""
+ assert _parse_manifest_value("1.5") == 1.5
+
+ def test_non_numeric_string(self):
+ """Test that non-numeric strings stay as strings."""
+ assert _parse_manifest_value("hello123") == "hello123"
+
+
+class TestWriteManifestData:
+ """Tests for _write_manifest_data function."""
+
+ def test_write_simple_manifest(self):
+ """Test writing a simple manifest file."""
+ # GIVEN simple data
+ keys = ["path", "name", "id"]
+ data = [
+ {"path": "/path/to/file1.txt", "name": "file1.txt", "id": "syn123"},
+ {"path": "/path/to/file2.txt", "name": "file2.txt", "id": "syn456"},
+ ]
+
+ # WHEN we write it to a temp file
+ with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".tsv") as f:
+ filename = f.name
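+        # (delete=False keeps the file on disk after the context manager
+        # exits so _write_manifest_data can reopen it; the finally block
+        # removes it afterwards)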
+
+ try:
+ _write_manifest_data(filename, keys, data)
+
+ # THEN the file should contain the expected content
+ with open(filename, "r") as f:
+ content = f.read()
+
+ lines = content.strip().split("\n")
+ assert len(lines) == 3 # header + 2 data rows
+ assert lines[0] == "path\tname\tid"
+ assert lines[1] == "/path/to/file1.txt\tfile1.txt\tsyn123"
+ assert lines[2] == "/path/to/file2.txt\tfile2.txt\tsyn456"
+ finally:
+ os.unlink(filename)
+
+
+class TestValidateManifestRequiredFields:
+ """Tests for _validate_manifest_required_fields function."""
+
+ def test_valid_manifest(self):
+ """Test validating a valid manifest file."""
+ # GIVEN a valid manifest file
+ with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".tsv") as f:
+ f.write("path\tparent\n")
+ f.write(f"{f.name}\tsyn123\n")
+ filename = f.name
+
+ try:
+            # The 'path' column points at the manifest file itself, which
+            # already exists on disk, so no extra file needs to be created
+
+ # WHEN we validate it
+ is_valid, errors = _validate_manifest_required_fields(filename)
+
+ # THEN it should be valid
+ assert is_valid is True
+ assert errors == []
+ finally:
+ os.unlink(filename)
+
+ def test_missing_file(self):
+ """Test validating a non-existent manifest file."""
+ # WHEN we validate a non-existent file
+ is_valid, errors = _validate_manifest_required_fields("/nonexistent/file.tsv")
+
+ # THEN it should be invalid
+ assert is_valid is False
+ assert len(errors) == 1
+ assert "not found" in errors[0]
+
+ def test_missing_required_field(self):
+ """Test validating a manifest missing a required field."""
+ # GIVEN a manifest missing the 'parent' field
+ with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".tsv") as f:
+ f.write("path\tname\n")
+ f.write("/path/to/file.txt\tfile.txt\n")
+ filename = f.name
+
+ try:
+ # WHEN we validate it
+ is_valid, errors = _validate_manifest_required_fields(filename)
+
+ # THEN it should be invalid
+ assert is_valid is False
+ assert any("parent" in e for e in errors)
+ finally:
+ os.unlink(filename)
+
+ def test_empty_path(self):
+ """Test validating a manifest with empty path."""
+ # GIVEN a manifest with empty path
+ with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".tsv") as f:
+ f.write("path\tparent\n")
+ f.write("\tsyn123\n")
+ filename = f.name
+
+ try:
+ # WHEN we validate it
+ is_valid, errors = _validate_manifest_required_fields(filename)
+
+ # THEN it should be invalid
+ assert is_valid is False
+ assert any("'path' is empty" in e for e in errors)
+ finally:
+ os.unlink(filename)
+
+ def test_invalid_parent_id(self):
+ """Test validating a manifest with invalid parent ID."""
+ # GIVEN a manifest with invalid parent ID
+ with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".tsv") as f:
+ f.write("path\tparent\n")
+ f.write(f"{f.name}\tinvalid_parent\n")
+ filename = f.name
+
+ try:
+ # WHEN we validate it
+ is_valid, errors = _validate_manifest_required_fields(filename)
+
+ # THEN it should be invalid
+ assert is_valid is False
+ assert any("not a valid Synapse ID" in e for e in errors)
+ finally:
+ os.unlink(filename)
+
+
+class TestExtractEntityMetadataForFile:
+ """Tests for _extract_entity_metadata_for_file function."""
+
+ def test_extract_basic_metadata(self):
+ """Test extracting basic file metadata."""
+
+ # GIVEN a mock File object
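+        # (a minimal stand-in for the File model, exposing only the
+        # attributes the helper is expected to read)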
+ class MockFile:
+ def __init__(self):
+ self.parent_id = "syn123"
+ self.path = "/path/to/file.txt"
+ self.name = "file.txt"
+ self.id = "syn456"
+ self.synapse_store = True
+ self.content_type = "text/plain"
+ self.annotations = None
+ self.activity = None
+
+ file = MockFile()
+
+ # WHEN we extract metadata
+ keys, data = _extract_entity_metadata_for_file([file])
+
+ # THEN we should get the expected data
+ assert "path" in keys
+ assert "parent" in keys
+ assert "name" in keys
+ assert "id" in keys
+ assert len(data) == 1
+ assert data[0]["path"] == "/path/to/file.txt"
+ assert data[0]["parent"] == "syn123"
+ assert data[0]["name"] == "file.txt"
+ assert data[0]["id"] == "syn456"
+
+ def test_extract_with_annotations(self):
+ """Test extracting metadata with annotations."""
+
+ # GIVEN a mock File object with annotations
+ class MockFile:
+ def __init__(self):
+ self.parent_id = "syn123"
+ self.path = "/path/to/file.txt"
+ self.name = "file.txt"
+ self.id = "syn456"
+ self.synapse_store = True
+ self.content_type = "text/plain"
+ self.annotations = {"study": ["Study1"], "dataType": ["RNA-seq"]}
+ self.activity = None
+
+ file = MockFile()
+
+ # WHEN we extract metadata
+ keys, data = _extract_entity_metadata_for_file([file])
+
+ # THEN annotation keys should be included
+ assert "study" in keys
+ assert "dataType" in keys
+ assert data[0]["study"] == ["Study1"]
+ assert data[0]["dataType"] == ["RNA-seq"]
+
+
+class TestGetEntityProvenanceDictForFile:
+ """Tests for _get_entity_provenance_dict_for_file function."""
+
+ def test_no_activity(self):
+ """Test extracting provenance when there is no activity."""
+
+ # GIVEN a mock File object with no activity
+ class MockFile:
+ def __init__(self):
+ self.activity = None
+
+ file = MockFile()
+
+ # WHEN we extract provenance
+ result = _get_entity_provenance_dict_for_file(file)
+
+ # THEN we should get an empty dict
+ assert result == {}
+
+ def test_with_activity(self):
+ """Test extracting provenance when there is an activity."""
+
+ # GIVEN mock objects
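+        # (minimal stand-ins: each used/executed entry is expected to expose
+        # a format_for_manifest() method that the helper calls when
+        # flattening provenance into manifest columns)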
+ class MockUsedEntity:
+ def format_for_manifest(self):
+ return "syn789"
+
+ class MockActivity:
+ def __init__(self):
+ self.name = "Analysis"
+ self.description = "Processing data"
+ self.used = [MockUsedEntity()]
+ self.executed = []
+
+ class MockFile:
+ def __init__(self):
+ self.activity = MockActivity()
+
+ file = MockFile()
+
+ # WHEN we extract provenance
+ result = _get_entity_provenance_dict_for_file(file)
+
+ # THEN we should get the expected dict
+ assert result["activityName"] == "Analysis"
+ assert result["activityDescription"] == "Processing data"
+ assert result["used"] == "syn789"
+ assert result["executed"] == ""
diff --git a/tests/unit/synapseclient/models/unit_test_storage_location.py b/tests/unit/synapseclient/models/unit_test_storage_location.py
new file mode 100644
index 000000000..400e28566
--- /dev/null
+++ b/tests/unit/synapseclient/models/unit_test_storage_location.py
@@ -0,0 +1,355 @@
+"""Unit tests for the synapseclient.models.StorageLocation class."""
+
+import pytest
+
+from synapseclient.models import StorageLocation, StorageLocationType, UploadType
+
+
+class TestStorageLocation:
+ """Unit tests for basic StorageLocation model functionality."""
+
+ def test_storage_location_type_enum_values(self):
+ """Test that StorageLocationType enum has correct values."""
+ assert StorageLocationType.SYNAPSE_S3.value == "S3StorageLocationSetting"
+ assert (
+ StorageLocationType.EXTERNAL_S3.value == "ExternalS3StorageLocationSetting"
+ )
+ assert (
+ StorageLocationType.EXTERNAL_GOOGLE_CLOUD.value
+ == "ExternalGoogleCloudStorageLocationSetting"
+ )
+ assert (
+ StorageLocationType.EXTERNAL_SFTP.value == "ExternalStorageLocationSetting"
+ )
+ assert (
+ StorageLocationType.EXTERNAL_OBJECT_STORE.value
+ == "ExternalObjectStorageLocationSetting"
+ )
+ assert StorageLocationType.PROXY.value == "ProxyStorageLocationSettings"
+
+ def test_upload_type_enum_values(self):
+ """Test that UploadType enum has correct values."""
+ assert UploadType.S3.value == "S3"
+ assert UploadType.GOOGLE_CLOUD_STORAGE.value == "GOOGLECLOUDSTORAGE"
+ assert UploadType.SFTP.value == "SFTP"
+ assert UploadType.HTTPS.value == "HTTPS"
+ assert UploadType.NONE.value == "NONE"
+
+ def test_to_synapse_request_external_s3(self):
+ """Test generating a request body for EXTERNAL_S3 storage location."""
+ # GIVEN an EXTERNAL_S3 storage location
+ storage = StorageLocation(
+ storage_type=StorageLocationType.EXTERNAL_S3,
+ bucket="my-bucket",
+ base_key="my/prefix",
+ sts_enabled=True,
+ banner="Upload banner",
+ description="Test storage location",
+ )
+
+ # WHEN we generate a request body
+ request_body = storage._to_synapse_request()
+
+ # THEN it should have the correct structure
+ assert request_body == {
+ "concreteType": "org.sagebionetworks.repo.model.project.ExternalS3StorageLocationSetting",
+ "uploadType": "S3",
+ "bucket": "my-bucket",
+ "baseKey": "my/prefix",
+ "stsEnabled": True,
+ "banner": "Upload banner",
+ "description": "Test storage location",
+ }
+
+ def test_to_synapse_request_synapse_s3(self):
+ """Test generating a request body for SYNAPSE_S3 storage location."""
+ # GIVEN a SYNAPSE_S3 storage location
+ storage = StorageLocation(
+ storage_type=StorageLocationType.SYNAPSE_S3,
+ sts_enabled=False,
+ )
+
+ # WHEN we generate a request body
+ request_body = storage._to_synapse_request()
+
+ # THEN it should have the correct structure
+ assert request_body == {
+ "concreteType": "org.sagebionetworks.repo.model.project.S3StorageLocationSetting",
+ "uploadType": "S3",
+ "stsEnabled": False,
+ }
+
+ def test_to_synapse_request_google_cloud(self):
+ """Test generating a request body for EXTERNAL_GOOGLE_CLOUD storage location."""
+        # GIVEN an EXTERNAL_GOOGLE_CLOUD storage location
+ storage = StorageLocation(
+ storage_type=StorageLocationType.EXTERNAL_GOOGLE_CLOUD,
+ bucket="my-gcs-bucket",
+ base_key="gcs/prefix",
+ )
+
+ # WHEN we generate a request body
+ request_body = storage._to_synapse_request()
+
+ # THEN it should have the correct structure
+ assert request_body == {
+ "concreteType": "org.sagebionetworks.repo.model.project.ExternalGoogleCloudStorageLocationSetting",
+ "uploadType": "GOOGLECLOUDSTORAGE",
+ "bucket": "my-gcs-bucket",
+ "baseKey": "gcs/prefix",
+ }
+
+ def test_to_synapse_request_sftp(self):
+ """Test generating a request body for EXTERNAL_SFTP storage location."""
+ # GIVEN an EXTERNAL_SFTP storage location
+ storage = StorageLocation(
+ storage_type=StorageLocationType.EXTERNAL_SFTP,
+ url="sftp://example.com/path",
+ supports_subfolders=True,
+ )
+
+ # WHEN we generate a request body
+ request_body = storage._to_synapse_request()
+
+ # THEN it should have the correct structure
+ assert request_body == {
+ "concreteType": "org.sagebionetworks.repo.model.project.ExternalStorageLocationSetting",
+ "uploadType": "SFTP",
+ "url": "sftp://example.com/path",
+ "supportsSubfolders": True,
+ }
+
+ def test_to_synapse_request_proxy(self):
+ """Test generating a request body for PROXY storage location."""
+ # GIVEN a PROXY storage location
+ storage = StorageLocation(
+ storage_type=StorageLocationType.PROXY,
+ proxy_url="https://proxy.example.com",
+ secret_key="my-secret-key",
+ benefactor_id="syn123",
+ )
+
+ # WHEN we generate a request body
+ request_body = storage._to_synapse_request()
+
+ # THEN it should have the correct structure
+ assert request_body == {
+ "concreteType": "org.sagebionetworks.repo.model.project.ProxyStorageLocationSettings",
+ "uploadType": "HTTPS",
+ "proxyUrl": "https://proxy.example.com",
+ "secretKey": "my-secret-key",
+ "benefactorId": "syn123",
+ }
+
+ def test_to_synapse_request_external_object_store(self):
+ """Test generating a request body for EXTERNAL_OBJECT_STORE storage location."""
+ # GIVEN an EXTERNAL_OBJECT_STORE storage location
+ storage = StorageLocation(
+ storage_type=StorageLocationType.EXTERNAL_OBJECT_STORE,
+ bucket="my-s3-like-bucket",
+ endpoint_url="https://s3.custom.com",
+ )
+
+ # WHEN we generate a request body
+ request_body = storage._to_synapse_request()
+
+ # THEN it should have the correct structure
+ assert request_body == {
+ "concreteType": "org.sagebionetworks.repo.model.project.ExternalObjectStorageLocationSetting",
+ "uploadType": "S3",
+ "bucket": "my-s3-like-bucket",
+ "endpointUrl": "https://s3.custom.com",
+ }
+
+ def test_to_synapse_request_missing_storage_type(self):
+ """Test that _to_synapse_request raises ValueError when storage_type is missing."""
+ # GIVEN a storage location without a storage_type
+ storage = StorageLocation(
+ bucket="my-bucket",
+ )
+
+ # THEN it should raise ValueError
+ with pytest.raises(ValueError, match="storage_type is required"):
+ storage._to_synapse_request()
+
+ def test_fill_from_dict_external_s3(self):
+ """Test filling from a REST API response for EXTERNAL_S3."""
+ # GIVEN a storage location
+ storage = StorageLocation()
+
+ # AND a response from the REST API
+ response = {
+ "storageLocationId": 12345,
+ "concreteType": "org.sagebionetworks.repo.model.project.ExternalS3StorageLocationSetting",
+ "uploadType": "S3",
+ "bucket": "my-bucket",
+ "baseKey": "my/prefix",
+ "stsEnabled": True,
+ "banner": "Upload banner",
+ "description": "Test storage location",
+ "etag": "abc123",
+ "createdOn": "2024-01-01T00:00:00.000Z",
+ "createdBy": 123456,
+ }
+
+ # WHEN we fill from the response
+ storage.fill_from_dict(response)
+
+ # THEN the storage location should be populated correctly
+ assert storage.storage_location_id == 12345
+ assert storage.storage_type == StorageLocationType.EXTERNAL_S3
+ assert storage.upload_type == UploadType.S3
+ assert storage.bucket == "my-bucket"
+ assert storage.base_key == "my/prefix"
+ assert storage.sts_enabled is True
+ assert storage.banner == "Upload banner"
+ assert storage.description == "Test storage location"
+ assert storage.etag == "abc123"
+ assert storage.created_on == "2024-01-01T00:00:00.000Z"
+ assert storage.created_by == 123456
+
+ def test_fill_from_dict_synapse_s3(self):
+ """Test filling from a REST API response for SYNAPSE_S3."""
+ # GIVEN a storage location
+ storage = StorageLocation()
+
+ # AND a response from the REST API
+ response = {
+ "storageLocationId": 1,
+ "concreteType": "org.sagebionetworks.repo.model.project.S3StorageLocationSetting",
+ "uploadType": "S3",
+ }
+
+ # WHEN we fill from the response
+ storage.fill_from_dict(response)
+
+ # THEN the storage location should be populated correctly
+ assert storage.storage_location_id == 1
+ assert storage.storage_type == StorageLocationType.SYNAPSE_S3
+
+ def test_fill_from_dict_google_cloud(self):
+ """Test filling from a REST API response for EXTERNAL_GOOGLE_CLOUD."""
+ # GIVEN a storage location
+ storage = StorageLocation()
+
+ # AND a response from the REST API
+ response = {
+ "storageLocationId": 67890,
+ "concreteType": "org.sagebionetworks.repo.model.project.ExternalGoogleCloudStorageLocationSetting",
+ "uploadType": "GOOGLECLOUDSTORAGE",
+ "bucket": "my-gcs-bucket",
+ }
+
+ # WHEN we fill from the response
+ storage.fill_from_dict(response)
+
+ # THEN the storage location should be populated correctly
+ assert storage.storage_location_id == 67890
+ assert storage.storage_type == StorageLocationType.EXTERNAL_GOOGLE_CLOUD
+ assert storage.upload_type == UploadType.GOOGLE_CLOUD_STORAGE
+ assert storage.bucket == "my-gcs-bucket"
+
+ def test_fill_from_dict_sftp(self):
+ """Test filling from a REST API response for EXTERNAL_SFTP."""
+ # GIVEN a storage location
+ storage = StorageLocation()
+
+ # AND a response from the REST API
+ response = {
+ "storageLocationId": 11111,
+ "concreteType": "org.sagebionetworks.repo.model.project.ExternalStorageLocationSetting",
+ "uploadType": "SFTP",
+ "url": "sftp://example.com/path",
+ "supportsSubfolders": True,
+ }
+
+ # WHEN we fill from the response
+ storage.fill_from_dict(response)
+
+ # THEN the storage location should be populated correctly
+ assert storage.storage_location_id == 11111
+ assert storage.storage_type == StorageLocationType.EXTERNAL_SFTP
+ assert storage.upload_type == UploadType.SFTP
+ assert storage.url == "sftp://example.com/path"
+ assert storage.supports_subfolders is True
+
+ def test_fill_from_dict_proxy(self):
+ """Test filling from a REST API response for PROXY."""
+ # GIVEN a storage location
+ storage = StorageLocation()
+
+ # AND a response from the REST API
+ response = {
+ "storageLocationId": 22222,
+ "concreteType": "org.sagebionetworks.repo.model.project.ProxyStorageLocationSettings",
+ "uploadType": "HTTPS",
+ "proxyUrl": "https://proxy.example.com",
+ "secretKey": "my-secret-key",
+ "benefactorId": "syn123",
+ }
+
+ # WHEN we fill from the response
+ storage.fill_from_dict(response)
+
+ # THEN the storage location should be populated correctly
+ assert storage.storage_location_id == 22222
+ assert storage.storage_type == StorageLocationType.PROXY
+ assert storage.upload_type == UploadType.HTTPS
+ assert storage.proxy_url == "https://proxy.example.com"
+ assert storage.secret_key == "my-secret-key"
+ assert storage.benefactor_id == "syn123"
+
+
+class TestStorageLocationAsync:
+ """Async unit tests for StorageLocation model."""
+
+ @pytest.mark.asyncio
+ async def test_get_async_missing_id(self):
+ """Test that get_async raises ValueError when storage_location_id is missing."""
+ # GIVEN a storage location without an ID
+ storage = StorageLocation()
+
+ # THEN it should raise ValueError
+ with pytest.raises(ValueError, match="storage_location_id is required"):
+ await storage.get_async()
+
+ @pytest.mark.asyncio
+ async def test_store_async_missing_storage_type(self):
+ """Test that store_async raises ValueError when storage_type is missing."""
+ # GIVEN a storage location without a storage_type
+ storage = StorageLocation(bucket="my-bucket")
+
+ # THEN it should raise ValueError
+ with pytest.raises(ValueError, match="storage_type is required"):
+ await storage.store_async()
+
+
+class TestSetupS3:
+ """Tests for the setup_s3 convenience method."""
+
+ @pytest.mark.asyncio
+ async def test_setup_s3_async_requires_folder_or_folder_name(self):
+ """Test that setup_s3_async raises ValueError when neither folder nor folder_name is provided."""
+ # WHEN I call setup_s3_async without folder or folder_name
+ # THEN it should raise ValueError
+ with pytest.raises(
+ ValueError, match="Either folder or folder_name is required"
+ ):
+ await StorageLocation.setup_s3_async(parent="syn123")
+
+ @pytest.mark.asyncio
+ async def test_setup_s3_async_folder_and_folder_name_mutually_exclusive(self):
+ """Test that setup_s3_async raises ValueError when both folder and folder_name are provided."""
+ from synapseclient.models import Folder
+
+ # GIVEN both folder and folder_name
+ folder = Folder(id="syn456")
+
+ # WHEN I call setup_s3_async with both
+ # THEN it should raise ValueError
+ with pytest.raises(
+ ValueError, match="folder and folder_name are mutually exclusive"
+ ):
+ await StorageLocation.setup_s3_async(
+ parent="syn123", folder_name="test", folder=folder
+ )