From c9bdf8a83dd515bca7b30258002a3d1b73de49b8 Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Tue, 10 Feb 2026 10:34:07 -0500 Subject: [PATCH 01/10] update create_record_based_metadata_task function --- docs/guides/extensions/curator/metadata_curation.md | 6 ++++-- .../extensions/curator/record_based_metadata_task.py | 10 +++++++++- synapseclient/models/curation.py | 7 +++++++ 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/docs/guides/extensions/curator/metadata_curation.md b/docs/guides/extensions/curator/metadata_curation.md index 9e8d64d39..0b969cc5b 100644 --- a/docs/guides/extensions/curator/metadata_curation.md +++ b/docs/guides/extensions/curator/metadata_curation.md @@ -80,7 +80,8 @@ record_set, curation_task, data_grid = create_record_based_metadata_task( upsert_keys=["StudyKey"], # Fields that uniquely identify records instructions="Complete all required fields according to the schema. Use StudyKey to link records to your data files.", schema_uri=schema_uri, # Schema found in Step 2 - bind_schema_to_record_set=True + bind_schema_to_record_set=True, + assignee_principal_id="123456" # Optional: Assign to a user or team ) print(f"Created RecordSet: {record_set.id}") @@ -156,7 +157,8 @@ record_set, curation_task, data_grid = create_record_based_metadata_task( upsert_keys=["StudyKey"], instructions="Complete metadata for all study animals using StudyKey to link records to data files.", schema_uri=schema_uri, - bind_schema_to_record_set=True + bind_schema_to_record_set=True, + assignee_principal_id="123456" # Optional: Assign to a user or team ) print(f"Record-based workflow created:") diff --git a/synapseclient/extensions/curator/record_based_metadata_task.py b/synapseclient/extensions/curator/record_based_metadata_task.py index 8dd984938..985a410f3 100644 --- a/synapseclient/extensions/curator/record_based_metadata_task.py +++ b/synapseclient/extensions/curator/record_based_metadata_task.py @@ -109,6 +109,7 @@ def create_record_based_metadata_task( schema_uri: str, bind_schema_to_record_set: bool = True, enable_derived_annotations: bool = False, + assignee_principal_id: Optional[str] = None, *, synapse_client: Optional[Synapse] = None, ) -> Tuple[RecordSet, CurationTask, Grid]: @@ -148,7 +149,8 @@ def create_record_based_metadata_task( curation_task_name="BiospecimenMetadataTemplate", upsert_keys=["specimenID"], instructions="Please curate this metadata according to the schema requirements", - schema_uri="schema-org-schema.name.schema-v1.0.0" + schema_uri="schema-org-schema.name.schema-v1.0.0", + assignee_principal_id="123456" # Optional: Assign to a user or team ) ``` @@ -167,6 +169,11 @@ def create_record_based_metadata_task( bind_schema_to_record_set: Whether to bind the given schema to the RecordSet (default: True). enable_derived_annotations: If true, enable derived annotations. Defaults to False. + assignee_principal_id: The principal ID of the user or team to assign to this + curation task. If None (default), the task will be unassigned. For metadata + tasks, this determines the owner of the grid session. Team members can all + join grid sessions owned by their team, while user-owned grid sessions are + restricted to that user only. synapse_client: If not passed in and caching was not disabled by `Synapse.allow_client_caching(False)` this will use the last created instance from the Synapse class constructor. @@ -244,6 +251,7 @@ def create_record_based_metadata_task( data_type=curation_task_name, project_id=project_id, instructions=instructions, + assignee_principal_id=assignee_principal_id, task_properties=RecordBasedMetadataTaskProperties( record_set_id=record_set_id, ), diff --git a/synapseclient/models/curation.py b/synapseclient/models/curation.py index b1ed54b0b..6b3eb5843 100644 --- a/synapseclient/models/curation.py +++ b/synapseclient/models/curation.py @@ -466,6 +466,11 @@ class CurationTask(CurationTaskSynchronousProtocol): modified_by: Optional[str] = None """(Read Only) The ID of the user that last modified this task""" + assignee_principal_id: Optional[str] = None + """The principal ID of the user or team assigned to this task. Null if unassigned. For metadata + tasks, determines the owner of the grid session. Team members can all join grid sessions + owned by their team, while user-owned grid sessions are restricted to that user only.""" + _last_persistent_instance: Optional["CurationTask"] = field( default=None, repr=False, compare=False ) @@ -510,6 +515,7 @@ def fill_from_dict( self.modified_on = synapse_response.get("modifiedOn", None) self.created_by = synapse_response.get("createdBy", None) self.modified_by = synapse_response.get("modifiedBy", None) + self.assignee_principal_id = synapse_response.get("assigneePrincipalId", None) task_properties_dict = synapse_response.get("taskProperties", None) if task_properties_dict: @@ -536,6 +542,7 @@ def to_synapse_request(self) -> Dict[str, Any]: request_dict["modifiedOn"] = self.modified_on request_dict["createdBy"] = self.created_by request_dict["modifiedBy"] = self.modified_by + request_dict["assigneePrincipalId"] = self.assignee_principal_id if self.task_properties is not None: request_dict["taskProperties"] = self.task_properties.to_synapse_request() From c5cb3439f987c02fcbb93ec329cd69a25fd76c04 Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Tue, 10 Feb 2026 10:51:43 -0500 Subject: [PATCH 02/10] update create_file_based_metadata_task --- docs/guides/extensions/curator/metadata_curation.md | 6 ++++-- .../extensions/curator/file_based_metadata_task.py | 10 +++++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/docs/guides/extensions/curator/metadata_curation.md b/docs/guides/extensions/curator/metadata_curation.md index 0b969cc5b..46d478aaf 100644 --- a/docs/guides/extensions/curator/metadata_curation.md +++ b/docs/guides/extensions/curator/metadata_curation.md @@ -107,7 +107,8 @@ entity_view_id, task_id = create_file_based_metadata_task( instructions="Annotate each file with metadata according to the schema requirements.", attach_wiki=False, # Creates a wiki in the folder with the entity view (Defaults to False) entity_view_name="Animal Study Files View", - schema_uri=schema_uri # Schema found in Step 2 + schema_uri=schema_uri, # Schema found in Step 2 + assignee_principal_id="123456" # Optional: Assign to a user or team ) print(f"Created EntityView: {entity_view_id}") @@ -173,7 +174,8 @@ entity_view_id, task_id = create_file_based_metadata_task( instructions="Annotate each file with complete metadata according to schema.", attach_wiki=True, entity_view_name="Animal Study Files View", - schema_uri=schema_uri + schema_uri=schema_uri, + assignee_principal_id="123456" # Optional: Assign to a user or team ) print(f"File-based workflow created:") diff --git a/synapseclient/extensions/curator/file_based_metadata_task.py b/synapseclient/extensions/curator/file_based_metadata_task.py index 207b72a01..88936e67a 100644 --- a/synapseclient/extensions/curator/file_based_metadata_task.py +++ b/synapseclient/extensions/curator/file_based_metadata_task.py @@ -298,6 +298,7 @@ def create_file_based_metadata_task( entity_view_name: str = "JSON Schema view", schema_uri: Optional[str] = None, enable_derived_annotations: bool = False, + assignee_principal_id: Optional[str] = None, *, synapse_client: Optional[Synapse] = None, ) -> Tuple[str, str]: @@ -322,7 +323,8 @@ def create_file_based_metadata_task( instructions="Please curate this metadata according to the schema requirements", attach_wiki=False, entity_view_name="Biospecimen Metadata View", - schema_uri="sage.schemas.v2571-amp.Biospecimen.schema-0.0.1" + schema_uri="sage.schemas.v2571-amp.Biospecimen.schema-0.0.1", + assignee_principal_id="123456" # Optional: Assign to a user or team ) ``` @@ -338,6 +340,11 @@ def create_file_based_metadata_task( the schema will be bound to the folder before creating the entity view. (e.g., 'sage.schemas.v2571-amp.Biospecimen.schema-0.0.1') enable_derived_annotations: If true, enable derived annotations. Defaults to False. + assignee_principal_id: The principal ID of the user or team to assign to this + curation task. If None (default), the task will be unassigned. For metadata + tasks, this determines the owner of the grid session. Team members can all + join grid sessions owned by their team, while user-owned grid sessions are + restricted to that user only. synapse_client: If not passed in and caching was not disabled by `Synapse.allow_client_caching(False)` this will use the last created instance from the Synapse class constructor. @@ -445,6 +452,7 @@ def create_file_based_metadata_task( data_type=task_datatype, project_id=project.id, instructions=instructions, + assignee_principal_id=assignee_principal_id, task_properties=FileBasedMetadataTaskProperties( upload_folder_id=folder_id, file_view_id=entity_view_id, From 42e27cf95455e4bf6dc8ca5a63010f74c3339227 Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Wed, 11 Feb 2026 15:55:22 -0500 Subject: [PATCH 03/10] add unit test --- .../extensions/unit_test_curator.py | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/tests/unit/synapseclient/extensions/unit_test_curator.py b/tests/unit/synapseclient/extensions/unit_test_curator.py index c149ffea5..108650ec4 100644 --- a/tests/unit/synapseclient/extensions/unit_test_curator.py +++ b/tests/unit/synapseclient/extensions/unit_test_curator.py @@ -50,6 +50,7 @@ get_latest_schema_uri, ) from synapseclient.models import ColumnType +from synapseclient.models.curation import FileBasedMetadataTaskProperties from synapseclient.models.mixins import JSONSchemaBinding from synapseclient.models.mixins.json_schema import JSONSchemaVersionInfo @@ -428,6 +429,73 @@ def test_create_file_based_metadata_task_project_traversal( # Verify that syn.get was called twice (for parent folder and project) self.assertEqual(self.mock_syn.get.call_count, 2) + @patch( + "synapseclient.extensions.curator.file_based_metadata_task.Synapse.get_client" + ) + @patch( + "synapseclient.extensions.curator.file_based_metadata_task.create_json_schema_entity_view" + ) + @patch("synapseclient.extensions.curator.file_based_metadata_task.Folder") + @patch("synapseclient.extensions.curator.file_based_metadata_task.CurationTask") + def test_create_file_based_metadata_task_with_assignee( + self, + mock_curation_task_cls, + mock_folder_cls, + mock_create_entity_view, + mock_get_client, + ): + """Test successful creation of filed-based metadata task with assign_principal_id.""" + # GIVEN a file-based metadata task with assign_principal_id set to True + mock_get_client.return_value = self.mock_syn + mock_create_entity_view.return_value = "test_entity_view_id" + + mock_folder = Mock() + mock_folder_cls.return_value = mock_folder + mock_folder.get.return_value = mock_folder + mock_folder.parent_id = "syn11111111" + + mock_project = Mock() + mock_project.concreteType = "org.sagebionetworks.repo.model.Project" + mock_project.id = "syn22222222" + self.mock_syn.get.return_value = mock_project + + mock_task = Mock() + mock_task.task_id = "task123" + mock_curation_task = Mock() + mock_curation_task.store.return_value = mock_task + mock_curation_task_cls.return_value = mock_curation_task + + # WHEN I create the file-based metadata task with assign_principal_id=True + result = create_file_based_metadata_task( + folder_id=self.folder_id, + curation_task_name=self.curation_task_name, + instructions=self.instructions, + attach_wiki=False, + entity_view_name=self.entity_view_name, + schema_uri=self.schema_uri, + enable_derived_annotations=True, + assignee_principal_id="syn1234", + synapse_client=self.mock_syn, + ) + + mock_curation_task_cls.assert_called_once_with( + data_type=self.curation_task_name, + project_id="syn22222222", + instructions=self.instructions, + assignee_principal_id="syn1234", + task_properties=FileBasedMetadataTaskProperties( + upload_folder_id=self.folder_id, + file_view_id=mock_create_entity_view.return_value, + ), + ) + # THEN the task should be created successfully + assert result == ("test_entity_view_id", "task123") + mock_create_entity_view.assert_called_once_with( + syn=self.mock_syn, + synapse_entity_id=self.folder_id, + entity_view_name=self.entity_view_name, + ) + class TestCreateRecordBasedMetadataTask(unittest.TestCase): """Test cases for create_record_based_metadata_task function.""" From f0e1b538421b26fb6c4fccb5ff8fc7cc0c07c66c Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Wed, 11 Feb 2026 16:28:56 -0500 Subject: [PATCH 04/10] add test_create_record_based_metadata_task_with_assignee --- .../extensions/unit_test_curator.py | 90 ++++++++++++++++++- 1 file changed, 89 insertions(+), 1 deletion(-) diff --git a/tests/unit/synapseclient/extensions/unit_test_curator.py b/tests/unit/synapseclient/extensions/unit_test_curator.py index 108650ec4..ee458d387 100644 --- a/tests/unit/synapseclient/extensions/unit_test_curator.py +++ b/tests/unit/synapseclient/extensions/unit_test_curator.py @@ -50,7 +50,10 @@ get_latest_schema_uri, ) from synapseclient.models import ColumnType -from synapseclient.models.curation import FileBasedMetadataTaskProperties +from synapseclient.models.curation import ( + FileBasedMetadataTaskProperties, + RecordBasedMetadataTaskProperties, +) from synapseclient.models.mixins import JSONSchemaBinding from synapseclient.models.mixins.json_schema import JSONSchemaVersionInfo @@ -940,6 +943,91 @@ def test_create_record_based_metadata_task_grid_creation_error( synapse_client=self.mock_syn, ) + @patch( + "synapseclient.extensions.curator.record_based_metadata_task.Synapse.get_client" + ) + @patch( + "synapseclient.extensions.curator.record_based_metadata_task.extract_schema_properties_from_web" + ) + @patch( + "synapseclient.extensions.curator.record_based_metadata_task.tempfile.NamedTemporaryFile" + ) + @patch("synapseclient.extensions.curator.record_based_metadata_task.RecordSet") + @patch("synapseclient.extensions.curator.record_based_metadata_task.CurationTask") + @patch("synapseclient.extensions.curator.record_based_metadata_task.Grid") + @patch("builtins.open") + def test_create_record_based_metadata_task_with_assignee( + self, + mock_open, + mock_grid_cls, + mock_curation_task_cls, + mock_record_set_cls, + mock_temp_file, + mock_extract_schema, + mock_get_client, + ): + """Test successful creation of record-based metadata task with assignee_principal_id.""" + # GIVEN a record-based metadata task with assignee_principal_id + mock_get_client.return_value = self.mock_syn + + mock_df = pd.DataFrame(columns=["specimenID", "age", "diagnosis"]) + mock_extract_schema.return_value = mock_df + + mock_temp = Mock() + mock_temp.name = "/tmp/test.csv" + mock_temp_file.return_value = mock_temp + + mock_record_set = Mock() + mock_record_set.id = "syn87654321" + mock_record_set_instance = Mock() + mock_record_set_instance.store.return_value = mock_record_set + mock_record_set_cls.return_value = mock_record_set_instance + + mock_task = Mock() + mock_task.task_id = "task123" + mock_curation_task = Mock() + mock_curation_task.store.return_value = mock_task + mock_curation_task_cls.return_value = mock_curation_task + + mock_grid = Mock() + mock_grid_instance = Mock() + mock_grid_instance.export_to_record_set.return_value = mock_grid + mock_grid_cls.return_value = mock_grid_instance + + # WHEN I create the record-based metadata task with assignee_principal_id + result = create_record_based_metadata_task( + project_id=self.project_id, + folder_id=self.folder_id, + record_set_name=self.record_set_name, + record_set_description=self.record_set_description, + curation_task_name=self.curation_task_name, + upsert_keys=self.upsert_keys, + instructions=self.instructions, + schema_uri=self.schema_uri, + bind_schema_to_record_set=True, + assignee_principal_id="syn1234", + synapse_client=self.mock_syn, + ) + + # THEN the task should be created successfully + assert isinstance(result, tuple) + assert len(result) == 3 + record_set, task, grid = result + assert record_set == mock_record_set + assert task == mock_task + assert grid == mock_grid + + # AND the CurationTask should be called with the correct assignee_principal_id + mock_curation_task_cls.assert_called_once_with( + data_type=self.curation_task_name, + project_id=self.project_id, + instructions=self.instructions, + assignee_principal_id="syn1234", + task_properties=RecordBasedMetadataTaskProperties( + record_set_id=mock_record_set.id + ), + ) + class TestQuerySchemaRegistry(unittest.TestCase): """Test cases for query_schema_registry function.""" From 64ad47b5ecefce04fa75a9e343b41d8f54f78ac5 Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Wed, 11 Feb 2026 16:56:21 -0500 Subject: [PATCH 05/10] modify test_store_record_based_curation_task --- tests/integration/conftest.py | 5 ++++- .../synapseclient/models/synchronous/test_curation.py | 11 ++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 708462ccb..14d1c126a 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -200,7 +200,10 @@ async def _cleanup(syn: Synapse, items): else: print("Error cleaning up entity: " + str(ex)) else: - sys.stderr.write("Don't know how to clean: %s" % str(item)) + sys.stderr.write( + "Don't know how to clean: %s (type: %s)" + % (str(item), type(item).__name__) + ) active_span_processors = [] diff --git a/tests/integration/synapseclient/models/synchronous/test_curation.py b/tests/integration/synapseclient/models/synchronous/test_curation.py index cda597efc..8dace11de 100644 --- a/tests/integration/synapseclient/models/synchronous/test_curation.py +++ b/tests/integration/synapseclient/models/synchronous/test_curation.py @@ -20,6 +20,7 @@ Project, RecordBasedMetadataTaskProperties, RecordSet, + Team, ViewTypeMask, ) @@ -147,6 +148,12 @@ def init(self, syn: Synapse, schedule_for_cleanup: Callable[..., None]) -> None: self.syn = syn self.schedule_for_cleanup = schedule_for_cleanup + @pytest.fixture(autouse=True, scope="function") + def team(self) -> Team: + team = Team(name=f"test_team_{uuid.uuid4()}").create(synapse_client=self.syn) + self.schedule_for_cleanup(str(team.id)) + return team + @pytest.fixture(scope="function") def folder_with_view(self, project_model: Project) -> tuple[Folder, EntityView]: """Create a folder with an associated EntityView for file-based testing.""" @@ -275,7 +282,7 @@ def test_store_file_based_curation_task( assert stored_task.created_by is not None def test_store_record_based_curation_task( - self, project_model: Project, record_set: RecordSet + self, project_model: Project, record_set: RecordSet, team: Team ) -> None: # GIVEN a project and record set # AND a RecordBasedMetadataTaskProperties @@ -290,6 +297,7 @@ def test_store_record_based_curation_task( project_id=project_model.id, instructions="Please curate this record-based test data.", task_properties=task_properties, + assignee_principal_id=str(team.id), ) # WHEN I store the curation task @@ -307,6 +315,7 @@ def test_store_record_based_curation_task( assert stored_task.etag is not None assert stored_task.created_on is not None assert stored_task.created_by is not None + assert stored_task.assignee_principal_id == str(team.id) def test_store_update_existing_curation_task( self, project_model: Project, record_set: RecordSet From 4ae5f2eb8257faf5ceaf872178159d621d6bbf67 Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Wed, 11 Feb 2026 17:00:28 -0500 Subject: [PATCH 06/10] modify test to test task assignment --- .../synapseclient/models/synchronous/test_curation.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/synapseclient/models/synchronous/test_curation.py b/tests/integration/synapseclient/models/synchronous/test_curation.py index 8dace11de..dfcad22ff 100644 --- a/tests/integration/synapseclient/models/synchronous/test_curation.py +++ b/tests/integration/synapseclient/models/synchronous/test_curation.py @@ -246,7 +246,7 @@ def record_set(self, project_model: Project) -> RecordSet: raise def test_store_file_based_curation_task( - self, project_model: Project, folder_with_view: tuple[Folder, EntityView] + self, team, project_model: Project, folder_with_view: tuple[Folder, EntityView] ) -> None: # GIVEN a project, folder, and entity view folder, entity_view = folder_with_view @@ -264,6 +264,7 @@ def test_store_file_based_curation_task( project_id=project_model.id, instructions="Please curate this test data.", task_properties=task_properties, + assignee_principal_id=str(team.id), ) # WHEN I store the curation task @@ -280,6 +281,7 @@ def test_store_file_based_curation_task( assert stored_task.etag is not None assert stored_task.created_on is not None assert stored_task.created_by is not None + assert stored_task.assignee_principal_id == str(team.id) def test_store_record_based_curation_task( self, project_model: Project, record_set: RecordSet, team: Team From 0b844c8cfa5cbe06bcce531bd19e7a57453d92b2 Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Thu, 12 Feb 2026 08:36:27 -0500 Subject: [PATCH 07/10] fix typo; remove autouse=True --- .../synapseclient/models/synchronous/test_curation.py | 4 ++-- tests/unit/synapseclient/extensions/unit_test_curator.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/integration/synapseclient/models/synchronous/test_curation.py b/tests/integration/synapseclient/models/synchronous/test_curation.py index dfcad22ff..cd78bf7e0 100644 --- a/tests/integration/synapseclient/models/synchronous/test_curation.py +++ b/tests/integration/synapseclient/models/synchronous/test_curation.py @@ -148,10 +148,10 @@ def init(self, syn: Synapse, schedule_for_cleanup: Callable[..., None]) -> None: self.syn = syn self.schedule_for_cleanup = schedule_for_cleanup - @pytest.fixture(autouse=True, scope="function") + @pytest.fixture(scope="function") def team(self) -> Team: team = Team(name=f"test_team_{uuid.uuid4()}").create(synapse_client=self.syn) - self.schedule_for_cleanup(str(team.id)) + self.schedule_for_cleanup(team) return team @pytest.fixture(scope="function") diff --git a/tests/unit/synapseclient/extensions/unit_test_curator.py b/tests/unit/synapseclient/extensions/unit_test_curator.py index ee458d387..5dfce3cfc 100644 --- a/tests/unit/synapseclient/extensions/unit_test_curator.py +++ b/tests/unit/synapseclient/extensions/unit_test_curator.py @@ -447,8 +447,8 @@ def test_create_file_based_metadata_task_with_assignee( mock_create_entity_view, mock_get_client, ): - """Test successful creation of filed-based metadata task with assign_principal_id.""" - # GIVEN a file-based metadata task with assign_principal_id set to True + """Test successful creation of file-based metadata task with assignee_principal_id.""" + # GIVEN a file-based metadata task with assignee_principal_id mock_get_client.return_value = self.mock_syn mock_create_entity_view.return_value = "test_entity_view_id" From e29a43846a06c978b4e991a7ea501409dbde4564 Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Thu, 12 Feb 2026 16:33:16 -0500 Subject: [PATCH 08/10] make sure that assignee id can be either str or int --- .../curator/file_based_metadata_task.py | 20 +- .../curator/record_based_metadata_task.py | 20 +- .../extensions/unit_test_curator.py | 244 ++++++++++-------- 3 files changed, 162 insertions(+), 122 deletions(-) diff --git a/synapseclient/extensions/curator/file_based_metadata_task.py b/synapseclient/extensions/curator/file_based_metadata_task.py index 88936e67a..146409ab1 100644 --- a/synapseclient/extensions/curator/file_based_metadata_task.py +++ b/synapseclient/extensions/curator/file_based_metadata_task.py @@ -5,7 +5,7 @@ in Synapse, including EntityView creation, CurationTask setup, and Wiki attachment. """ -from typing import Any, Optional, Tuple +from typing import Any, Optional, Tuple, Union from synapseclient import Synapse # type: ignore from synapseclient import Wiki # type: ignore @@ -298,7 +298,7 @@ def create_file_based_metadata_task( entity_view_name: str = "JSON Schema view", schema_uri: Optional[str] = None, enable_derived_annotations: bool = False, - assignee_principal_id: Optional[str] = None, + assignee_principal_id: Optional[Union[str, int]] = None, *, synapse_client: Optional[Synapse] = None, ) -> Tuple[str, str]: @@ -324,7 +324,7 @@ def create_file_based_metadata_task( attach_wiki=False, entity_view_name="Biospecimen Metadata View", schema_uri="sage.schemas.v2571-amp.Biospecimen.schema-0.0.1", - assignee_principal_id="123456" # Optional: Assign to a user or team + assignee_principal_id=123456 # Optional: Assign to a user or team (can be str or int) ) ``` @@ -341,10 +341,10 @@ def create_file_based_metadata_task( (e.g., 'sage.schemas.v2571-amp.Biospecimen.schema-0.0.1') enable_derived_annotations: If true, enable derived annotations. Defaults to False. assignee_principal_id: The principal ID of the user or team to assign to this - curation task. If None (default), the task will be unassigned. For metadata - tasks, this determines the owner of the grid session. Team members can all - join grid sessions owned by their team, while user-owned grid sessions are - restricted to that user only. + curation task. Can be provided as either a string or an integer. If None + (default), the task will be unassigned. For metadata tasks, this determines + the owner of the grid session. Team members can all join grid sessions owned + by their team, while user-owned grid sessions are restricted to that user only. synapse_client: If not passed in and caching was not disabled by `Synapse.allow_client_caching(False)` this will use the last created instance from the Synapse class constructor. @@ -452,7 +452,11 @@ def create_file_based_metadata_task( data_type=task_datatype, project_id=project.id, instructions=instructions, - assignee_principal_id=assignee_principal_id, + assignee_principal_id=( + str(assignee_principal_id) + if assignee_principal_id is not None + else None + ), task_properties=FileBasedMetadataTaskProperties( upload_folder_id=folder_id, file_view_id=entity_view_id, diff --git a/synapseclient/extensions/curator/record_based_metadata_task.py b/synapseclient/extensions/curator/record_based_metadata_task.py index 985a410f3..7274ffdde 100644 --- a/synapseclient/extensions/curator/record_based_metadata_task.py +++ b/synapseclient/extensions/curator/record_based_metadata_task.py @@ -6,7 +6,7 @@ in Synapse, including RecordSet creation, CurationTask setup, and Grid view initialization. """ import tempfile -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, Union from synapseclient import Synapse from synapseclient.core.typing_utils import DataFrame as DATA_FRAME_TYPE @@ -109,7 +109,7 @@ def create_record_based_metadata_task( schema_uri: str, bind_schema_to_record_set: bool = True, enable_derived_annotations: bool = False, - assignee_principal_id: Optional[str] = None, + assignee_principal_id: Optional[Union[str, int]] = None, *, synapse_client: Optional[Synapse] = None, ) -> Tuple[RecordSet, CurationTask, Grid]: @@ -150,7 +150,7 @@ def create_record_based_metadata_task( upsert_keys=["specimenID"], instructions="Please curate this metadata according to the schema requirements", schema_uri="schema-org-schema.name.schema-v1.0.0", - assignee_principal_id="123456" # Optional: Assign to a user or team + assignee_principal_id=123456 # Optional: Assign to a user or team (can be str or int) ) ``` @@ -170,10 +170,10 @@ def create_record_based_metadata_task( (default: True). enable_derived_annotations: If true, enable derived annotations. Defaults to False. assignee_principal_id: The principal ID of the user or team to assign to this - curation task. If None (default), the task will be unassigned. For metadata - tasks, this determines the owner of the grid session. Team members can all - join grid sessions owned by their team, while user-owned grid sessions are - restricted to that user only. + curation task. Can be provided as either a string or an integer. If None + (default), the task will be unassigned. For metadata tasks, this determines + the owner of the grid session. Team members can all join grid sessions owned + by their team, while user-owned grid sessions are restricted to that user only. synapse_client: If not passed in and caching was not disabled by `Synapse.allow_client_caching(False)` this will use the last created instance from the Synapse class constructor. @@ -251,7 +251,11 @@ def create_record_based_metadata_task( data_type=curation_task_name, project_id=project_id, instructions=instructions, - assignee_principal_id=assignee_principal_id, + assignee_principal_id=( + str(assignee_principal_id) + if assignee_principal_id is not None + else None + ), task_properties=RecordBasedMetadataTaskProperties( record_set_id=record_set_id, ), diff --git a/tests/unit/synapseclient/extensions/unit_test_curator.py b/tests/unit/synapseclient/extensions/unit_test_curator.py index 5dfce3cfc..ebaef1e88 100644 --- a/tests/unit/synapseclient/extensions/unit_test_curator.py +++ b/tests/unit/synapseclient/extensions/unit_test_curator.py @@ -448,56 +448,71 @@ def test_create_file_based_metadata_task_with_assignee( mock_get_client, ): """Test successful creation of file-based metadata task with assignee_principal_id.""" - # GIVEN a file-based metadata task with assignee_principal_id - mock_get_client.return_value = self.mock_syn - mock_create_entity_view.return_value = "test_entity_view_id" - - mock_folder = Mock() - mock_folder_cls.return_value = mock_folder - mock_folder.get.return_value = mock_folder - mock_folder.parent_id = "syn11111111" - - mock_project = Mock() - mock_project.concreteType = "org.sagebionetworks.repo.model.Project" - mock_project.id = "syn22222222" - self.mock_syn.get.return_value = mock_project - - mock_task = Mock() - mock_task.task_id = "task123" - mock_curation_task = Mock() - mock_curation_task.store.return_value = mock_task - mock_curation_task_cls.return_value = mock_curation_task + # Test both string and int inputs - int should be converted to string + test_cases = [ + ("1234", "1234"), + (1234, "1234"), + ] - # WHEN I create the file-based metadata task with assign_principal_id=True - result = create_file_based_metadata_task( - folder_id=self.folder_id, - curation_task_name=self.curation_task_name, - instructions=self.instructions, - attach_wiki=False, - entity_view_name=self.entity_view_name, - schema_uri=self.schema_uri, - enable_derived_annotations=True, - assignee_principal_id="syn1234", - synapse_client=self.mock_syn, - ) + for input_assignee, expected_assignee in test_cases: + with self.subTest(input_assignee=input_assignee): + # Reset mocks for each subtest + mock_curation_task_cls.reset_mock() + mock_folder_cls.reset_mock() + mock_create_entity_view.reset_mock() + mock_get_client.reset_mock() + + # GIVEN a file-based metadata task with assignee_principal_id + mock_get_client.return_value = self.mock_syn + mock_create_entity_view.return_value = "test_entity_view_id" + + mock_folder = Mock() + mock_folder_cls.return_value = mock_folder + mock_folder.get.return_value = mock_folder + mock_folder.parent_id = "syn11111111" + + mock_project = Mock() + mock_project.concreteType = "org.sagebionetworks.repo.model.Project" + mock_project.id = "syn22222222" + self.mock_syn.get.return_value = mock_project + + mock_task = Mock() + mock_task.task_id = "task123" + mock_curation_task = Mock() + mock_curation_task.store.return_value = mock_task + mock_curation_task_cls.return_value = mock_curation_task + + # WHEN I create the file-based metadata task with assignee_principal_id + result = create_file_based_metadata_task( + folder_id=self.folder_id, + curation_task_name=self.curation_task_name, + instructions=self.instructions, + attach_wiki=False, + entity_view_name=self.entity_view_name, + schema_uri=self.schema_uri, + enable_derived_annotations=True, + assignee_principal_id=input_assignee, + synapse_client=self.mock_syn, + ) - mock_curation_task_cls.assert_called_once_with( - data_type=self.curation_task_name, - project_id="syn22222222", - instructions=self.instructions, - assignee_principal_id="syn1234", - task_properties=FileBasedMetadataTaskProperties( - upload_folder_id=self.folder_id, - file_view_id=mock_create_entity_view.return_value, - ), - ) - # THEN the task should be created successfully - assert result == ("test_entity_view_id", "task123") - mock_create_entity_view.assert_called_once_with( - syn=self.mock_syn, - synapse_entity_id=self.folder_id, - entity_view_name=self.entity_view_name, - ) + # THEN the CurationTask should be called with assignee_principal_id as string + mock_curation_task_cls.assert_called_once_with( + data_type=self.curation_task_name, + project_id="syn22222222", + instructions=self.instructions, + assignee_principal_id=expected_assignee, + task_properties=FileBasedMetadataTaskProperties( + upload_folder_id=self.folder_id, + file_view_id=mock_create_entity_view.return_value, + ), + ) + # AND the task should be created successfully + assert result == ("test_entity_view_id", "task123") + mock_create_entity_view.assert_called_once_with( + syn=self.mock_syn, + synapse_entity_id=self.folder_id, + entity_view_name=self.entity_view_name, + ) class TestCreateRecordBasedMetadataTask(unittest.TestCase): @@ -967,66 +982,83 @@ def test_create_record_based_metadata_task_with_assignee( mock_get_client, ): """Test successful creation of record-based metadata task with assignee_principal_id.""" - # GIVEN a record-based metadata task with assignee_principal_id - mock_get_client.return_value = self.mock_syn - - mock_df = pd.DataFrame(columns=["specimenID", "age", "diagnosis"]) - mock_extract_schema.return_value = mock_df - - mock_temp = Mock() - mock_temp.name = "/tmp/test.csv" - mock_temp_file.return_value = mock_temp - - mock_record_set = Mock() - mock_record_set.id = "syn87654321" - mock_record_set_instance = Mock() - mock_record_set_instance.store.return_value = mock_record_set - mock_record_set_cls.return_value = mock_record_set_instance - - mock_task = Mock() - mock_task.task_id = "task123" - mock_curation_task = Mock() - mock_curation_task.store.return_value = mock_task - mock_curation_task_cls.return_value = mock_curation_task - - mock_grid = Mock() - mock_grid_instance = Mock() - mock_grid_instance.export_to_record_set.return_value = mock_grid - mock_grid_cls.return_value = mock_grid_instance - - # WHEN I create the record-based metadata task with assignee_principal_id - result = create_record_based_metadata_task( - project_id=self.project_id, - folder_id=self.folder_id, - record_set_name=self.record_set_name, - record_set_description=self.record_set_description, - curation_task_name=self.curation_task_name, - upsert_keys=self.upsert_keys, - instructions=self.instructions, - schema_uri=self.schema_uri, - bind_schema_to_record_set=True, - assignee_principal_id="syn1234", - synapse_client=self.mock_syn, - ) + # Test both string and int inputs - int should be converted to string + test_cases = [ + ("1234", "1234"), + (1234, "1234"), + ] - # THEN the task should be created successfully - assert isinstance(result, tuple) - assert len(result) == 3 - record_set, task, grid = result - assert record_set == mock_record_set - assert task == mock_task - assert grid == mock_grid + for input_assignee, expected_assignee in test_cases: + with self.subTest(input_assignee=input_assignee): + # Reset mocks for each subtest + mock_open.reset_mock() + mock_grid_cls.reset_mock() + mock_curation_task_cls.reset_mock() + mock_record_set_cls.reset_mock() + mock_temp_file.reset_mock() + mock_extract_schema.reset_mock() + mock_get_client.reset_mock() + + # GIVEN a record-based metadata task with assignee_principal_id + mock_get_client.return_value = self.mock_syn + + mock_df = pd.DataFrame(columns=["specimenID", "age", "diagnosis"]) + mock_extract_schema.return_value = mock_df + + mock_temp = Mock() + mock_temp.name = "/tmp/test.csv" + mock_temp_file.return_value = mock_temp + + mock_record_set = Mock() + mock_record_set.id = "syn87654321" + mock_record_set_instance = Mock() + mock_record_set_instance.store.return_value = mock_record_set + mock_record_set_cls.return_value = mock_record_set_instance + + mock_task = Mock() + mock_task.task_id = "task123" + mock_curation_task = Mock() + mock_curation_task.store.return_value = mock_task + mock_curation_task_cls.return_value = mock_curation_task + + mock_grid = Mock() + mock_grid_instance = Mock() + mock_grid_instance.export_to_record_set.return_value = mock_grid + mock_grid_cls.return_value = mock_grid_instance + + # WHEN I create the record-based metadata task with assignee_principal_id + result = create_record_based_metadata_task( + project_id=self.project_id, + folder_id=self.folder_id, + record_set_name=self.record_set_name, + record_set_description=self.record_set_description, + curation_task_name=self.curation_task_name, + upsert_keys=self.upsert_keys, + instructions=self.instructions, + schema_uri=self.schema_uri, + bind_schema_to_record_set=True, + assignee_principal_id=input_assignee, + synapse_client=self.mock_syn, + ) - # AND the CurationTask should be called with the correct assignee_principal_id - mock_curation_task_cls.assert_called_once_with( - data_type=self.curation_task_name, - project_id=self.project_id, - instructions=self.instructions, - assignee_principal_id="syn1234", - task_properties=RecordBasedMetadataTaskProperties( - record_set_id=mock_record_set.id - ), - ) + # THEN the task should be created successfully + assert isinstance(result, tuple) + assert len(result) == 3 + record_set, task, grid = result + assert record_set == mock_record_set + assert task == mock_task + assert grid == mock_grid + + # AND the CurationTask should be called with assignee_principal_id as string + mock_curation_task_cls.assert_called_once_with( + data_type=self.curation_task_name, + project_id=self.project_id, + instructions=self.instructions, + assignee_principal_id=expected_assignee, + task_properties=RecordBasedMetadataTaskProperties( + record_set_id=mock_record_set.id + ), + ) class TestQuerySchemaRegistry(unittest.TestCase): From 5b8b11c03d42f2bc9877827e52d53c8a05689c30 Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Fri, 13 Feb 2026 12:27:39 -0500 Subject: [PATCH 09/10] add doc --- docs/guides/extensions/curator/metadata_curation.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/guides/extensions/curator/metadata_curation.md b/docs/guides/extensions/curator/metadata_curation.md index 46d478aaf..d28e3c75e 100644 --- a/docs/guides/extensions/curator/metadata_curation.md +++ b/docs/guides/extensions/curator/metadata_curation.md @@ -69,6 +69,8 @@ all_schemas = query_schema_registry( Use this when metadata describes individual data files and is stored as annotations directly on each file. +**Team collaboration**: To enable multiple users to collaborate on a Grid session, assign the task to a Synapse team using `assignee_principal_id`. The team must exist before creating the task for collaboration on a grid. + ```python record_set, curation_task, data_grid = create_record_based_metadata_task( synapse_client=syn, @@ -99,6 +101,8 @@ print(f"Created CurationTask: {curation_task.task_id}") Use this when metadata is normalized in structured records to eliminate duplication and ensure consistency. +**Team collaboration**: To enable multiple users to collaborate on a Grid session, assign the task to a Synapse team using `assignee_principal_id`. The team must exist before creating the task for collaboration on a grid. + ```python entity_view_id, task_id = create_file_based_metadata_task( synapse_client=syn, From d128f0bcbe41a6dbe8b3488009981255964b679a Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Fri, 13 Feb 2026 12:35:26 -0500 Subject: [PATCH 10/10] add to pre-requisite --- docs/guides/extensions/curator/metadata_curation.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/docs/guides/extensions/curator/metadata_curation.md b/docs/guides/extensions/curator/metadata_curation.md index d28e3c75e..65149d87e 100644 --- a/docs/guides/extensions/curator/metadata_curation.md +++ b/docs/guides/extensions/curator/metadata_curation.md @@ -18,6 +18,7 @@ By following this guide, you will: - Python environment with synapseclient and the `curator` extension installed (ie. `pip install --upgrade "synapseclient[curator]"`) - An existing Synapse project and folder where you want to manage metadata - A JSON Schema registered in Synapse (many schemas are already available for Sage-affiliated projects, or you can register your own by following the [JSON Schema tutorial](../../../tutorials/python/json_schema.md)) +- (Optional) An existing Synapse team if you want multiple users to collaborate on the same Grid session. Pass the team's ID as `assignee_principal_id` when creating the curation task. ## Step 1: Authenticate and import required functions @@ -69,8 +70,6 @@ all_schemas = query_schema_registry( Use this when metadata describes individual data files and is stored as annotations directly on each file. -**Team collaboration**: To enable multiple users to collaborate on a Grid session, assign the task to a Synapse team using `assignee_principal_id`. The team must exist before creating the task for collaboration on a grid. - ```python record_set, curation_task, data_grid = create_record_based_metadata_task( synapse_client=syn, @@ -101,8 +100,6 @@ print(f"Created CurationTask: {curation_task.task_id}") Use this when metadata is normalized in structured records to eliminate duplication and ensure consistency. -**Team collaboration**: To enable multiple users to collaborate on a Grid session, assign the task to a Synapse team using `assignee_principal_id`. The team must exist before creating the task for collaboration on a grid. - ```python entity_view_id, task_id = create_file_based_metadata_task( synapse_client=syn,