diff --git a/docs/guides/extensions/curator/metadata_curation.md b/docs/guides/extensions/curator/metadata_curation.md index 9e8d64d39..65149d87e 100644 --- a/docs/guides/extensions/curator/metadata_curation.md +++ b/docs/guides/extensions/curator/metadata_curation.md @@ -18,6 +18,7 @@ By following this guide, you will: - Python environment with synapseclient and the `curator` extension installed (ie. `pip install --upgrade "synapseclient[curator]"`) - An existing Synapse project and folder where you want to manage metadata - A JSON Schema registered in Synapse (many schemas are already available for Sage-affiliated projects, or you can register your own by following the [JSON Schema tutorial](../../../tutorials/python/json_schema.md)) +- (Optional) An existing Synapse team if you want multiple users to collaborate on the same Grid session. Pass the team's ID as `assignee_principal_id` when creating the curation task. ## Step 1: Authenticate and import required functions @@ -80,7 +81,8 @@ record_set, curation_task, data_grid = create_record_based_metadata_task( upsert_keys=["StudyKey"], # Fields that uniquely identify records instructions="Complete all required fields according to the schema. Use StudyKey to link records to your data files.", schema_uri=schema_uri, # Schema found in Step 2 - bind_schema_to_record_set=True + bind_schema_to_record_set=True, + assignee_principal_id="123456" # Optional: Assign to a user or team ) print(f"Created RecordSet: {record_set.id}") @@ -106,7 +108,8 @@ entity_view_id, task_id = create_file_based_metadata_task( instructions="Annotate each file with metadata according to the schema requirements.", attach_wiki=False, # Creates a wiki in the folder with the entity view (Defaults to False) entity_view_name="Animal Study Files View", - schema_uri=schema_uri # Schema found in Step 2 + schema_uri=schema_uri, # Schema found in Step 2 + assignee_principal_id="123456" # Optional: Assign to a user or team ) print(f"Created EntityView: {entity_view_id}") @@ -156,7 +159,8 @@ record_set, curation_task, data_grid = create_record_based_metadata_task( upsert_keys=["StudyKey"], instructions="Complete metadata for all study animals using StudyKey to link records to data files.", schema_uri=schema_uri, - bind_schema_to_record_set=True + bind_schema_to_record_set=True, + assignee_principal_id="123456" # Optional: Assign to a user or team ) print(f"Record-based workflow created:") @@ -171,7 +175,8 @@ entity_view_id, task_id = create_file_based_metadata_task( instructions="Annotate each file with complete metadata according to schema.", attach_wiki=True, entity_view_name="Animal Study Files View", - schema_uri=schema_uri + schema_uri=schema_uri, + assignee_principal_id="123456" # Optional: Assign to a user or team ) print(f"File-based workflow created:") diff --git a/synapseclient/extensions/curator/file_based_metadata_task.py b/synapseclient/extensions/curator/file_based_metadata_task.py index 207b72a01..146409ab1 100644 --- a/synapseclient/extensions/curator/file_based_metadata_task.py +++ b/synapseclient/extensions/curator/file_based_metadata_task.py @@ -5,7 +5,7 @@ in Synapse, including EntityView creation, CurationTask setup, and Wiki attachment. """ -from typing import Any, Optional, Tuple +from typing import Any, Optional, Tuple, Union from synapseclient import Synapse # type: ignore from synapseclient import Wiki # type: ignore @@ -298,6 +298,7 @@ def create_file_based_metadata_task( entity_view_name: str = "JSON Schema view", schema_uri: Optional[str] = None, enable_derived_annotations: bool = False, + assignee_principal_id: Optional[Union[str, int]] = None, *, synapse_client: Optional[Synapse] = None, ) -> Tuple[str, str]: @@ -322,7 +323,8 @@ def create_file_based_metadata_task( instructions="Please curate this metadata according to the schema requirements", attach_wiki=False, entity_view_name="Biospecimen Metadata View", - schema_uri="sage.schemas.v2571-amp.Biospecimen.schema-0.0.1" + schema_uri="sage.schemas.v2571-amp.Biospecimen.schema-0.0.1", + assignee_principal_id=123456 # Optional: Assign to a user or team (can be str or int) ) ``` @@ -338,6 +340,11 @@ def create_file_based_metadata_task( the schema will be bound to the folder before creating the entity view. (e.g., 'sage.schemas.v2571-amp.Biospecimen.schema-0.0.1') enable_derived_annotations: If true, enable derived annotations. Defaults to False. + assignee_principal_id: The principal ID of the user or team to assign to this + curation task. Can be provided as either a string or an integer. If None + (default), the task will be unassigned. For metadata tasks, this determines + the owner of the grid session. Team members can all join grid sessions owned + by their team, while user-owned grid sessions are restricted to that user only. synapse_client: If not passed in and caching was not disabled by `Synapse.allow_client_caching(False)` this will use the last created instance from the Synapse class constructor. @@ -445,6 +452,11 @@ def create_file_based_metadata_task( data_type=task_datatype, project_id=project.id, instructions=instructions, + assignee_principal_id=( + str(assignee_principal_id) + if assignee_principal_id is not None + else None + ), task_properties=FileBasedMetadataTaskProperties( upload_folder_id=folder_id, file_view_id=entity_view_id, diff --git a/synapseclient/extensions/curator/record_based_metadata_task.py b/synapseclient/extensions/curator/record_based_metadata_task.py index 8dd984938..7274ffdde 100644 --- a/synapseclient/extensions/curator/record_based_metadata_task.py +++ b/synapseclient/extensions/curator/record_based_metadata_task.py @@ -6,7 +6,7 @@ in Synapse, including RecordSet creation, CurationTask setup, and Grid view initialization. """ import tempfile -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, Union from synapseclient import Synapse from synapseclient.core.typing_utils import DataFrame as DATA_FRAME_TYPE @@ -109,6 +109,7 @@ def create_record_based_metadata_task( schema_uri: str, bind_schema_to_record_set: bool = True, enable_derived_annotations: bool = False, + assignee_principal_id: Optional[Union[str, int]] = None, *, synapse_client: Optional[Synapse] = None, ) -> Tuple[RecordSet, CurationTask, Grid]: @@ -148,7 +149,8 @@ def create_record_based_metadata_task( curation_task_name="BiospecimenMetadataTemplate", upsert_keys=["specimenID"], instructions="Please curate this metadata according to the schema requirements", - schema_uri="schema-org-schema.name.schema-v1.0.0" + schema_uri="schema-org-schema.name.schema-v1.0.0", + assignee_principal_id=123456 # Optional: Assign to a user or team (can be str or int) ) ``` @@ -167,6 +169,11 @@ def create_record_based_metadata_task( bind_schema_to_record_set: Whether to bind the given schema to the RecordSet (default: True). enable_derived_annotations: If true, enable derived annotations. Defaults to False. + assignee_principal_id: The principal ID of the user or team to assign to this + curation task. Can be provided as either a string or an integer. If None + (default), the task will be unassigned. For metadata tasks, this determines + the owner of the grid session. Team members can all join grid sessions owned + by their team, while user-owned grid sessions are restricted to that user only. synapse_client: If not passed in and caching was not disabled by `Synapse.allow_client_caching(False)` this will use the last created instance from the Synapse class constructor. @@ -244,6 +251,11 @@ def create_record_based_metadata_task( data_type=curation_task_name, project_id=project_id, instructions=instructions, + assignee_principal_id=( + str(assignee_principal_id) + if assignee_principal_id is not None + else None + ), task_properties=RecordBasedMetadataTaskProperties( record_set_id=record_set_id, ), diff --git a/synapseclient/models/curation.py b/synapseclient/models/curation.py index b1ed54b0b..6b3eb5843 100644 --- a/synapseclient/models/curation.py +++ b/synapseclient/models/curation.py @@ -466,6 +466,11 @@ class CurationTask(CurationTaskSynchronousProtocol): modified_by: Optional[str] = None """(Read Only) The ID of the user that last modified this task""" + assignee_principal_id: Optional[str] = None + """The principal ID of the user or team assigned to this task. Null if unassigned. For metadata + tasks, determines the owner of the grid session. Team members can all join grid sessions + owned by their team, while user-owned grid sessions are restricted to that user only.""" + _last_persistent_instance: Optional["CurationTask"] = field( default=None, repr=False, compare=False ) @@ -510,6 +515,7 @@ def fill_from_dict( self.modified_on = synapse_response.get("modifiedOn", None) self.created_by = synapse_response.get("createdBy", None) self.modified_by = synapse_response.get("modifiedBy", None) + self.assignee_principal_id = synapse_response.get("assigneePrincipalId", None) task_properties_dict = synapse_response.get("taskProperties", None) if task_properties_dict: @@ -536,6 +542,7 @@ def to_synapse_request(self) -> Dict[str, Any]: request_dict["modifiedOn"] = self.modified_on request_dict["createdBy"] = self.created_by request_dict["modifiedBy"] = self.modified_by + request_dict["assigneePrincipalId"] = self.assignee_principal_id if self.task_properties is not None: request_dict["taskProperties"] = self.task_properties.to_synapse_request() diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 708462ccb..14d1c126a 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -200,7 +200,10 @@ async def _cleanup(syn: Synapse, items): else: print("Error cleaning up entity: " + str(ex)) else: - sys.stderr.write("Don't know how to clean: %s" % str(item)) + sys.stderr.write( + "Don't know how to clean: %s (type: %s)" + % (str(item), type(item).__name__) + ) active_span_processors = [] diff --git a/tests/integration/synapseclient/models/synchronous/test_curation.py b/tests/integration/synapseclient/models/synchronous/test_curation.py index cda597efc..cd78bf7e0 100644 --- a/tests/integration/synapseclient/models/synchronous/test_curation.py +++ b/tests/integration/synapseclient/models/synchronous/test_curation.py @@ -20,6 +20,7 @@ Project, RecordBasedMetadataTaskProperties, RecordSet, + Team, ViewTypeMask, ) @@ -147,6 +148,12 @@ def init(self, syn: Synapse, schedule_for_cleanup: Callable[..., None]) -> None: self.syn = syn self.schedule_for_cleanup = schedule_for_cleanup + @pytest.fixture(scope="function") + def team(self) -> Team: + team = Team(name=f"test_team_{uuid.uuid4()}").create(synapse_client=self.syn) + self.schedule_for_cleanup(team) + return team + @pytest.fixture(scope="function") def folder_with_view(self, project_model: Project) -> tuple[Folder, EntityView]: """Create a folder with an associated EntityView for file-based testing.""" @@ -239,7 +246,7 @@ def record_set(self, project_model: Project) -> RecordSet: raise def test_store_file_based_curation_task( - self, project_model: Project, folder_with_view: tuple[Folder, EntityView] + self, team, project_model: Project, folder_with_view: tuple[Folder, EntityView] ) -> None: # GIVEN a project, folder, and entity view folder, entity_view = folder_with_view @@ -257,6 +264,7 @@ def test_store_file_based_curation_task( project_id=project_model.id, instructions="Please curate this test data.", task_properties=task_properties, + assignee_principal_id=str(team.id), ) # WHEN I store the curation task @@ -273,9 +281,10 @@ def test_store_file_based_curation_task( assert stored_task.etag is not None assert stored_task.created_on is not None assert stored_task.created_by is not None + assert stored_task.assignee_principal_id == str(team.id) def test_store_record_based_curation_task( - self, project_model: Project, record_set: RecordSet + self, project_model: Project, record_set: RecordSet, team: Team ) -> None: # GIVEN a project and record set # AND a RecordBasedMetadataTaskProperties @@ -290,6 +299,7 @@ def test_store_record_based_curation_task( project_id=project_model.id, instructions="Please curate this record-based test data.", task_properties=task_properties, + assignee_principal_id=str(team.id), ) # WHEN I store the curation task @@ -307,6 +317,7 @@ def test_store_record_based_curation_task( assert stored_task.etag is not None assert stored_task.created_on is not None assert stored_task.created_by is not None + assert stored_task.assignee_principal_id == str(team.id) def test_store_update_existing_curation_task( self, project_model: Project, record_set: RecordSet diff --git a/tests/unit/synapseclient/extensions/unit_test_curator.py b/tests/unit/synapseclient/extensions/unit_test_curator.py index c149ffea5..ebaef1e88 100644 --- a/tests/unit/synapseclient/extensions/unit_test_curator.py +++ b/tests/unit/synapseclient/extensions/unit_test_curator.py @@ -50,6 +50,10 @@ get_latest_schema_uri, ) from synapseclient.models import ColumnType +from synapseclient.models.curation import ( + FileBasedMetadataTaskProperties, + RecordBasedMetadataTaskProperties, +) from synapseclient.models.mixins import JSONSchemaBinding from synapseclient.models.mixins.json_schema import JSONSchemaVersionInfo @@ -428,6 +432,88 @@ def test_create_file_based_metadata_task_project_traversal( # Verify that syn.get was called twice (for parent folder and project) self.assertEqual(self.mock_syn.get.call_count, 2) + @patch( + "synapseclient.extensions.curator.file_based_metadata_task.Synapse.get_client" + ) + @patch( + "synapseclient.extensions.curator.file_based_metadata_task.create_json_schema_entity_view" + ) + @patch("synapseclient.extensions.curator.file_based_metadata_task.Folder") + @patch("synapseclient.extensions.curator.file_based_metadata_task.CurationTask") + def test_create_file_based_metadata_task_with_assignee( + self, + mock_curation_task_cls, + mock_folder_cls, + mock_create_entity_view, + mock_get_client, + ): + """Test successful creation of file-based metadata task with assignee_principal_id.""" + # Test both string and int inputs - int should be converted to string + test_cases = [ + ("1234", "1234"), + (1234, "1234"), + ] + + for input_assignee, expected_assignee in test_cases: + with self.subTest(input_assignee=input_assignee): + # Reset mocks for each subtest + mock_curation_task_cls.reset_mock() + mock_folder_cls.reset_mock() + mock_create_entity_view.reset_mock() + mock_get_client.reset_mock() + + # GIVEN a file-based metadata task with assignee_principal_id + mock_get_client.return_value = self.mock_syn + mock_create_entity_view.return_value = "test_entity_view_id" + + mock_folder = Mock() + mock_folder_cls.return_value = mock_folder + mock_folder.get.return_value = mock_folder + mock_folder.parent_id = "syn11111111" + + mock_project = Mock() + mock_project.concreteType = "org.sagebionetworks.repo.model.Project" + mock_project.id = "syn22222222" + self.mock_syn.get.return_value = mock_project + + mock_task = Mock() + mock_task.task_id = "task123" + mock_curation_task = Mock() + mock_curation_task.store.return_value = mock_task + mock_curation_task_cls.return_value = mock_curation_task + + # WHEN I create the file-based metadata task with assignee_principal_id + result = create_file_based_metadata_task( + folder_id=self.folder_id, + curation_task_name=self.curation_task_name, + instructions=self.instructions, + attach_wiki=False, + entity_view_name=self.entity_view_name, + schema_uri=self.schema_uri, + enable_derived_annotations=True, + assignee_principal_id=input_assignee, + synapse_client=self.mock_syn, + ) + + # THEN the CurationTask should be called with assignee_principal_id as string + mock_curation_task_cls.assert_called_once_with( + data_type=self.curation_task_name, + project_id="syn22222222", + instructions=self.instructions, + assignee_principal_id=expected_assignee, + task_properties=FileBasedMetadataTaskProperties( + upload_folder_id=self.folder_id, + file_view_id=mock_create_entity_view.return_value, + ), + ) + # AND the task should be created successfully + assert result == ("test_entity_view_id", "task123") + mock_create_entity_view.assert_called_once_with( + syn=self.mock_syn, + synapse_entity_id=self.folder_id, + entity_view_name=self.entity_view_name, + ) + class TestCreateRecordBasedMetadataTask(unittest.TestCase): """Test cases for create_record_based_metadata_task function.""" @@ -872,6 +958,108 @@ def test_create_record_based_metadata_task_grid_creation_error( synapse_client=self.mock_syn, ) + @patch( + "synapseclient.extensions.curator.record_based_metadata_task.Synapse.get_client" + ) + @patch( + "synapseclient.extensions.curator.record_based_metadata_task.extract_schema_properties_from_web" + ) + @patch( + "synapseclient.extensions.curator.record_based_metadata_task.tempfile.NamedTemporaryFile" + ) + @patch("synapseclient.extensions.curator.record_based_metadata_task.RecordSet") + @patch("synapseclient.extensions.curator.record_based_metadata_task.CurationTask") + @patch("synapseclient.extensions.curator.record_based_metadata_task.Grid") + @patch("builtins.open") + def test_create_record_based_metadata_task_with_assignee( + self, + mock_open, + mock_grid_cls, + mock_curation_task_cls, + mock_record_set_cls, + mock_temp_file, + mock_extract_schema, + mock_get_client, + ): + """Test successful creation of record-based metadata task with assignee_principal_id.""" + # Test both string and int inputs - int should be converted to string + test_cases = [ + ("1234", "1234"), + (1234, "1234"), + ] + + for input_assignee, expected_assignee in test_cases: + with self.subTest(input_assignee=input_assignee): + # Reset mocks for each subtest + mock_open.reset_mock() + mock_grid_cls.reset_mock() + mock_curation_task_cls.reset_mock() + mock_record_set_cls.reset_mock() + mock_temp_file.reset_mock() + mock_extract_schema.reset_mock() + mock_get_client.reset_mock() + + # GIVEN a record-based metadata task with assignee_principal_id + mock_get_client.return_value = self.mock_syn + + mock_df = pd.DataFrame(columns=["specimenID", "age", "diagnosis"]) + mock_extract_schema.return_value = mock_df + + mock_temp = Mock() + mock_temp.name = "/tmp/test.csv" + mock_temp_file.return_value = mock_temp + + mock_record_set = Mock() + mock_record_set.id = "syn87654321" + mock_record_set_instance = Mock() + mock_record_set_instance.store.return_value = mock_record_set + mock_record_set_cls.return_value = mock_record_set_instance + + mock_task = Mock() + mock_task.task_id = "task123" + mock_curation_task = Mock() + mock_curation_task.store.return_value = mock_task + mock_curation_task_cls.return_value = mock_curation_task + + mock_grid = Mock() + mock_grid_instance = Mock() + mock_grid_instance.export_to_record_set.return_value = mock_grid + mock_grid_cls.return_value = mock_grid_instance + + # WHEN I create the record-based metadata task with assignee_principal_id + result = create_record_based_metadata_task( + project_id=self.project_id, + folder_id=self.folder_id, + record_set_name=self.record_set_name, + record_set_description=self.record_set_description, + curation_task_name=self.curation_task_name, + upsert_keys=self.upsert_keys, + instructions=self.instructions, + schema_uri=self.schema_uri, + bind_schema_to_record_set=True, + assignee_principal_id=input_assignee, + synapse_client=self.mock_syn, + ) + + # THEN the task should be created successfully + assert isinstance(result, tuple) + assert len(result) == 3 + record_set, task, grid = result + assert record_set == mock_record_set + assert task == mock_task + assert grid == mock_grid + + # AND the CurationTask should be called with assignee_principal_id as string + mock_curation_task_cls.assert_called_once_with( + data_type=self.curation_task_name, + project_id=self.project_id, + instructions=self.instructions, + assignee_principal_id=expected_assignee, + task_properties=RecordBasedMetadataTaskProperties( + record_set_id=mock_record_set.id + ), + ) + class TestQuerySchemaRegistry(unittest.TestCase): """Test cases for query_schema_registry function."""