diff --git a/docs/tutorials/command_line_client.md b/docs/tutorials/command_line_client.md index 2b7763e7a..b4a4cf109 100644 --- a/docs/tutorials/command_line_client.md +++ b/docs/tutorials/command_line_client.md @@ -552,9 +552,10 @@ Generate JSON Schema(s) from a data model synapse generate-json-schema [-h] [--data-types data_type1, data_type2] [--output dir_name] [--data-model-labels class_label] data_model_path ``` -| Name | Type | Description | -|--------------------------|------------|---------------------------------------------------------------------| -| `data_model_path` | Positional | Data model path or URL | -| `--data-types` | Named | Optional list of data types to create JSON Schema for | -| `--output` | Named | Optional. Either a file path ending in '.json', or a directory path | -| `--data-model-labels` | Named | Either 'class_label', or 'display_label' | +| Name | Type | Description | +|-----------------------------------|------------|-----------------------------------------------------------------------------------| +| `data_model_path` | Positional | Data model path or URL | +| `--data-types` | Named | Optional list of data types to create JSON Schema for | +| `--output` | Named | Optional. Either a file path ending in '.json', or a directory path | +| `--use-property-display-names` | Named | Optional. Defaults to False. Formats the property name strings in the JSON Schema | +| `--use-valid-value-display-names` | Named | Optional. Defaults to False. Formats the valid value strings in the JSON Schema | diff --git a/docs/tutorials/python/schema_operations.md b/docs/tutorials/python/schema_operations.md index a8376e316..71bb6f1c1 100644 --- a/docs/tutorials/python/schema_operations.md +++ b/docs/tutorials/python/schema_operations.md @@ -89,6 +89,16 @@ Create a JSON Schema If you don't set `output` parameter the JSON Schema file will be created in the current working directory. +## 8. 
Create a JSON Schema using display names + +Create a JSON Schema + +```python +{!docs/tutorials/python/tutorial_scripts/schema_operations.py!lines=56-63} +``` + +You can have Curator format the property names and/or valid values in the JSON Schema. This will remove whitespace and special characters. + ## Source Code for this Tutorial
diff --git a/docs/tutorials/python/tutorial_scripts/schema_operations.py b/docs/tutorials/python/tutorial_scripts/schema_operations.py index 1f244c53a..85f74c85d 100644 --- a/docs/tutorials/python/tutorial_scripts/schema_operations.py +++ b/docs/tutorials/python/tutorial_scripts/schema_operations.py @@ -52,3 +52,12 @@ data_types=DATA_TYPE, synapse_client=syn, ) + +# Create a JSON Schema using display names for both property names and valid values +schemas, file_paths = generate_jsonschema( + data_model_source=DATA_MODEL_SOURCE, + data_types=DATA_TYPE, + use_property_display_names=True, + use_valid_value_display_names=True, + synapse_client=syn, +) diff --git a/synapseclient/__main__.py b/synapseclient/__main__.py index 50b12b82f..71b68fdb0 100644 --- a/synapseclient/__main__.py +++ b/synapseclient/__main__.py @@ -808,7 +808,8 @@ def generate_json_schema(args, syn): data_model_source=args.data_model_path, output=args.output, data_types=args.data_types, - data_model_labels=args.data_model_labels, + use_property_display_names=args.use_property_display_names, + use_valid_value_display_names=args.use_valid_value_display_names, synapse_client=syn, ) logging.info(f"Created JSON Schema files: [{paths}]") @@ -1833,15 +1834,16 @@ def build_parser(): ), ) parser_generate_json_schema.add_argument( - "--data-model-labels", - type=str, - default="class_label", - choices=["class_label", "display_label"], - help=( - "Optional Label format for properties in the generated schema. " - "'class_label' uses standard attribute names (default). 
" - "'display_label' uses display names when valid" - ), + "--use-property-display-names", + action="store_true", + default=False, + help="Use display names for properties in the generated JSON Schema", + ) + parser_generate_json_schema.add_argument( + "--use-valid-value-display-names", + action="store_true", + default=False, + help="Use display names for valid values in the generated JSON Schema", ) parser_generate_json_schema.set_defaults(func=generate_json_schema) diff --git a/synapseclient/extensions/curator/schema_generation.py b/synapseclient/extensions/curator/schema_generation.py index 89574cdce..0d0c2823e 100644 --- a/synapseclient/extensions/curator/schema_generation.py +++ b/synapseclient/extensions/curator/schema_generation.py @@ -5137,7 +5137,7 @@ def update_property(self, property_dict: dict[str, Property]) -> None: def _set_conditional_dependencies( json_schema: JSONSchema, graph_state: GraphTraversalState, - use_property_display_names: bool = True, + use_property_display_names: bool = False, ) -> None: """ This sets conditional requirements in the "allOf" keyword. 
@@ -5204,7 +5204,7 @@ def _set_conditional_dependencies( def _create_enum_array_property( - node: TraversalNode, use_valid_value_display_names: bool = True + node: TraversalNode, use_valid_value_display_names: bool = False ) -> Property: """ Creates a JSON Schema property array with enum items @@ -5270,7 +5270,7 @@ def _create_array_property(node: TraversalNode) -> Property: def _create_enum_property( - node: TraversalNode, use_valid_value_display_names: bool = True + node: TraversalNode, use_valid_value_display_names: bool = False ) -> Property: """ Creates a JSON Schema property enum @@ -5346,8 +5346,8 @@ def _set_type_specific_keywords(schema: dict[str, Any], node: TraversalNode) -> def _set_property( json_schema: JSONSchema, node: TraversalNode, - use_property_display_names: bool = True, - use_valid_value_display_names: bool = True, + use_property_display_names: bool = False, + use_valid_value_display_names: bool = False, ) -> None: """ Sets a property in the JSON schema. that is required by the schema @@ -5393,8 +5393,8 @@ def _process_node( json_schema: JSONSchema, graph_state: GraphTraversalState, logger: Logger, - use_property_display_names: bool = True, - use_valid_value_display_names: bool = True, + use_property_display_names: bool = False, + use_valid_value_display_names: bool = False, ) -> None: """ Processes a node in the data model graph. @@ -5473,8 +5473,8 @@ def create_json_schema( # pylint: disable=too-many-arguments write_schema: bool = True, schema_path: Optional[str] = None, jsonld_path: Optional[str] = None, - use_property_display_names: bool = True, - use_valid_value_display_names: bool = True, + use_property_display_names: bool = False, + use_valid_value_display_names: bool = False, ) -> dict[str, Any]: """ Creates a JSONSchema dict for the datatype in the data model. 
@@ -5594,7 +5594,8 @@ def generate_jsonschema( synapse_client: Synapse, data_types: Optional[list[str]] = None, output: Optional[str] = None, - data_model_labels: DisplayLabelType = "class_label", + use_property_display_names: bool = False, + use_valid_value_display_names: bool = False, ) -> tuple[list[dict[str, Any]], list[str]]: """ Generate JSON Schema files from a data model. @@ -5612,9 +5613,11 @@ def generate_jsonschema( - If None, schemas will be written to the current working directory, with filenames formatted as `.json`. - If a directory path, schemas will be written to that directory, with filenames formatted as `/.json`. - If a file path (must end with `.json`) and a single data type is specified, the schema for that data type will be written to that file. - data_model_labels: Label format for properties in the generated schema: - - `"class_label"` (default): Uses standard attribute names as property keys - - `"display_label"`: Uses display names if valid (no blacklisted characters),. 
+ use_property_display_names: If True, the properties in the JSON Schema + will be written using node display names + use_valid_value_display_names: If True, the valid values in the JSON Schema + will be written using node display names + Returns: A tuple containing: @@ -5670,10 +5673,22 @@ def generate_jsonschema( data_model_source="https://raw.githubusercontent.com/org/repo/main/model.csv", output_directory="./schemas", data_type=None, - data_model_labels="class_label", synapse_client=syn ) ``` + + Generate JSON Schema using labels instead of display names: + + ```python + schemas, file_paths = generate_jsonschema( + data_model_source="https://raw.githubusercontent.com/org/repo/main/model.csv", + output="./schemas", + data_types=None, + synapse_client=syn, + use_property_display_names=False, + use_valid_value_display_names=False, + ) + ``` """ check_curator_imports() data_model_parser = DataModelParser( @@ -5728,7 +5743,8 @@ def generate_jsonschema( logger=synapse_client.logger, write_schema=True, schema_path=schema_path, - use_property_display_names=(data_model_labels == "display_label"), + use_property_display_names=use_property_display_names, + use_valid_value_display_names=use_valid_value_display_names, ) for data_type, schema_path in zip(data_types, schema_paths) ] diff --git a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.JSONSchemaComponent.schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.JSONSchemaComponent.schema.json index 397311ad9..10a04f7df 100644 --- a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.JSONSchemaComponent.schema.json +++ b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.JSONSchemaComponent.schema.json @@ -19,20 +19,20 @@ "Enum": { "description": "TBD", "enum": [ - "ab", - "cd", - "ef", - "gh" + "Ab", + "Cd", + "Ef", + "Gh" ], "title": "Enum" }, "EnumNotRequired": { "description": "TBD", 
"enum": [ - "ab", - "cd", - "ef", - "gh" + "Ab", + "Cd", + "Ef", + "Gh" ], "title": "Enum Not Required" }, @@ -67,10 +67,10 @@ "description": "TBD", "items": { "enum": [ - "ab", - "cd", - "ef", - "gh" + "Ab", + "Cd", + "Ef", + "Gh" ], "type": "string" }, @@ -81,10 +81,10 @@ "description": "TBD", "items": { "enum": [ - "ab", - "cd", - "ef", - "gh" + "Ab", + "Cd", + "Ef", + "Gh" ], "type": "string" }, diff --git a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockComponent.schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockComponent.schema.json index 5236a424c..81b002d08 100644 --- a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockComponent.schema.json +++ b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockComponent.schema.json @@ -37,10 +37,10 @@ "description": "TBD", "items": { "enum": [ - "ab", - "cd", - "ef", - "gh" + "Ab", + "Cd", + "Ef", + "Gh" ], "type": "string" }, @@ -51,10 +51,10 @@ "description": "TBD", "items": { "enum": [ - "ab", - "cd", - "ef", - "gh" + "Ab", + "Cd", + "Ef", + "Gh" ], "type": "string" }, @@ -70,10 +70,10 @@ "description": "TBD", "items": { "enum": [ - "ab", - "cd", - "ef", - "gh" + "Ab", + "Cd", + "Ef", + "Gh" ], "type": "string" }, diff --git a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py index 048d5e791..ed1342564 100644 --- a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py +++ b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py @@ -953,6 +953,8 @@ def test_create_json_schema_with_display_names( datatype=datatype, schema_name=f"{datatype}_validation", schema_path=test_path, + use_property_display_names=True, + use_valid_value_display_names=True, logger=logger, ) with open(expected_path, encoding="utf-8") as file1, open( @@ -997,7 +999,6 @@ def 
test_create_json_schema_with_class_label_using_jsonld( datatype=datatype, schema_name=f"{datatype}_validation", schema_path=test_path, - use_property_display_names=False, logger=logger, ) with open(expected_path, encoding="utf-8") as file1, open( @@ -1262,7 +1263,7 @@ def test_set_conditional_dependencies( "description": "TBD", "title": "List Enum", "type": "array", - "items": {"enum": ["ab", "cd", "ef", "gh"], "type": "string"}, + "items": {"enum": ["Ab", "Cd", "Ef", "Gh"], "type": "string"}, } }, required=["ListEnum"], @@ -1277,7 +1278,7 @@ def test_set_conditional_dependencies( "description": "TBD", "title": "List Enum Not Required", "type": "array", - "items": {"enum": ["ab", "cd", "ef", "gh"], "type": "string"}, + "items": {"enum": ["Ab", "Cd", "Ef", "Gh"], "type": "string"}, } }, required=[], @@ -1291,7 +1292,7 @@ def test_set_conditional_dependencies( "Enum": { "description": "TBD", "title": "Enum", - "enum": ["ab", "cd", "ef", "gh"], + "enum": ["Ab", "Cd", "Ef", "Gh"], } }, required=["Enum"], @@ -1338,7 +1339,7 @@ def test_set_property( ) -> None: """Tests for set_property""" schema = JSONSchema() - _set_property(schema, test_nodes[node_name], use_property_display_names=False) + _set_property(schema, test_nodes[node_name]) assert schema == expected_schema @@ -1350,9 +1351,9 @@ def test_set_property( { "type": "array", "title": "array", - "items": {"enum": ["ab", "cd", "ef", "gh"], "type": "string"}, + "items": {"enum": ["Ab", "Cd", "Ef", "Gh"], "type": "string"}, }, - [[], ["ab"]], + [[], ["Ab"]], [[None], ["x"]], ), ( @@ -1360,9 +1361,9 @@ def test_set_property( { "type": "array", "title": "array", - "items": {"enum": ["ab", "cd", "ef", "gh"], "type": "string"}, + "items": {"enum": ["Ab", "Cd", "Ef", "Gh"], "type": "string"}, }, - [[], ["ab"]], + [[], ["Ab"]], [[None], ["x"]], ), ], @@ -1444,18 +1445,18 @@ def test_create_array_property( # If is_required is True, no type is added ( "Enum", - {"enum": ["ab", "cd", "ef", "gh"], "title": "enum"}, - ["ab"], + 
{"enum": ["Ab", "Cd", "Ef", "Gh"], "title": "enum"}, + ["Ab"], [1, "x", None], ), # SYNPY 1699: If is_required is False, null type is no longer added ( "EnumNotRequired", { - "enum": ["ab", "cd", "ef", "gh"], + "enum": ["Ab", "Cd", "Ef", "Gh"], "title": "enum", }, - ["ab"], + ["Ab"], [1, "x"], ), ], diff --git a/tests/unit/synapseclient/extensions/unit_test_curator.py b/tests/unit/synapseclient/extensions/unit_test_curator.py index c149ffea5..1ca5f2e44 100644 --- a/tests/unit/synapseclient/extensions/unit_test_curator.py +++ b/tests/unit/synapseclient/extensions/unit_test_curator.py @@ -1963,7 +1963,6 @@ def test_generate_jsonschema_from_csv(self): data_model_source=self.test_schema_path, output=temp_dir, data_types=None, - data_model_labels="class_label", synapse_client=self.syn, ) @@ -1995,7 +1994,6 @@ def test_generate_jsonschema_from_minimal_csv(self): data_model_source=self.minimal_test_schema_path, output=temp_dir, data_types=None, - data_model_labels="class_label", synapse_client=self.syn, ) @@ -2035,7 +2033,6 @@ def test_generate_jsonschema_from_jsonld(self): data_model_source=jsonld_path, output=temp_dir, data_types=None, - data_model_labels="class_label", synapse_client=self.syn, ) @@ -2061,7 +2058,6 @@ def test_generate_jsonschema_specific_components(self): data_model_source=self.test_schema_path, output=temp_dir, data_types=target_components, - data_model_labels="class_label", synapse_client=self.syn, ) @@ -2086,7 +2082,8 @@ def test_generate_jsonschema_with_display_label(self): data_model_source=self.test_schema_path, output=temp_dir, data_types=["Patient"], - data_model_labels="display_label", + use_property_display_names=True, + use_valid_value_display_names=True, synapse_client=self.syn, ) @@ -2110,7 +2107,6 @@ def test_generate_jsonschema_validates_required_fields(self): data_model_source=self.test_schema_path, output=temp_dir, data_types=["Patient"], - data_model_labels="class_label", synapse_client=self.syn, ) @@ -2137,7 +2133,6 @@ def 
test_generate_jsonschema_includes_enums(self): data_model_source=self.test_schema_path, output=temp_dir, data_types=["Patient"], - data_model_labels="class_label", synapse_client=self.syn, ) @@ -2175,7 +2170,6 @@ def test_generate_jsonschema_includes_validation_rules(self): data_model_source=self.test_schema_path, output=temp_dir, data_types=["MockComponent"], - data_model_labels="class_label", synapse_client=self.syn, ) @@ -2224,7 +2218,6 @@ def test_generate_jsonschema_includes_conditional_dependencies(self): data_model_source=self.test_schema_path, output=temp_dir, data_types=["Patient"], - data_model_labels="class_label", synapse_client=self.syn, ) @@ -2271,7 +2264,6 @@ def test_generate_jsonschema_handles_array_types(self): data_model_source=self.test_schema_path, output=temp_dir, data_types=["MockComponent"], - data_model_labels="class_label", synapse_client=self.syn, ) @@ -2306,7 +2298,6 @@ def test_generate_jsonschema_file_content_matches_schema_dict(self): data_model_source=self.test_schema_path, output=temp_dir, data_types=["Patient"], - data_model_labels="class_label", synapse_client=self.syn, ) @@ -2328,7 +2319,6 @@ def test_generate_jsonschema_creates_valid_json_schema_structure(self): data_model_source=self.test_schema_path, output=temp_dir, data_types=None, - data_model_labels="class_label", synapse_client=self.syn, ) diff --git a/tests/unit/synapseclient/unit_test_commandline.py b/tests/unit/synapseclient/unit_test_commandline.py index 442e5a76a..24242861e 100644 --- a/tests/unit/synapseclient/unit_test_commandline.py +++ b/tests/unit/synapseclient/unit_test_commandline.py @@ -976,3 +976,26 @@ def test_jsonld_path(self): finally: if os.path.isfile(schema_path): os.remove(schema_path) + + def test_display_names(self): + # GIVEN a CSV schema file + parser = cmdline.build_parser() + args = parser.parse_args( + [ + "generate-json-schema", + self.csv_path, + "--data-types", + "Patient", + "--use-property-display-names", + 
"--use-valid-value-display-names", + ] + ) + schema_path = "./Patient.json" + try: + # WHEN I generate a schema with one datatype and no output path + cmdline.generate_json_schema(args, self.syn) + # THEN a schema file should be created at ./Patient.json + assert os.path.isfile(schema_path) + finally: + if os.path.isfile(schema_path): + os.remove(schema_path)