From 49c3ae6790d6c43a5a66bbfce9345d74fbae647b Mon Sep 17 00:00:00 2001 From: Sina Haselmann Date: Tue, 17 Feb 2026 10:40:32 +0100 Subject: [PATCH 1/2] Fix CSV metadata import failure when authority contains separator - Enhanced resolveValueAndAuthority() to handle authorities containing :: - Fixes NumberFormatException when parsing values like: value::will be referenced::ORCID::0000-0002-5474-1918::600 - Properly handles 2-part, 3-part, and 4+ part formats - Maintains backward compatibility with existing CSV imports --- .../dspace/app/bulkedit/MetadataImport.java | 70 ++++++++++++++----- 1 file changed, 53 insertions(+), 17 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/app/bulkedit/MetadataImport.java b/dspace-api/src/main/java/org/dspace/app/bulkedit/MetadataImport.java index e79e807608eb..22137de3838d 100644 --- a/dspace-api/src/main/java/org/dspace/app/bulkedit/MetadataImport.java +++ b/dspace-api/src/main/java/org/dspace/app/bulkedit/MetadataImport.java @@ -1197,30 +1197,66 @@ protected void simplyCopyValue(String value, BulkEditMetadataValue dcv) { private void resolveValueAndAuthority(String value, BulkEditMetadataValue dcv) { // Cells with valid authority are composed of three parts ~ , , - // The value itself may also include the authority separator though + // The authority itself may also include the authority separator though (e.g., "will be referenced::ORCID::0000-0002-5474-1918" or "Chemnitz University of Technology::will be referenced::ROR-ID::https://ror.org/00a208s56::600") String[] parts = value.split(csv.getEscapedAuthoritySeparator()); // If we don't have enough parts, assume the whole string is the value - if (parts.length < 3) { + if (parts.length < 2) { simplyCopyValue(value, dcv); return; } - try { - // The last part of the cell must be a confidence value (integer) - int confidence = Integer.parseInt(parts[parts.length - 1]); - String authority = parts[parts.length - 2]; - String plainValue = String.join( - csv.getAuthoritySeparator(), - ArrayUtils.subarray(parts, 0, parts.length - 2) - ); - - dcv.setValue(plainValue); - dcv.setAuthority(authority); - dcv.setConfidence(confidence); - } catch (NumberFormatException e) { - // Otherwise assume the whole string is the value - simplyCopyValue(value, dcv); + // Handle case where authority itself contains the separator + // The format should be: value::authority::confidence + // If we have more than 3 parts, the authority contains the separator + if (parts.length == 2) { + // Only value and authority, no confidence + // When an authority is explicitly provided, use CF_ACCEPTED (consistent with line 1183) + // The old buggy code would have called simplyCopyValue() which ignores the authority + dcv.setValue(parts[0]); + dcv.setAuthority(parts[1]); + dcv.setConfidence(Choices.CF_ACCEPTED); + } else if (parts.length == 3) { + // Standard format: value::authority::confidence + try { + dcv.setValue(parts[0]); + dcv.setAuthority(parts[1]); + dcv.setConfidence(Integer.parseInt(parts[2])); + } catch (NumberFormatException e) { + // Last part is not numeric, so it's part of the authority + simplyCopyValue(value, dcv); + } + } else { + // Authority contains the separator - combine middle parts as authority + // Last part should be confidence if it's numeric + String lastPart = parts[parts.length - 1]; + try { + int confidence = Integer.parseInt(lastPart); + // Last part is confidence, combine parts[1] to parts[length-2] as authority + StringBuilder authorityBuilder = new StringBuilder(); + for (int i = 1; i < parts.length - 1; i++) { + if (i > 1) { + authorityBuilder.append(csv.getAuthoritySeparator()); + } + authorityBuilder.append(parts[i]); + } + dcv.setValue(parts[0]); + dcv.setAuthority(authorityBuilder.toString()); + dcv.setConfidence(confidence); + } catch (NumberFormatException e) { + // Last part is not numeric, so it's part of the authority + // Combine all parts except the first as authority + StringBuilder authorityBuilder = new StringBuilder(); + for (int i = 1; i < parts.length; i++) { + if (i > 1) { + authorityBuilder.append(csv.getAuthoritySeparator()); + } + authorityBuilder.append(parts[i]); + } + dcv.setValue(parts[0]); + dcv.setAuthority(authorityBuilder.toString()); + dcv.setConfidence(Choices.CF_ACCEPTED); + } } } From 7326c802d9592ddd55f8c6249f6d01b1c1e1803c Mon Sep 17 00:00:00 2001 From: Sina Haselmann Date: Tue, 17 Feb 2026 12:29:49 +0100 Subject: [PATCH 2/2] add integration tests for authority separator parsing in CSV import --- .../dspace/app/bulkedit/MetadataImport.java | 3 +- .../dspace/app/bulkedit/MetadataImportIT.java | 103 ++++++++++++++++++ 2 files changed, 104 insertions(+), 2 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/app/bulkedit/MetadataImport.java b/dspace-api/src/main/java/org/dspace/app/bulkedit/MetadataImport.java index 22137de3838d..6c961b0c4f05 100644 --- a/dspace-api/src/main/java/org/dspace/app/bulkedit/MetadataImport.java +++ b/dspace-api/src/main/java/org/dspace/app/bulkedit/MetadataImport.java @@ -23,7 +23,6 @@ import jakarta.annotation.Nullable; import org.apache.commons.cli.ParseException; -import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.Strings; import org.apache.logging.log4j.Logger; @@ -1197,7 +1196,7 @@ protected void simplyCopyValue(String value, BulkEditMetadataValue dcv) { private void resolveValueAndAuthority(String value, BulkEditMetadataValue dcv) { // Cells with valid authority are composed of three parts ~ , , - // The authority itself may also include the authority separator though (e.g., "will be referenced::ORCID::0000-0002-5474-1918" or "Chemnitz University of Technology::will be referenced::ROR-ID::https://ror.org/00a208s56::600") + // The authority itself may also include the authority separator though (e.g., "will be referenced::ORCID::0000-0002-5474-1918" or "will be referenced::ROR-ID::https://ror.org/00a208s56::600") String[] parts = value.split(csv.getEscapedAuthoritySeparator()); // If we don't have enough parts, assume the whole string is the value diff --git a/dspace-api/src/test/java/org/dspace/app/bulkedit/MetadataImportIT.java b/dspace-api/src/test/java/org/dspace/app/bulkedit/MetadataImportIT.java index 8fc432815390..fed4153d2187 100644 --- a/dspace-api/src/test/java/org/dspace/app/bulkedit/MetadataImportIT.java +++ b/dspace-api/src/test/java/org/dspace/app/bulkedit/MetadataImportIT.java @@ -35,10 +35,13 @@ import org.dspace.content.Community; import org.dspace.content.EntityType; import org.dspace.content.Item; +import org.dspace.content.MetadataValue; import org.dspace.content.Relationship; import org.dspace.content.factory.ContentServiceFactory; import org.dspace.content.service.ItemService; import org.dspace.content.service.RelationshipService; +import org.dspace.content.authority.factory.ContentAuthorityServiceFactory; +import org.dspace.content.authority.service.MetadataAuthorityService; import org.dspace.eperson.EPerson; import org.dspace.eperson.factory.EPersonServiceFactory; import org.dspace.eperson.service.EPersonService; @@ -46,6 +49,8 @@ import org.dspace.scripts.configuration.ScriptConfiguration; import org.dspace.scripts.factory.ScriptServiceFactory; import org.dspace.scripts.service.ScriptService; +import org.dspace.services.ConfigurationService; +import org.dspace.services.factory.DSpaceServicesFactory; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -58,6 +63,10 @@ public class MetadataImportIT extends AbstractIntegrationTestWithDatabase { = EPersonServiceFactory.getInstance().getEPersonService(); private final RelationshipService relationshipService = ContentServiceFactory.getInstance().getRelationshipService(); + private final ConfigurationService configurationService + = DSpaceServicesFactory.getInstance().getConfigurationService(); + private final MetadataAuthorityService metadataAuthorityService + = ContentAuthorityServiceFactory.getInstance().getMetadataAuthorityService(); private Collection collection; private Collection publicationCollection; @@ -67,6 +76,18 @@ public class MetadataImportIT extends AbstractIntegrationTestWithDatabase { @Override public void setUp() throws Exception { super.setUp(); + // Configure authority-controlled fields for testing (since config files may be incomplete) + // Ensure dc.contributor.author is explicitly configured + configurationService.setProperty("authority.controlled.dc.contributor.author", "true"); + configurationService.setProperty("choices.plugin.dc.contributor.author", "AuthorAuthority"); + // Configure dc.contributor.other as authority-controlled for testing + configurationService.setProperty("authority.controlled.dc.contributor.other", "true"); + configurationService.setProperty("choices.plugin.dc.contributor.other", "AuthorAuthority"); + // Reload configuration service to ensure properties are picked up + configurationService.reloadConfig(); + // Clear cache to reload authority configuration + metadataAuthorityService.clearCache(); + context.turnOffAuthorisationSystem(); Community community = CommunityBuilder.createCommunity(context).build(); this.collection = CollectionBuilder.createCollection(context, community).build(); @@ -276,6 +297,88 @@ public void metadataImportRemovingValueTest() throws Exception { assertEquals(0, itemService.getMetadata(item, "dc", "contributor", "author", Item.ANY).size()); } + @Test + public void metadataImportWithAuthorityContainingSeparatorTest() throws Exception { + // Test case that reproduces the bug: authority values containing the separator (::) + // This test verifies that CSV import correctly handles authorities like: + // "will be referenced::ORCID::0000-0002-5474-1918" + String[] csv = { + "id,collection,dc.title,dc.contributor.author", + "+," + collection.getHandle() + ",\"Test Import Authority\"," + + "\"Fischer, Frank::will be referenced::ORCID::0000-0002-5474-1918::600\"" + }; + performImportScript(csv); + Item importedItem = findItemByName("Test Import Authority"); + List authorMetadata = + itemService.getMetadata(importedItem, "dc", "contributor", "author", Item.ANY); + + assertEquals("Should have exactly one author metadata value", 1, authorMetadata.size()); + + MetadataValue authorValue = authorMetadata.get(0); + assertEquals("Value should be correctly parsed", "Fischer, Frank", authorValue.getValue()); + assertEquals("Authority should contain full authority string with separators", + "will be referenced::ORCID::0000-0002-5474-1918", authorValue.getAuthority()); + assertEquals("Confidence should be correctly parsed", 600, authorValue.getConfidence()); + + context.turnOffAuthorisationSystem(); + itemService.delete(context, itemService.find(context, importedItem.getID())); + context.restoreAuthSystemState(); + } + + @Test + public void metadataImportWithRORAuthorityContainingSeparatorTest() throws Exception { + // Test case with ROR-ID authority containing separators + String[] csv = { + "id,collection,dc.title,dc.contributor.other", + "+," + collection.getHandle() + ",\"Test Import ROR\"," + + "\"Chemnitz University of Technology::will be referenced::ROR-ID::https://ror.org/00a208s56::600\"" + }; + performImportScript(csv); + Item importedItem = findItemByName("Test Import ROR"); + List otherMetadata = + itemService.getMetadata(importedItem, "dc", "contributor", "other", Item.ANY); + + assertEquals("Should have exactly one contributor.other metadata value", 1, otherMetadata.size()); + + MetadataValue otherValue = otherMetadata.get(0); + assertEquals("Value should be correctly parsed", "Chemnitz University of Technology", + otherValue.getValue()); + assertEquals("Authority should contain full authority string with separators", + "will be referenced::ROR-ID::https://ror.org/00a208s56", otherValue.getAuthority()); + assertEquals("Confidence should be correctly parsed", 600, otherValue.getConfidence()); + + context.turnOffAuthorisationSystem(); + itemService.delete(context, itemService.find(context, importedItem.getID())); + context.restoreAuthSystemState(); + } + + @Test + public void metadataImportWithTwoPartAuthorityTest() throws Exception { + // Test case with 2-part format (value::authority, no confidence) + String[] csv = { + "id,collection,dc.title,dc.contributor.author", + "+," + collection.getHandle() + ",\"Test Import 2Part\"," + + "\"Smith, John::will be referenced::ORCID::0000-0001-2345-6789\"" + }; + performImportScript(csv); + Item importedItem = findItemByName("Test Import 2Part"); + List authorMetadata = + itemService.getMetadata(importedItem, "dc", "contributor", "author", Item.ANY); + + assertEquals("Should have exactly one author metadata value", 1, authorMetadata.size()); + + MetadataValue authorValue = authorMetadata.get(0); + assertEquals("Value should be correctly parsed", "Smith, John", authorValue.getValue()); + assertEquals("Authority should contain full authority string", + "will be referenced::ORCID::0000-0001-2345-6789", authorValue.getAuthority()); + // When no confidence is provided, should default to CF_ACCEPTED (600) + assertEquals("Confidence should default to CF_ACCEPTED", 600, authorValue.getConfidence()); + + context.turnOffAuthorisationSystem(); + itemService.delete(context, itemService.find(context, importedItem.getID())); + context.restoreAuthSystemState(); + } + private Item findItemByName(String name) throws Exception { List items = IteratorUtils.toList(itemService.findByMetadataField(context, "dc", "title", null, name));