From 6516f60e2d78f66ce941f421029aaab2abc0fb29 Mon Sep 17 00:00:00 2001 From: holmeso Date: Fri, 27 Feb 2026 14:07:58 +1000 Subject: [PATCH 01/11] feat(nanno): if snpeff deems a variant to be in multiple genes report them all --- .../qannotate/nanno/AnnotationSource.java | 33 ++- .../nanno/AnnotationSourceSnpEffVCF.java | 107 +++------- .../qannotate/nanno/AnnotationSourceTSV.java | 27 ++- .../qannotate/nanno/AnnotationSourceVCF.java | 57 +++-- .../nanno/AnnotationSourceSnpEffVCFTest.java | 199 ++++++++++++++++++ .../nanno/AnnotationSourceTSVTest.java | 43 ++-- 6 files changed, 329 insertions(+), 137 deletions(-) diff --git a/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSource.java b/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSource.java index 79e5fd12d..312e2de96 100644 --- a/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSource.java +++ b/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSource.java @@ -78,25 +78,24 @@ public String getAnnotation(long requestedCpAsLong, ChrPosition requestedCp) { * lets see if there are any records that match on ref and alt */ return getAnnotationsFromCurrentRecords(requestedCp); + } - } else { - int matchWithNextCP = Long.compare(requestedCpAsLong, nextCPAsLong); - if (nextCPAsLong > -1 && matchWithNextCP < 0) { - /* - * requestedCp is "less than" next CP - * return empty list here - */ - } else { -// logger.debug(reader.getFile().getName() + ": getting next record. requestedCp: " + (null != requestedCp ? requestedCp.toIGVString() : null) + ", currentCP: " + (null != currentCP ? currentCP.toIGVString() : null)); - getNextRecord(requestedCpAsLong, matchWithNextCP); - if (requestedCpAsLong == currentCPAsLong) { - return getAnnotationsFromCurrentRecords(requestedCp); - } - /* - * requestedCP and currentCP are not equal - */ - } + int matchWithNextCP = Long.compare(requestedCpAsLong, nextCPAsLong); + if (nextCPAsLong > -1 && matchWithNextCP < 0) { + /* + * requestedCp is "less than" next CP + * return empty list here + */ + return annotationToReturn(null); + } + + getNextRecord(requestedCpAsLong, matchWithNextCP); + if (requestedCpAsLong == currentCPAsLong) { + return getAnnotationsFromCurrentRecords(requestedCp); } + /* + * requestedCP and currentCP are not equal + */ return annotationToReturn(null); } diff --git a/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceSnpEffVCF.java b/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceSnpEffVCF.java index 005a0875f..e4982f5df 100644 --- a/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceSnpEffVCF.java +++ b/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceSnpEffVCF.java @@ -62,8 +62,6 @@ public AnnotationSourceSnpEffVCF(RecordReader reader, int chrPositionInR @Override public String getAnnotation(long requestedCpAsLong, ChrPosition requestedCp) { -// logger.debug(reader.getFile().getName() + ": requestedCp is " + (null != requestedCp ? requestedCp.toIGVString() : null) + ", currentCP: " + (null != currentCP ? currentCP.toIGVString() : null) + ", nextCP: " + (null != nextCP ? nextCP.toIGVString() : null)); - /* * check to see if the records we currently have stored are a match */ @@ -73,70 +71,21 @@ public String getAnnotation(long requestedCpAsLong, ChrPosition requestedCp) { * we match on position * lets see if there are any records that match on ref and alt */ -// return getAnnotationsFromRecords(requestedCp); - if (requestedCp instanceof ChrPositionRefAlt reqCpRefAlt) { - String reqRef = reqCpRefAlt.getRef(); - String reqAlt = reqCpRefAlt.getAlt(); - for (String rec : currentRecords) { - String[] recArray = TabTokenizer.tokenize(rec, DEFAULT_DELIMITER); - String recRef = recArray[refPositionInFile]; - String recAlt = recArray[altPositionInFile]; - - if (recAlt.contains(",")) { - String[] recAltArray = recAlt.split(","); - for (String recAltValue : recAltArray) { - if (reqRef.equals(recRef) && reqAlt.equals(recAltValue)) { - return annotationToReturnWithAlt(rec, recAltValue); - } - } - } else { - if (reqRef.equals(recRef) && reqAlt.equals(recAlt)) { - return annotationToReturnWithAlt(rec, recAlt); - } - } - } - } - + return getAnnotationsFromRecords(requestedCp); } else { int matchWithNextCP = Long.compare(requestedCpAsLong, nextCPAsLong); if (nextCPAsLong > -1 && matchWithNextCP < 0) { } else { -// logger.debug(reader.getFile().getName() + ": getting next record. requestedCp: " + (null != requestedCp ? requestedCp.toIGVString() : null) + ", currentCP: " + (null != currentCP ? currentCP.toIGVString() : null)); getNextRecord(requestedCpAsLong, matchWithNextCP); if (requestedCpAsLong == currentCPAsLong) { /* * we match on position * lets see if there are any records that match on ref and alt */ - if (requestedCp instanceof ChrPositionRefAlt reqCpRefAlt) { - String reqRef = reqCpRefAlt.getRef(); - String reqAlt = reqCpRefAlt.getAlt(); - for (String rec : currentRecords) { - String[] recArray = TabTokenizer.tokenize(rec, DEFAULT_DELIMITER); - String recRef = recArray[refPositionInFile]; - String recAlt = recArray[altPositionInFile]; - - if (recAlt.contains(",")) { - String[] recAltArray = recAlt.split(","); - for (String recAltValue : recAltArray) { - if (reqRef.equals(recRef) && reqAlt.equals(recAltValue)) { - return annotationToReturnWithAlt(rec, recAltValue); - } - } - } else { - if (reqRef.equals(recRef) && reqAlt.equals(recAlt)) { - return annotationToReturnWithAlt(rec, recAlt); - } - } - } - } -// return getAnnotationsFromRecords(requestedCp); + return getAnnotationsFromRecords(requestedCp); } - /* - * requestedCP and currentCP are not equal - */ } } return annotationToReturn(null); @@ -151,7 +100,7 @@ private String getAnnotationsFromRecords(ChrPosition requestedCp){ String recRef = recArray[refPositionInFile]; String recAlt = recArray[altPositionInFile]; - if (recAlt.contains(",")) { + if (recAlt.indexOf(',') >= 0) { String[] recAltArray = recAlt.split(","); for (String recAltValue : recAltArray) { if (reqRef.equals(recRef) && reqAlt.equals(recAltValue)) { @@ -177,7 +126,6 @@ public String annotationToReturn(String[] record) { * dealing with a vcf file and assuming that the required annotation fields are in the INFO field * so get that and go from there. */ -// String[] recordArray = record.split("\t"); String info = record[7]; String alt = record[4]; @@ -221,32 +169,31 @@ public static String extractFieldsFromInfoField(String info, List fields if (StringUtils.isNullOrEmpty(worstConsequence)) { return emptyInfoFieldResult; } - /* - * we have our consequence - * split by pipe and then get our fields + * we have our consequences (comma-delimited) + * split by comma into consequences, then by pipe into fields */ - String[] consequenceArray = TabTokenizer.tokenize(worstConsequence, '|'); + String[] consequences = worstConsequence.split(","); for (String af : fields) { if (!StringUtils.isNullOrEmpty(af)) { - /* - * get position from map - */ String aflc = af.toLowerCase(); Integer arrayPosition = SNP_EFF_ANNOTATION_FIELDS_AND_POSITIONS.get(aflc); - if (null != arrayPosition && arrayPosition >= 0 && arrayPosition < consequenceArray.length) { - /* - * good - */ - String annotation = consequenceArray[arrayPosition]; - dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB + af + "=" + annotation : af + "=" + annotation); - } else { -// System.out.println("Could not find field [" + af + "] in SNP_EFF_ANNOTATION_FIELDS_AND_POSITIONS map!"); -// System.out.println("arrayPosition.intValue(): " + arrayPosition.intValue() + ", consequenceArray.length: " + consequenceArray.length); - } + if (null != arrayPosition) { + StringBuilder fieldValues = new StringBuilder(); + for (String consequence : consequences) { + String[] consequenceArray = TabTokenizer.tokenize(consequence, '|'); + if (arrayPosition >= 0 && arrayPosition < consequenceArray.length) { + String annotation = consequenceArray[arrayPosition]; + fieldValues.append(fieldValues.isEmpty() ? annotation : "|" + annotation); + } + } + dataToReturn.append((!dataToReturn.isEmpty()) + ? FIELD_DELIMITER_TAB + af + "=" + fieldValues + : af + "=" + fieldValues); + } } } return (dataToReturn.isEmpty()) ? emptyInfoFieldResult : dataToReturn.toString(); @@ -283,19 +230,27 @@ public static String getWorstConsequence(String info, String alt) { * Pick the first one as that is the one with the highest effect as decreed by snpEff */ int annoIndex = info.indexOf("ANN="); + if (annoIndex < 0) { + return ""; + } int end = info.indexOf(FIELD_DELIMITER_SEMI_COLON, annoIndex); String ann = info.substring(annoIndex + 4, end == -1 ? info.length() : end); String[] annArray = ann.split(","); - String worstConsequence = ""; + Map worstByGene = new java.util.LinkedHashMap<>(); for (String aa : annArray) { if (aa.startsWith(alt)) { - worstConsequence = aa; - break; + String[] parts = TabTokenizer.tokenize(aa, '|'); + if (parts.length > 3) { + String gene = parts[3]; + if (!StringUtils.isNullOrEmpty(gene) && !worstByGene.containsKey(gene)) { + worstByGene.put(gene, aa); + } + } } } - return worstConsequence; + return String.join(",", worstByGene.values()); } @Override diff --git a/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceTSV.java b/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceTSV.java index 3c050499c..41e20e907 100644 --- a/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceTSV.java +++ b/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceTSV.java @@ -19,6 +19,9 @@ public class AnnotationSourceTSV extends AnnotationSource { List headerLines; Map headerNameAndPosition; + private String[] fieldNames; + private int[] fieldPositions; + public AnnotationSourceTSV(RecordReader reader, int chrPositionInRecord, int positionPositionInRecord, int refPositionInFile, int altPositionInFile, String fieldNames, boolean chrStartsWithChr) { super(reader, chrPositionInRecord, positionPositionInRecord, refPositionInFile, altPositionInFile, chrStartsWithChr); @@ -46,6 +49,12 @@ public AnnotationSourceTSV(RecordReader reader, int chrPositionInRecord, if (headerNameAndPosition.isEmpty()) { throw new IllegalArgumentException("Could not find requested fields (" + fieldNames + ") in header: " + headerLine); } + // precompute arrays for fast extraction + this.fieldNames = headerNameAndPosition.keySet().toArray(new String[0]); + this.fieldPositions = new int[this.fieldNames.length]; + for (int i = 0; i < this.fieldNames.length; i++) { + this.fieldPositions[i] = headerNameAndPosition.get(this.fieldNames[i]); + } } /* @@ -96,20 +105,18 @@ public String annotationToReturn(String[] record) { /* * entries in the INFO field are delimited by ';' */ - return extractFieldsFromRecord(record, headerNameAndPosition); + return extractFieldsFromRecord(record, fieldNames, fieldPositions); } - public static String extractFieldsFromRecord(String[] record, Map fields) { + public static String extractFieldsFromRecord(String[] record, String[] fieldNames, int[] fieldPositions) { StringBuilder dataToReturn = new StringBuilder(); int recordLength = null != record ? record.length : 0; - if ( recordLength > 0 && null != fields) { -// String [] recordArray = TabTokenizer.tokenize(record); - for (Entry entry : fields.entrySet()) { - /* - * make sure that array length is not shorter than entry value - */ - if (recordLength > entry.getValue()) { - dataToReturn.append(( ! dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB : "").append(entry.getKey()).append("=").append(record[entry.getValue()]); + if (recordLength > 0 && null != fieldNames && null != fieldPositions) { + for (int i = 0; i < fieldNames.length; i++) { + int pos = fieldPositions[i]; + if (recordLength > pos) { + dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB : "") + .append(fieldNames[i]).append("=").append(record[pos]); } } } diff --git a/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceVCF.java b/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceVCF.java index dc15768ab..874c4a317 100644 --- a/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceVCF.java +++ b/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceVCF.java @@ -2,7 +2,9 @@ import java.io.IOException; import java.util.Arrays; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.stream.Collectors; import org.qcmg.common.string.StringUtils; @@ -11,7 +13,8 @@ public class AnnotationSourceVCF extends AnnotationSource { public static final String FIELD_DELIMITER_SEMI_COLON = ";"; - + private static final int INFO_LENGTH_PARSE_THRESHOLD = 2000; + private static final int FIELDS_PARSE_THRESHOLD = 3; List annotationFields; @@ -52,24 +55,52 @@ public String annotationToReturn(String [] record) { public static String extractFieldsFromInfoField(String info, List fields, String emptyInfoFieldResult) { - if (StringUtils.isNullOrEmptyOrMissingData(info)) { + if (StringUtils.isNullOrEmptyOrMissingData(info) || fields == null) { return emptyInfoFieldResult; } - StringBuilder dataToReturn = new StringBuilder(); - for (String af : fields) { - if ( ! StringUtils.isNullOrEmpty(af)) { - int start = info.indexOf(af + "="); - if (start > -1) { - int end = info.indexOf(FIELD_DELIMITER_SEMI_COLON, start); - if (end == -1) { - dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB + info.substring(start) : info.substring(start)); + boolean parseOnce = (fields.size() > FIELDS_PARSE_THRESHOLD) || info.length() > INFO_LENGTH_PARSE_THRESHOLD; + if ( ! parseOnce) { + StringBuilder dataToReturn = new StringBuilder(); + for (String af : fields) { + if (!StringUtils.isNullOrEmpty(af)) { + int start = info.indexOf(af + "="); + if (start > -1) { + int end = info.indexOf(FIELD_DELIMITER_SEMI_COLON, start); + if (end == -1) { + dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB + info.substring(start) : info.substring(start)); + } else { + dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB + info.substring(start, end) : info.substring(start, end)); + } } else { - dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB + info.substring(start, end) : info.substring(start, end)); + dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB + af + "=" : af + "="); } - } else { - dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB + af + "=" : af + "="); } } + return (dataToReturn.isEmpty()) ? emptyInfoFieldResult : dataToReturn.toString(); + } + Map infoMap = new HashMap<>(); + int start = 0; + while (start <= info.length()) { + int end = info.indexOf(FIELD_DELIMITER_SEMI_COLON, start); + if (end == -1) end = info.length(); + + String token = info.substring(start, end); + int eq = token.indexOf('='); + if (eq > -1) { + infoMap.put(token.substring(0, eq), token.substring(eq + 1)); + } else if (!token.isEmpty()) { + infoMap.put(token, ""); + } + + start = end + 1; + } + StringBuilder dataToReturn = new StringBuilder(); + for (String af : fields) { + if (!StringUtils.isNullOrEmpty(af)) { + String value = infoMap.get(af); + String entry = (value != null) ? af + "=" + value : af + "="; + dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB : "").append(entry); + } } return (dataToReturn.isEmpty()) ? emptyInfoFieldResult : dataToReturn.toString(); } diff --git a/qannotate/test/au/edu/qimr/qannotate/nanno/AnnotationSourceSnpEffVCFTest.java b/qannotate/test/au/edu/qimr/qannotate/nanno/AnnotationSourceSnpEffVCFTest.java index 0921607aa..9ed45e930 100644 --- a/qannotate/test/au/edu/qimr/qannotate/nanno/AnnotationSourceSnpEffVCFTest.java +++ b/qannotate/test/au/edu/qimr/qannotate/nanno/AnnotationSourceSnpEffVCFTest.java @@ -2,14 +2,25 @@ import static org.junit.Assert.assertEquals; +import java.io.File; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; import java.util.Arrays; import java.util.List; +import org.junit.Rule; import org.junit.Test; import au.edu.qimr.qannotate.nanno.AnnotationSourceSnpEffVCF; +import org.junit.rules.TemporaryFolder; +import org.qcmg.common.model.ChrPositionRefAlt; +import org.qcmg.common.util.ChrPositionUtils; +import org.qcmg.qio.record.StringFileReader; public class AnnotationSourceSnpEffVCFTest { + + @Rule + public final TemporaryFolder testFolder = new TemporaryFolder(); @Test public void extractFieldsFromInfoField() { @@ -47,4 +58,192 @@ public void extractFieldsFromInfoField2() { assertEquals("cdna_position=", AnnotationSourceSnpEffVCF.extractFieldsFromInfoField(info, List.of("cdna_position"), ".", alt)); } + @Test + public void getWorstConsequenceMultipleGenesCommaDelimited() { + String info = "AC=2;AF=1.00;AN=2;ANN=" + + "G|missense_variant|MODERATE|GENE1|ID1|transcript|TR1|protein_coding|1/1|c.1A>G|||||10|," + + "G|synonymous_variant|LOW|GENE1|ID1|transcript|TR2|protein_coding|1/1|c.1A>G|||||10|," + + "G|stop_gained|HIGH|GENE2|ID2|transcript|TR3|protein_coding|1/1|c.1A>G|||||10|," + + "T|synonymous_variant|LOW|GENE3|ID3|transcript|TR4|protein_coding|1/1|c.1A>T|||||10|"; + + String alt = "G"; + String expected = "G|missense_variant|MODERATE|GENE1|ID1|transcript|TR1|protein_coding|1/1|c.1A>G|||||10|," + + "G|stop_gained|HIGH|GENE2|ID2|transcript|TR3|protein_coding|1/1|c.1A>G|||||10|"; + + assertEquals(expected, AnnotationSourceSnpEffVCF.getWorstConsequence(info, alt)); + } + + @Test + public void getAnnotationMultiGene() throws Exception { + File vcf = testFolder.newFile("snpeff.vcf"); + + String line = "chr1\t100\t.\tA\tG,T\t.\t.\tANN=G|downstream_gene_variant|MODIFIER|NADK|ENSG00000008130.15|transcript|ENST00000341426.9|protein_coding||c.*4307G>C|||||2635|," + + "G|intergenic_region|MODIFIER|CDK11A-NADK|ENSG00000008128.23-ENSG00000008130.15|intergenic_region|ENSG00000008128.23-ENSG00000008130.15|||n.1748597C>G||||||"; + + Files.write(vcf.toPath(), List.of(line), StandardCharsets.UTF_8); + + try (StringFileReader reader = new StringFileReader(vcf)) { + AnnotationSourceSnpEffVCF source = new AnnotationSourceSnpEffVCF( + reader, + 1, // chrPositionInRecord (1-based) + 2, // positionPositionInRecord (1-based) + 4, // refPositionInFile (1-based) + 5, // altPositionInFile (1-based) + "effect,gene_name", + true + ); + + ChrPositionRefAlt cp = new ChrPositionRefAlt("chr1", 100, 100, "A", "G"); + long cpAsLong = ChrPositionUtils.convertContigAndPositionToLong("1", 100); + + String first = source.getAnnotation(cpAsLong, cp); + String second = source.getAnnotation(cpAsLong, cp); + + assertEquals("effect=downstream_gene_variant|intergenic_region\tgene_name=NADK|CDK11A-NADK", first); + assertEquals("effect=downstream_gene_variant|intergenic_region\tgene_name=NADK|CDK11A-NADK", second); + } + } + + @Test + public void getAnnotationUsesSameLogicForCurrentAndNext() throws Exception { + File vcf = testFolder.newFile("snpeff.vcf"); + + String line = "chr1\t100\t.\tA\tG,T\t.\t.\tANN=" + + "G|missense_variant|MODERATE|GENE|ID|transcript|TR|protein_coding|1/1|c.1A>G|||||10|," + + "T|synonymous_variant|LOW|GENE|ID|transcript|TR|protein_coding|1/1|c.1A>T|||||10|"; + + Files.write(vcf.toPath(), List.of(line), StandardCharsets.UTF_8); + + try (StringFileReader reader = new StringFileReader(vcf)) { + AnnotationSourceSnpEffVCF source = new AnnotationSourceSnpEffVCF( + reader, + 1, // chrPositionInRecord (1-based) + 2, // positionPositionInRecord (1-based) + 4, // refPositionInFile (1-based) + 5, // altPositionInFile (1-based) + "effect", + true + ); + + ChrPositionRefAlt cp = new ChrPositionRefAlt("chr1", 100, 100, "A", "T"); + long cpAsLong = ChrPositionUtils.convertContigAndPositionToLong("1", 100); + + String first = source.getAnnotation(cpAsLong, cp); + String second = source.getAnnotation(cpAsLong, cp); + + assertEquals("effect=synonymous_variant", first); + assertEquals("effect=synonymous_variant", second); + + cp = new ChrPositionRefAlt("chr1", 100, 100, "A", "G"); + + first = source.getAnnotation(cpAsLong, cp); + second = source.getAnnotation(cpAsLong, cp); + + assertEquals("effect=missense_variant", first); + assertEquals("effect=missense_variant", second); + } + } + + @Test + public void getAnnotationNoMatchReturnsEmpty() throws Exception { + File vcf = testFolder.newFile("snpeff-no-match.vcf"); + + String line = "chr1\t100\t.\tA\tG,T\t.\t.\tANN=" + + "G|missense_variant|MODERATE|GENE|ID|transcript|TR|protein_coding|1/1|c.1A>G|||||10|," + + "T|synonymous_variant|LOW|GENE|ID|transcript|TR|protein_coding|1/1|c.1A>T|||||10|"; + + Files.write(vcf.toPath(), List.of(line), StandardCharsets.UTF_8); + + try (StringFileReader reader = new StringFileReader(vcf)) { + AnnotationSourceSnpEffVCF source = new AnnotationSourceSnpEffVCF( + reader, + 1, // chrPositionInRecord (1-based) + 2, // positionPositionInRecord (1-based) + 4, // refPositionInFile (1-based) + 5, // altPositionInFile (1-based) + "effect", + true + ); + + // same position, but alt does not exist in the record + ChrPositionRefAlt cp = new ChrPositionRefAlt("chr1", 100, 100, "A", "C"); + long cpAsLong = ChrPositionUtils.convertContigAndPositionToLong("1", 100); + + assertEquals("effect=", source.getAnnotation(cpAsLong, cp)); + } + } + + @Test + public void getAnnotationNoPositionMatchReturnsEmpty() throws Exception { + File vcf = testFolder.newFile("snpeff-no-pos.vcf"); + + String line = "chr1\t100\t.\tA\tG\t.\t.\tANN=" + + "G|missense_variant|MODERATE|GENE|ID|transcript|TR|protein_coding|1/1|c.1A>G|||||10|"; + + Files.write(vcf.toPath(), List.of(line), StandardCharsets.UTF_8); + + try (StringFileReader reader = new StringFileReader(vcf)) { + AnnotationSourceSnpEffVCF source = new AnnotationSourceSnpEffVCF( + reader, + 1, // chrPositionInRecord (1-based) + 2, // positionPositionInRecord (1-based) + 4, // refPositionInFile (1-based) + 5, // altPositionInFile (1-based) + "effect", + true + ); + + // different position (no match) + ChrPositionRefAlt cp = new ChrPositionRefAlt("chr1", 101, 101, "A", "G"); + long cpAsLong = ChrPositionUtils.convertContigAndPositionToLong("1", 101); + + assertEquals("effect=", source.getAnnotation(cpAsLong, cp)); + } + } + + @Test + public void getWorstConsequenceSkipsDuplicateGeneUsesFirst() { + String info = "ANN=" + + "G|missense_variant|MODERATE|GENE1|ID1|transcript|TR1|protein_coding|1/1|c.1A>G|||||10|," + + "G|stop_gained|HIGH|GENE1|ID1|transcript|TR2|protein_coding|1/1|c.1A>G|||||10|"; + + String alt = "G"; + String expected = "G|missense_variant|MODERATE|GENE1|ID1|transcript|TR1|protein_coding|1/1|c.1A>G|||||10|"; + + assertEquals(expected, AnnotationSourceSnpEffVCF.getWorstConsequence(info, alt)); + } + + @Test + public void getWorstConsequenceSkipsEmptyGene() { + String info = "ANN=" + + "G|missense_variant|MODERATE||ID1|transcript|TR1|protein_coding|1/1|c.1A>G|||||10|," + + "G|stop_gained|HIGH|GENE2|ID2|transcript|TR2|protein_coding|1/1|c.1A>G|||||10|"; + + String alt = "G"; + String expected = "G|stop_gained|HIGH|GENE2|ID2|transcript|TR2|protein_coding|1/1|c.1A>G|||||10|"; + + assertEquals(expected, AnnotationSourceSnpEffVCF.getWorstConsequence(info, alt)); + } + + @Test + public void extractFieldsFromInfoFieldMultiGeneAltNotPresent() { + String info = "ANN=" + + "G|missense_variant|MODERATE|GENE1|ID1|transcript|TR1|protein_coding|1/1|c.1A>G|||||10|," + + "G|stop_gained|HIGH|GENE2|ID2|transcript|TR2|protein_coding|1/1|c.1A>G|||||10|"; + + assertEquals("effect=", AnnotationSourceSnpEffVCF.extractFieldsFromInfoField(info, List.of("effect"), "effect=", "T")); + } + + @Test + public void extractFieldsFromInfoFieldMultiGeneOrderIsStable() { + // SnpEff already orders consequences by severity; we preserve first-seen order. + String info = "ANN=" + + "G|missense_variant|MODERATE|GENE_B|ID1|transcript|TR1|protein_coding|1/1|c.1A>G|||||10|," + + "G|stop_gained|HIGH|GENE_A|ID2|transcript|TR2|protein_coding|1/1|c.1A>G|||||10|"; + + String result = AnnotationSourceSnpEffVCF.extractFieldsFromInfoField(info, List.of("effect", "gene_name"), "effect=\tgene_name=", "G"); + + assertEquals("effect=missense_variant|stop_gained\tgene_name=GENE_B|GENE_A", result); + } + } diff --git a/qannotate/test/au/edu/qimr/qannotate/nanno/AnnotationSourceTSVTest.java b/qannotate/test/au/edu/qimr/qannotate/nanno/AnnotationSourceTSVTest.java index c1cc590a1..0754e6865 100644 --- a/qannotate/test/au/edu/qimr/qannotate/nanno/AnnotationSourceTSVTest.java +++ b/qannotate/test/au/edu/qimr/qannotate/nanno/AnnotationSourceTSVTest.java @@ -14,27 +14,28 @@ public class AnnotationSourceTSVTest { @Test public void extractFieldsFromRecord() { - assertEquals("", AnnotationSourceTSV.extractFieldsFromRecord(null, null)); - assertEquals("", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{}, null)); - assertEquals("", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"blah"}, null)); - assertEquals("", AnnotationSourceTSV.extractFieldsFromRecord(null, new HashMap<>())); - assertEquals("", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{}, new HashMap<>())); - Map fields = new HashMap<>(); - fields.put("foo", 0); - assertEquals("foo=short_record", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"short_record"}, fields)); - assertEquals("foo=slightly_longer", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer","record"}, fields)); - fields.put("foo", 10); - assertEquals("", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer","record"}, fields)); - assertEquals("foo=", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer", "record", "", "", "", "", "", "", "", "", ""}, fields)); - assertEquals("foo=bar", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer", "record", "", "", "", "", "", "", "", "", "bar"}, fields)); - fields.put("foo2", 2); - assertEquals("foo=bar\tfoo2=", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer", "record", "", "", "", "", "", "", "", "", "bar"}, fields)); - fields.put("foo2", 1); - assertEquals("foo=bar\tfoo2=record", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer", "record", "", "", "", "", "", "", "", "", "bar"}, fields)); - fields.put("foo2", 11); - assertEquals("foo=bar", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer", "record", "", "", "", "", "", "", "", "", "bar"}, fields)); - fields.put("foo", 100); - assertEquals("", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer","record","","","","","","","","","bar"}, fields)); + assertEquals("", AnnotationSourceTSV.extractFieldsFromRecord(null, null, null)); + assertEquals("", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{}, null, null)); + assertEquals("", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"blah"}, null, null)); + assertEquals("", AnnotationSourceTSV.extractFieldsFromRecord(null, new String[]{}, new int[]{})); + assertEquals("", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{}, new String[]{}, new int[]{})); + String[] fieldNames = new String[]{"foo"}; + int[] fieldPositions = new int[]{0}; + assertEquals("foo=short_record", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"short_record"}, fieldNames, fieldPositions)); + assertEquals("foo=slightly_longer", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer","record"}, fieldNames, fieldPositions)); + fieldPositions = new int[]{10}; + assertEquals("", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer","record"}, fieldNames, fieldPositions)); + assertEquals("foo=", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer", "record", "", "", "", "", "", "", "", "", ""}, fieldNames, fieldPositions)); + assertEquals("foo=bar", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer", "record", "", "", "", "", "", "", "", "", "bar"}, fieldNames, fieldPositions)); + fieldNames = new String[]{"foo", "foo2"}; + fieldPositions = new int[]{10, 2}; + assertEquals("foo=bar\tfoo2=", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer", "record", "", "", "", "", "", "", "", "", "bar"}, fieldNames, fieldPositions)); + fieldPositions = new int[]{10, 1}; + assertEquals("foo=bar\tfoo2=record", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer", "record", "", "", "", "", "", "", "", "", "bar"}, fieldNames, fieldPositions)); + fieldPositions = new int[]{10, 11}; + assertEquals("foo=bar", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer", "record", "", "", "", "", "", "", "", "", "bar"}, fieldNames, fieldPositions)); + fieldPositions = new int[]{100, 11}; + assertEquals("", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer","record","","","","","","","","","bar"}, fieldNames, fieldPositions)); } From 460c9c4f7380be1779651363da110a80d8abaee4 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 27 Feb 2026 06:57:02 +0000 Subject: [PATCH 02/11] Initial plan From 2d606cc5bb6c7db2c55de8b0ec147430fd470670 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 27 Feb 2026 06:59:23 +0000 Subject: [PATCH 03/11] Initial plan From 283d2012ebd0b7f582b7dd5b52da7869f7db7a41 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 27 Feb 2026 06:59:29 +0000 Subject: [PATCH 04/11] fix: guard against mismatched array lengths in extractFieldsFromRecord Co-authored-by: holmeso <7066552+holmeso@users.noreply.github.com> --- .../au/edu/qimr/qannotate/nanno/AnnotationSourceTSV.java | 2 +- .../edu/qimr/qannotate/nanno/AnnotationSourceTSVTest.java | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceTSV.java b/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceTSV.java index 41e20e907..8d91add62 100644 --- a/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceTSV.java +++ b/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceTSV.java @@ -112,7 +112,7 @@ public static String extractFieldsFromRecord(String[] record, String[] fieldName StringBuilder dataToReturn = new StringBuilder(); int recordLength = null != record ? record.length : 0; if (recordLength > 0 && null != fieldNames && null != fieldPositions) { - for (int i = 0; i < fieldNames.length; i++) { + for (int i = 0; i < Math.min(fieldNames.length, fieldPositions.length); i++) { int pos = fieldPositions[i]; if (recordLength > pos) { dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB : "") diff --git a/qannotate/test/au/edu/qimr/qannotate/nanno/AnnotationSourceTSVTest.java b/qannotate/test/au/edu/qimr/qannotate/nanno/AnnotationSourceTSVTest.java index 0754e6865..b45dc0562 100644 --- a/qannotate/test/au/edu/qimr/qannotate/nanno/AnnotationSourceTSVTest.java +++ b/qannotate/test/au/edu/qimr/qannotate/nanno/AnnotationSourceTSVTest.java @@ -37,6 +37,14 @@ public void extractFieldsFromRecord() { fieldPositions = new int[]{100, 11}; assertEquals("", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer","record","","","","","","","","","bar"}, fieldNames, fieldPositions)); + // mismatched array lengths - fieldNames longer than fieldPositions + fieldNames = new String[]{"foo", "foo2", "foo3"}; + fieldPositions = new int[]{10}; + assertEquals("foo=bar", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer", "record", "", "", "", "", "", "", "", "", "bar"}, fieldNames, fieldPositions)); + // mismatched array lengths - fieldPositions longer than fieldNames + fieldNames = new String[]{"foo"}; + fieldPositions = new int[]{10, 1, 0}; + assertEquals("foo=bar", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer", "record", "", "", "", "", "", "", "", "", "bar"}, fieldNames, fieldPositions)); } @Test From 0cd751ffed6252bbbde5a633fe6af647a92b5046 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 27 Feb 2026 07:02:17 +0000 Subject: [PATCH 05/11] Initial plan From d431245a4844a72be40f6317fb32473d8bcbbc03 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 27 Feb 2026 07:05:21 +0000 Subject: [PATCH 06/11] fix: preserve user-requested field order in AnnotationSourceTSV by iterating over fieldNames.split() instead of HashMap keyset Co-authored-by: holmeso <7066552+holmeso@users.noreply.github.com> --- .../edu/qimr/qannotate/nanno/AnnotationSourceTSV.java | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceTSV.java b/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceTSV.java index 41e20e907..b5cba50b7 100644 --- a/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceTSV.java +++ b/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceTSV.java @@ -49,11 +49,12 @@ public AnnotationSourceTSV(RecordReader reader, int chrPositionInRecord, if (headerNameAndPosition.isEmpty()) { throw new IllegalArgumentException("Could not find requested fields (" + fieldNames + ") in header: " + headerLine); } - // precompute arrays for fast extraction - this.fieldNames = headerNameAndPosition.keySet().toArray(new String[0]); - this.fieldPositions = new int[this.fieldNames.length]; - for (int i = 0; i < this.fieldNames.length; i++) { - this.fieldPositions[i] = headerNameAndPosition.get(this.fieldNames[i]); + // precompute arrays for fast extraction, preserving user-requested field order + String[] requestedFields = fieldNames.split(","); + this.fieldNames = requestedFields; + this.fieldPositions = new int[requestedFields.length]; + for (int i = 0; i < requestedFields.length; i++) { + this.fieldPositions[i] = headerNameAndPosition.get(requestedFields[i]); } } From 79cc951fc802e011358ab10a191fede58da6091b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 27 Feb 2026 07:05:49 +0000 Subject: [PATCH 07/11] fix: use LinkedHashMap in getHeaderNameAndPositions to preserve user-requested field order Co-authored-by: holmeso <7066552+holmeso@users.noreply.github.com> --- .../src/au/edu/qimr/qannotate/nanno/AnnotationSourceTSV.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceTSV.java b/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceTSV.java index 41e20e907..5c303594a 100644 --- a/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceTSV.java +++ b/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceTSV.java @@ -3,7 +3,7 @@ import java.io.IOException; import java.util.Arrays; import java.util.Collections; -import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -81,7 +81,7 @@ public static String getLastHeaderLine(List headerLines) { * return an empty map if any of the fields are not in the header */ public static Map getHeaderNameAndPositions(String fieldNames, String header) { - Map namePositions = new HashMap<>(); + Map namePositions = new LinkedHashMap<>(); System.out.println("header: " + header); From d646599e18c7ffb023e70ee43f915a0c3018bcf5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 27 Feb 2026 08:55:56 +0000 Subject: [PATCH 08/11] fix: remove --enable-preview, fix Executor classpath quoting, update test expectations - Revert --enable-preview from build.gradle (List.getFirst/getLast are standard Java 21, not preview) - Revert --enable-preview from AnnotateTest.java Executor call - Update jsonInputsTSV expected value to match user-requested field order (aaref,HGVSc_VEP,HGVSp_VEP) with LinkedHashMap - Fix Executor.java: use getJavaPath() instead of literal "java", remove shell-style quotes from classpath (ProcessBuilder does not use a shell), switch array/jvmArgs constructors to ProcessBuilder Co-authored-by: holmeso <7066552+holmeso@users.noreply.github.com> --- .../qimr/qannotate/nanno/AnnotateTest.java | 2 +- .../org/qcmg/common/commandline/Executor.java | 54 +++++++++++++++++-- 2 files changed, 50 insertions(+), 6 deletions(-) diff --git a/qannotate/test/au/edu/qimr/qannotate/nanno/AnnotateTest.java b/qannotate/test/au/edu/qimr/qannotate/nanno/AnnotateTest.java index 96b4c666c..a032392cb 100644 --- a/qannotate/test/au/edu/qimr/qannotate/nanno/AnnotateTest.java +++ b/qannotate/test/au/edu/qimr/qannotate/nanno/AnnotateTest.java @@ -115,7 +115,7 @@ public void jsonInputsTSV() throws IOException { AnnotateUtils.populateAnnotationSources(ais, sources); assertEquals(1, sources.size()); String annotation = sources.getFirst().getAnnotation(ChrPositionUtils.convertContigAndPositionToLong("1", 655652), new ChrPositionRefAlt("chr1", 655652, 655652, "A", "T")); - assertEquals("HGVSc_VEP=c.1A>C\tHGVSp_VEP=p.Met1?\taaref=M", annotation); + assertEquals("aaref=M\tHGVSc_VEP=c.1A>C\tHGVSp_VEP=p.Met1?", annotation); } @Test diff --git a/qcommon/src/org/qcmg/common/commandline/Executor.java b/qcommon/src/org/qcmg/common/commandline/Executor.java index 246754425..e44b4b758 100644 --- a/qcommon/src/org/qcmg/common/commandline/Executor.java +++ b/qcommon/src/org/qcmg/common/commandline/Executor.java @@ -8,6 +8,7 @@ package org.qcmg.common.commandline; import java.io.IOException; +import java.nio.file.Paths; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -17,18 +18,61 @@ public class Executor { private final StreamConsumer outputStreamConsumer; private final StreamConsumer errorStreamConsumer; - private final int errCode; + private final int errCode; + + private static String getJavaPath() { + return Paths.get(System.getProperty("java.home"), "bin", "java").toString(); + } public Executor(String arguments, String qualifiedMainClassName) throws IOException, InterruptedException { - this("java", "-classpath", "\"" + System.getProperty("java.class.path") + "\"", qualifiedMainClassName, arguments); + this(getJavaPath(), "-classpath", System.getProperty("java.class.path"), qualifiedMainClassName, arguments); } - public Executor(String [] arguments, String qualifiedMainClassName) throws IOException, InterruptedException { - this("java -classpath " + System.getProperty("java.class.path") + " " + qualifiedMainClassName + " " + Arrays.toString(arguments) ); + public Executor(String [] arguments, String qualifiedMainClassName) throws IOException, InterruptedException { + String javaPath = getJavaPath(); + String classPath = System.getProperty("java.class.path"); + List commands = new ArrayList<>(); + commands.add(javaPath); + commands.add("-classpath"); + commands.add(classPath); + commands.add(qualifiedMainClassName); + commands.addAll(Arrays.asList(arguments)); + + ProcessBuilder processBuilder = new ProcessBuilder(commands); + Process process = processBuilder.start(); + outputStreamConsumer = new StreamConsumer(process.getInputStream()); + errorStreamConsumer = new StreamConsumer(process.getErrorStream()); + outputStreamConsumer.run(); + errorStreamConsumer.run(); + errCode = process.waitFor(); } public Executor(String jvmArgs, String arguments, String qualifiedMainClassName) throws IOException, InterruptedException { - this("java -classpath " + System.getProperty("java.class.path") + " " + jvmArgs + " " + qualifiedMainClassName + " " + arguments); + String javaPath = getJavaPath(); + String classPath = System.getProperty("java.class.path"); + List commands = new ArrayList<>(); + commands.add(javaPath); + commands.add("-classpath"); + commands.add(classPath); + for (String arg : jvmArgs.split(" ")) { + if (!arg.isEmpty()) { + commands.add(arg); + } + } + commands.add(qualifiedMainClassName); + for (String arg : arguments.split(" ")) { + if (!arg.isEmpty()) { + commands.add(arg); + } + } + + ProcessBuilder processBuilder = new ProcessBuilder(commands); + Process process = processBuilder.start(); + outputStreamConsumer = new StreamConsumer(process.getInputStream()); + errorStreamConsumer = new StreamConsumer(process.getErrorStream()); + outputStreamConsumer.run(); + errorStreamConsumer.run(); + errCode = process.waitFor(); } // constructor for running a command line From f3e112a7c0f17a8abd8b302935ccd02d0701c8e6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 3 Mar 2026 01:25:59 +0000 Subject: [PATCH 09/11] revert: remove Executor.java changes from this PR Co-authored-by: holmeso <7066552+holmeso@users.noreply.github.com> --- .../org/qcmg/common/commandline/Executor.java | 54 ++----------------- 1 file changed, 5 insertions(+), 49 deletions(-) diff --git a/qcommon/src/org/qcmg/common/commandline/Executor.java b/qcommon/src/org/qcmg/common/commandline/Executor.java index e44b4b758..246754425 100644 --- a/qcommon/src/org/qcmg/common/commandline/Executor.java +++ b/qcommon/src/org/qcmg/common/commandline/Executor.java @@ -8,7 +8,6 @@ package org.qcmg.common.commandline; import java.io.IOException; -import java.nio.file.Paths; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -18,61 +17,18 @@ public class Executor { private final StreamConsumer outputStreamConsumer; private final StreamConsumer errorStreamConsumer; - private final int errCode; - - private static String getJavaPath() { - return Paths.get(System.getProperty("java.home"), "bin", "java").toString(); - } + private final int errCode; public Executor(String arguments, String qualifiedMainClassName) throws IOException, InterruptedException { - this(getJavaPath(), "-classpath", System.getProperty("java.class.path"), qualifiedMainClassName, arguments); + this("java", "-classpath", "\"" + System.getProperty("java.class.path") + "\"", qualifiedMainClassName, arguments); } - public Executor(String [] arguments, String qualifiedMainClassName) throws IOException, InterruptedException { - String javaPath = getJavaPath(); - String classPath = System.getProperty("java.class.path"); - List commands = new ArrayList<>(); - commands.add(javaPath); - commands.add("-classpath"); - commands.add(classPath); - commands.add(qualifiedMainClassName); - commands.addAll(Arrays.asList(arguments)); - - ProcessBuilder processBuilder = new ProcessBuilder(commands); - Process process = processBuilder.start(); - outputStreamConsumer = new StreamConsumer(process.getInputStream()); - errorStreamConsumer = new StreamConsumer(process.getErrorStream()); - outputStreamConsumer.run(); - errorStreamConsumer.run(); - errCode = process.waitFor(); + public Executor(String [] arguments, String qualifiedMainClassName) throws IOException, InterruptedException { + this("java -classpath " + System.getProperty("java.class.path") + " " + qualifiedMainClassName + " " + Arrays.toString(arguments) ); } public Executor(String jvmArgs, String arguments, String qualifiedMainClassName) throws IOException, InterruptedException { - String javaPath = getJavaPath(); - String classPath = System.getProperty("java.class.path"); - List commands = new ArrayList<>(); - commands.add(javaPath); - commands.add("-classpath"); - commands.add(classPath); - for (String arg : jvmArgs.split(" ")) { - if (!arg.isEmpty()) { - commands.add(arg); - } - } - commands.add(qualifiedMainClassName); - for (String arg : arguments.split(" ")) { - if (!arg.isEmpty()) { - commands.add(arg); - } - } - - ProcessBuilder processBuilder = new ProcessBuilder(commands); - Process process = processBuilder.start(); - outputStreamConsumer = new StreamConsumer(process.getInputStream()); - errorStreamConsumer = new StreamConsumer(process.getErrorStream()); - outputStreamConsumer.run(); - errorStreamConsumer.run(); - errCode = process.waitFor(); + this("java -classpath " + System.getProperty("java.class.path") + " " + jvmArgs + " " + qualifiedMainClassName + " " + arguments); } // constructor for running a command line From 623d91107a7edcac83ed981349e762f28e7e18af Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 3 Mar 2026 03:12:41 +0000 Subject: [PATCH 10/11] test: add test verifying user-requested field order is preserved in annotation output Co-authored-by: holmeso <7066552+holmeso@users.noreply.github.com> --- .../nanno/AnnotationSourceTSVTest.java | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/qannotate/test/au/edu/qimr/qannotate/nanno/AnnotationSourceTSVTest.java b/qannotate/test/au/edu/qimr/qannotate/nanno/AnnotationSourceTSVTest.java index 0754e6865..5e6b0f98b 100644 --- a/qannotate/test/au/edu/qimr/qannotate/nanno/AnnotationSourceTSVTest.java +++ b/qannotate/test/au/edu/qimr/qannotate/nanno/AnnotationSourceTSVTest.java @@ -39,6 +39,32 @@ public void extractFieldsFromRecord() { } + @Test + public void fieldOrderPreservedFromUserRequest() { + /* + * header has fields in order: alpha, beta, gamma + * user requests them in reverse: gamma,beta,alpha + * output should match user-requested order, not header order + */ + String header = "alpha\tbeta\tgamma"; + String[] record = new String[]{"val_alpha", "val_beta", "val_gamma"}; + + Map fieldsAndPositions = AnnotationSourceTSV.getHeaderNameAndPositions("gamma,beta,alpha", header); + assertEquals(2, fieldsAndPositions.get("gamma").intValue()); + assertEquals(1, fieldsAndPositions.get("beta").intValue()); + assertEquals(0, fieldsAndPositions.get("alpha").intValue()); + + // build arrays the same way the constructor does (using split order, not keyset order) + String[] requestedFields = "gamma,beta,alpha".split(","); + int[] positions = new int[requestedFields.length]; + for (int i = 0; i < requestedFields.length; i++) { + positions[i] = fieldsAndPositions.get(requestedFields[i]); + } + // output must be in user-requested order: gamma, beta, alpha + assertEquals("gamma=val_gamma\tbeta=val_beta\talpha=val_alpha", + AnnotationSourceTSV.extractFieldsFromRecord(record, requestedFields, positions)); + } + @Test public void getHeaderNameAndPositions() { List headerLines = Arrays.asList(new String[] {"","#chr pos(1-based) ref alt aaref aaalt rs_dbSNP151 hg19_chr hg19_pos(1-based) hg18_chr hg18_pos(1-based) aapos genename Ensembl_geneid Ensembl_transcriptid Ensembl_proteinid Uniprot_acc Uniprot_entry HGVSc_ANNOVAR HGVSp_ANNOVAR HGVSc_snpEff HGVSp_snpEff HGVSc_VEP HGVSp_VEP APPRIS GENCODE_basic TSL VEP_canonicalcds_strand refcodon codonpos codon_degeneracy Ancestral_allele AltaiNeandertal Denisova VindijiaNeandertal SIFT_score SIFT_converted_rankscore SIFT_pred SIFT4G_score SIFT4G_converted_rankscore SIFT4G_pred Polyphen2_HDIV_score Polyphen2_HDIV_rankscore Polyphen2_HDIV_pred Polyphen2_HVAR_score Polyphen2_HVAR_rankscorePolyphen2_HVAR_pred LRT_score LRT_converted_rankscore LRT_pred LRT_Omega MutationTaster_score MutationTaster_converted_rankscore MutationTaster_pred MutationTaster_modelMutationTaster_AAE MutationAssessor_score MutationAssessor_rankscore MutationAssessor_pred FATHMM_score FATHMM_converted_rankscore FATHMM_pred PROVEAN_score PROVEAN_converted_rankscore PROVEAN_pred VEST4_score VEST4_rankscore MetaSVM_score MetaSVM_rankscore MetaSVM_pred MetaLR_score MetaLR_rankscore MetaLR_pred Reliability_index M-CAP_scoreM-CAP_rankscore M-CAP_pred REVEL_score REVEL_rankscore MutPred_score MutPred_rankscore MutPred_protID MutPred_AAchange MutPred_Top5features MVP_score MVP_rankscore MPC_score MPC_rankscore PrimateAI_score PrimateAI_rankscore PrimateAI_pred DEOGEN2_score DEOGEN2_rankscore DEOGEN2_pred BayesDel_addAF_score BayesDel_addAF_rankscore BayesDel_addAF_pred BayesDel_noAF_score BayesDel_noAF_rankscore BayesDel_noAF_pred ClinPred_score ClinPred_rankscore ClinPred_pred LIST-S2_score LIST-S2_rankscore LIST-S2_pred Aloft_Fraction_transcripts_affected Aloft_prob_Tolerant Aloft_prob_Recessive Aloft_prob_Dominant Aloft_pred Aloft_Confidence CADD_raw CADD_raw_rankscore CADD_phred CADD_raw_hg19CADD_raw_rankscore_hg19 CADD_phred_hg19 DANN_score DANN_rankscore fathmm-MKL_coding_score fathmm-MKL_coding_rankscore fathmm-MKL_coding_pred fathmm-MKL_coding_group fathmm-XF_coding_score fathmm-XF_coding_rankscore fathmm-XF_coding_pred Eigen-raw_coding Eigen-raw_coding_rankscore Eigen-phred_coding Eigen-PC-raw_coding Eigen-PC-raw_coding_rankscore Eigen-PC-phred_codingGenoCanyon_score GenoCanyon_rankscore integrated_fitCons_score integrated_fitCons_rankscore integrated_confidence_value GM12878_fitCons_score GM12878_fitCons_rankscore GM12878_confidence_value H1-hESC_fitCons_score H1-hESC_fitCons_rankscore H1-hESC_confidence_value HUVEC_fitCons_score HUVEC_fitCons_rankscore HUVEC_confidence_value LINSIGHT LINSIGHT_rankscore GERP++_NR GERP++_RS GERP++_RS_rankscore phyloP100way_vertebrate phyloP100way_vertebrate_rankscore phyloP30way_mammalian phyloP30way_mammalian_rankscore phyloP17way_primate phyloP17way_primate_rankscore phastCons100way_vertebrate phastCons100way_vertebrate_rankscore phastCons30way_mammalian phastCons30way_mammalian_rankscore phastCons17way_primatephastCons17way_primate_rankscore SiPhy_29way_pi SiPhy_29way_logOdds SiPhy_29way_logOdds_rankscore bStatistic bStatistic_converted_rankscore 1000Gp3_AC 1000Gp3_AF 1000Gp3_AFR_AC1000Gp3_AFR_AF 1000Gp3_EUR_AC 1000Gp3_EUR_AF 1000Gp3_AMR_AC 1000Gp3_AMR_AF 1000Gp3_EAS_AC 1000Gp3_EAS_AF 1000Gp3_SAS_AC 1000Gp3_SAS_AF TWINSUK_AC TWINSUK_AF ALSPAC_AC ALSPAC_AFUK10K_AC UK10K_AF ESP6500_AA_AC ESP6500_AA_AF ESP6500_EA_AC ESP6500_EA_AF ExAC_AC ExAC_AF ExAC_Adj_AC ExAC_Adj_AF ExAC_AFR_AC ExAC_AFR_AF ExAC_AMR_AC ExAC_AMR_AFExAC_EAS_AC ExAC_EAS_AF ExAC_FIN_AC ExAC_FIN_AF ExAC_NFE_AC ExAC_NFE_AF ExAC_SAS_AC ExAC_SAS_AF ExAC_nonTCGA_AC ExAC_nonTCGA_AF ExAC_nonTCGA_Adj_AC ExAC_nonTCGA_Adj_AFExAC_nonTCGA_AFR_AC ExAC_nonTCGA_AFR_AF ExAC_nonTCGA_AMR_AC ExAC_nonTCGA_AMR_AF ExAC_nonTCGA_EAS_AC ExAC_nonTCGA_EAS_AF ExAC_nonTCGA_FIN_AC ExAC_nonTCGA_FIN_AF ExAC_nonTCGA_NFE_AC ExAC_nonTCGA_NFE_AF ExAC_nonTCGA_SAS_AC ExAC_nonTCGA_SAS_AF ExAC_nonpsych_AC ExAC_nonpsych_AF ExAC_nonpsych_Adj_AC ExAC_nonpsych_Adj_AF ExAC_nonpsych_AFR_AC ExAC_nonpsych_AFR_AF ExAC_nonpsych_AMR_AC ExAC_nonpsych_AMR_AF ExAC_nonpsych_EAS_AC ExAC_nonpsych_EAS_AF ExAC_nonpsych_FIN_AC ExAC_nonpsych_FIN_AF ExAC_nonpsych_NFE_AC ExAC_nonpsych_NFE_AFExAC_nonpsych_SAS_AC ExAC_nonpsych_SAS_AF gnomAD_exomes_flag gnomAD_exomes_AC gnomAD_exomes_AN gnomAD_exomes_AF gnomAD_exomes_nhomalt gnomAD_exomes_AFR_AC gnomAD_exomes_AFR_AN gnomAD_exomes_AFR_AF gnomAD_exomes_AFR_nhomalt gnomAD_exomes_AMR_AC gnomAD_exomes_AMR_AN gnomAD_exomes_AMR_AF gnomAD_exomes_AMR_nhomalt gnomAD_exomes_ASJ_AC gnomAD_exomes_ASJ_AN gnomAD_exomes_ASJ_AF gnomAD_exomes_ASJ_nhomalt gnomAD_exomes_EAS_AC gnomAD_exomes_EAS_AN gnomAD_exomes_EAS_AF gnomAD_exomes_EAS_nhomalt gnomAD_exomes_FIN_AC gnomAD_exomes_FIN_AN gnomAD_exomes_FIN_AF gnomAD_exomes_FIN_nhomalt gnomAD_exomes_NFE_AC gnomAD_exomes_NFE_AN gnomAD_exomes_NFE_AF gnomAD_exomes_NFE_nhomalt gnomAD_exomes_SAS_AC gnomAD_exomes_SAS_AN gnomAD_exomes_SAS_AF gnomAD_exomes_SAS_nhomalt gnomAD_exomes_POPMAX_AC gnomAD_exomes_POPMAX_AN gnomAD_exomes_POPMAX_AF gnomAD_exomes_POPMAX_nhomalt gnomAD_exomes_controls_AC gnomAD_exomes_controls_AN gnomAD_exomes_controls_AF gnomAD_exomes_controls_nhomalt gnomAD_exomes_controls_AFR_AC gnomAD_exomes_controls_AFR_AN gnomAD_exomes_controls_AFR_AF gnomAD_exomes_controls_AFR_nhomalt gnomAD_exomes_controls_AMR_AC gnomAD_exomes_controls_AMR_AN gnomAD_exomes_controls_AMR_AF gnomAD_exomes_controls_AMR_nhomalt gnomAD_exomes_controls_ASJ_AC gnomAD_exomes_controls_ASJ_AN gnomAD_exomes_controls_ASJ_AF gnomAD_exomes_controls_ASJ_nhomalt gnomAD_exomes_controls_EAS_AC gnomAD_exomes_controls_EAS_AN gnomAD_exomes_controls_EAS_AF gnomAD_exomes_controls_EAS_nhomalt gnomAD_exomes_controls_FIN_AC gnomAD_exomes_controls_FIN_AN gnomAD_exomes_controls_FIN_AF gnomAD_exomes_controls_FIN_nhomalt gnomAD_exomes_controls_NFE_AC gnomAD_exomes_controls_NFE_ANgnomAD_exomes_controls_NFE_AF gnomAD_exomes_controls_NFE_nhomalt gnomAD_exomes_controls_SAS_AC gnomAD_exomes_controls_SAS_AN gnomAD_exomes_controls_SAS_AF gnomAD_exomes_controls_SAS_nhomaltgnomAD_exomes_controls_POPMAX_AC gnomAD_exomes_controls_POPMAX_AN gnomAD_exomes_controls_POPMAX_AF gnomAD_exomes_controls_POPMAX_nhomalt gnomAD_genomes_flag gnomAD_genomes_ACgnomAD_genomes_AN gnomAD_genomes_AF gnomAD_genomes_nhomalt gnomAD_genomes_AFR_AC gnomAD_genomes_AFR_AN gnomAD_genomes_AFR_AF gnomAD_genomes_AFR_nhomalt gnomAD_genomes_AMR_AC gnomAD_genomes_AMR_AN gnomAD_genomes_AMR_AF gnomAD_genomes_AMR_nhomalt gnomAD_genomes_ASJ_AC gnomAD_genomes_ASJ_AN gnomAD_genomes_ASJ_AF gnomAD_genomes_ASJ_nhomalt gnomAD_genomes_EAS_AC gnomAD_genomes_EAS_AN gnomAD_genomes_EAS_AF gnomAD_genomes_EAS_nhomalt gnomAD_genomes_FIN_AC gnomAD_genomes_FIN_AN gnomAD_genomes_FIN_AF gnomAD_genomes_FIN_nhomalt gnomAD_genomes_NFE_AC gnomAD_genomes_NFE_AN gnomAD_genomes_NFE_AF gnomAD_genomes_NFE_nhomalt gnomAD_genomes_AMI_AC gnomAD_genomes_AMI_AN gnomAD_genomes_AMI_AF gnomAD_genomes_AMI_nhomalt gnomAD_genomes_SAS_AC gnomAD_genomes_SAS_AN gnomAD_genomes_SAS_AF gnomAD_genomes_SAS_nhomalt gnomAD_genomes_POPMAX_AC gnomAD_genomes_POPMAX_AN gnomAD_genomes_POPMAX_AF gnomAD_genomes_POPMAX_nhomalt clinvar_id clinvar_clnsig clinvar_trait clinvar_review clinvar_hgvs clinvar_var_source clinvar_MedGen_id clinvar_OMIM_id clinvar_Orphanet_id Interpro_domain GTEx_V8_gene GTEx_V8_tissueGeuvadis_eQTL_target_gene"}); From fa8743ed85d06f89c62b69f684ed3a84c89b8fcb Mon Sep 17 00:00:00 2001 From: holmeso Date: Tue, 3 Mar 2026 13:44:39 +1000 Subject: [PATCH 11/11] Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .../qimr/qannotate/nanno/AnnotationSourceSnpEffVCF.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceSnpEffVCF.java b/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceSnpEffVCF.java index e4982f5df..532d50118 100644 --- a/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceSnpEffVCF.java +++ b/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceSnpEffVCF.java @@ -240,7 +240,13 @@ public static String getWorstConsequence(String info, String alt) { String[] annArray = ann.split(","); Map worstByGene = new java.util.LinkedHashMap<>(); for (String aa : annArray) { - if (aa.startsWith(alt)) { + int pipeIndex = aa.indexOf('|'); + if (pipeIndex <= 0) { + // Malformed ANN entry or missing allele token; skip + continue; + } + String alleleToken = aa.substring(0, pipeIndex); + if (alleleToken.equals(alt)) { String[] parts = TabTokenizer.tokenize(aa, '|'); if (parts.length > 3) { String gene = parts[3];