Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions Model/lib/wdk/model/questions/geneQuestions.xml
Original file line number Diff line number Diff line change
Expand Up @@ -931,6 +931,54 @@ In the analysis carried out by Alsford et al., pseudogenes, genes annotated as "
</question>


<!--++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-->
<!-- Genes By Secondary Metabolites (antiSMASH) -->
<!--++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-->

<!-- Question: find genes that belong to antiSMASH secondary metabolite clusters.
     Declared for FungiDB and UniDB only. Results come from the
     GeneId.GenesBySecondaryMetabolites query and are returned as
     TranscriptRecordClass records. -->
<question name="GenesBySecondaryMetabolites"
includeProjects="FungiDB,UniDB"
displayName="Genes By Secondary Metabolites"
shortDisplayName="Sec Metabolites"
searchCategory="Function prediction"
queryRef="GeneId.GenesBySecondaryMetabolites"
recordClassRef="TranscriptRecordClasses.TranscriptRecordClass">

<!-- Search parameters: organism, cluster category, annotation. -->
<paramRef ref="organismParams.antismash_organism"/>
<paramRef ref="geneParams.antismash_category"/>
<paramRef ref="geneParams.antismash_annotation"/>

<!-- Attributes named in the summary list, with results sorted by ascending gene_source_id. -->
<attributesList
summary="category,antismash_annotation,cluster_location,overlapping_clusters"
sorting="gene_source_id asc"/>

<!-- Attributes specific to this question. Each columnAttribute name matches a
     column declared by the GenesBySecondaryMetabolites query. -->
<dynamicAttributes>
<columnAttribute name="category" displayName="Cluster Category"/>
<columnAttribute name="antismash_annotation" displayName="Annotation"/>
<!--<columnAttribute name="sequence_id" displayName="Sequence"/>-->
<columnAttribute name="cluster_start" displayName="Cluster Start"/>
<columnAttribute name="cluster_end" displayName="Cluster End"/>
<columnAttribute name="org_abbrev" displayName="Organism Abbreviation" internal="true" inReportMaker="false"/>
<columnAttribute name="cluster_context_start" displayName="Cluster Context Start" inReportMaker="false"/>
<columnAttribute name="cluster_context_end" displayName="Cluster Context End" inReportMaker="false"/>
<columnAttribute name="overlapping_clusters" displayName="Overlapping Clusters"/>
<!-- JBrowse link. The viewed location is sequence_id with
     cluster_context_start..cluster_context_end, the highlighted range is
     gene_start_min..gene_end_max, and the link text is the cluster coordinates.
     NOTE(review): sequence_id, gene_start_min and gene_end_max are not declared
     as dynamic attributes here; presumably the record class supplies them. Confirm. -->
<textAttribute name="cluster_location" displayName="View Cluster in JBrowse" inReportMaker="false" truncateTo="100000">
<text>
<![CDATA[
<a href="@JBROWSE_WEBPAGE_URL@?loc=$$sequence_id$$:$$cluster_context_start$$..$$cluster_context_end$$&data=@JBROWSE_SERVICE_URL@/tracks/$$org_abbrev$$&tracks=gene%2Cantibiotics%20and%20Secondary%20Metabolites%20Analysis%20SHell%20(antiSMASH)&highlight=$$sequence_id$$:$$gene_start_min$$..$$gene_end_max$$">$$sequence_id$$:$$cluster_start$$-$$cluster_end$$</a>
]]>
</text>
</textAttribute>
</dynamicAttributes>

<summary><![CDATA[Find genes in secondary metabolite biosynthesis clusters.]]></summary>

<description><![CDATA[
Find genes associated with secondary metabolite biosynthesis clusters predicted by antiSMASH,
filtered by organism, cluster category, and annotation.
]]></description>

</question>


<!--++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-->
<!-- functional profiling growth rate Phenotype -->
Expand Down
79 changes: 79 additions & 0 deletions Model/lib/wdk/model/questions/params/geneParams.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5104,6 +5104,29 @@ products of your selected type (or types).<br><br>
<suggest default="1"/>
</stringParam>

<!-- antiSMASH secondary metabolite params (FungiDB, UniDB only) -->
<!-- Multi-pick vocabulary of antiSMASH cluster categories. Values come from
     GeneVQ.AntismashCategories and depend on organismParams.antismash_organism.
     NOTE(review): the suggested default "NRPS" can only apply when the vocabulary
     query returns that category for the chosen organism(s). Confirm the behavior
     when it is absent. -->
<flatVocabParam name="antismash_category"
queryRef="GeneVQ.AntismashCategories"
prompt="Cluster Category"
multiPick="true"
quote="true"
dependedParamRef="organismParams.antismash_organism"
includeProjects="FungiDB,UniDB">
<help>Select one or more secondary metabolite cluster categories.</help>
<suggest default="NRPS"/>
</flatVocabParam>

<!-- Multi-pick vocabulary of antiSMASH gene annotations. Values come from
     GeneVQ.AntismashAnnotations and depend on both the selected cluster
     category and the selected organism(s). -->
<flatVocabParam name="antismash_annotation"
                queryRef="GeneVQ.AntismashAnnotations"
                prompt="Annotation"
                multiPick="true"
                quote="true"
                dependedParamRef="geneParams.antismash_category,organismParams.antismash_organism"
                includeProjects="FungiDB,UniDB">
  <help>Select one or more annotations. Options are filtered by the selected organism(s) and cluster category.</help>
  <suggest default="biosynthetic"/>
</flatVocabParam>

<stringParam name="genbank_accession"
prompt="GenBank Accession Number"
number="false">
Expand Down Expand Up @@ -9711,6 +9734,62 @@ end as term
</sql>
</sqlQuery>

<sqlQuery name="AntismashCategories" includeProjects="FungiDB,UniDB">
  <!-- Vocabulary query: distinct antiSMASH cluster categories that have at least
       one gene in the selected organism(s). The category "other" (matched
       case-insensitively) sorts last; the rest sort alphabetically.
       The project filter is skipped when the site is UniDB, matching
       organismVQ.AntismashOrganisms, so the vocabulary is not empty there. -->
  <paramRef ref="organismParams.antismash_organism"/>
  <column name="display"/>
  <column name="internal"/>
  <column name="term"/>
  <sql>
    <![CDATA[
      SELECT DISTINCT ac.category AS display
           , ac.category AS internal
           , ac.category AS term
           , CASE WHEN lower(ac.category) = 'other' THEN 1 ELSE 0 END AS sort_order
      FROM apidb.antismashfeature af
      JOIN apidb.antismashclusterfeature acf
        ON acf.antismash_feature_id = af.antismash_feature_id
      JOIN apidb.antismashcluster ac
        ON ac.antismash_cluster_id = acf.antismash_cluster_id
      JOIN apidbtuning.transcriptattributes ta
        ON ta.gene_na_feature_id = af.na_feature_id
      WHERE ta.org_abbrev IN ($$antismash_organism$$)
        AND (ta.project_id = '@PROJECT_ID@' OR 'UniDB' = '@PROJECT_ID@')
      ORDER BY sort_order, display
    ]]>
  </sql>
</sqlQuery>

<sqlQuery name="AntismashAnnotations" includeProjects="FungiDB,UniDB">
  <!-- Vocabulary query: distinct antiSMASH gene annotations for the selected
       organism(s) and cluster categories. A NULL annotation is exposed as the
       term "unknown". Sort order: biosynthetic, biosynthetic-additional, any
       other value (alphabetical), other, unknown.
       The project filter is skipped when the site is UniDB, matching
       organismVQ.AntismashOrganisms, so the vocabulary is not empty there. -->
  <paramRef ref="organismParams.antismash_organism"/>
  <paramRef ref="geneParams.antismash_category"/>
  <column name="display"/>
  <column name="internal"/>
  <column name="term"/>
  <sql>
    <![CDATA[
      WITH annotations AS (
        SELECT DISTINCT COALESCE(af.antismash_annotation, 'unknown') AS annotation
        FROM apidb.antismashfeature af
        JOIN apidb.antismashclusterfeature acf
          ON acf.antismash_feature_id = af.antismash_feature_id
        JOIN apidb.antismashcluster ac
          ON ac.antismash_cluster_id = acf.antismash_cluster_id
        JOIN apidbtuning.transcriptattributes ta
          ON ta.gene_na_feature_id = af.na_feature_id
        WHERE ta.org_abbrev IN ($$antismash_organism$$)
          AND (ta.project_id = '@PROJECT_ID@' OR 'UniDB' = '@PROJECT_ID@')
          AND ac.category IN ($$antismash_category$$)
      )
      SELECT a.annotation AS display
           , a.annotation AS internal
           , CASE a.annotation
               WHEN 'biosynthetic' THEN 0
               WHEN 'biosynthetic-additional' THEN 1
               WHEN 'other' THEN 3
               WHEN 'unknown' THEN 4
               ELSE 2
             END AS sort_order
           , a.annotation AS term
      FROM annotations a
      ORDER BY sort_order, display
    ]]>
  </sql>
</sqlQuery>

</querySet>

<groupSet name="geneParamGroupSet">
Expand Down
40 changes: 40 additions & 0 deletions Model/lib/wdk/model/questions/params/organismParams.xml
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,21 @@
</enumList>
</enumParam>

<!-- Multi-pick organism vocabulary shown as a tree box. Values come from
     organismVQ.AntismashOrganisms; declared for FungiDB and UniDB only.
     NOTE(review): the organismProperties values below are presumably read by
     the organism parameter widget to prune and filter the tree. Confirm against
     the consumer of this property list. -->
<flatVocabParam name="antismash_organism"
queryRef="organismVQ.AntismashOrganisms"
prompt="Organism"
displayType="treeBox"
multiPick="true"
suppressNode="true"
quote="true"
includeProjects="FungiDB,UniDB">
<help>Select the organism(s) to search.</help>
<propertyList name="organismProperties">
<value>pruneNodesWithSingleExtendingChild</value>
<value>showOnlyPreferredOrganisms</value>
</propertyList>
</flatVocabParam>

</paramSet>


Expand Down Expand Up @@ -1170,6 +1185,31 @@
</sql>
</sqlQuery>

<sqlQuery name="AntismashOrganisms" includeProjects="FungiDB,UniDB">
  <!-- Organism tree vocabulary limited to organisms that have at least one gene
       with an antiSMASH feature. Each (term, parentTerm) node of the organism
       tree carries, as its internal value, the comma-separated org_abbrev values
       of the matching organisms under it. The project filter is bypassed when
       the site is UniDB. -->
  <column name="parentTerm"/>
  <column name="internal"/>
  <column name="term"/>
  <sql>
    <![CDATA[
      WITH antismash_orgs AS (
        SELECT DISTINCT ga.organism
             , ga.org_abbrev
        FROM apidbtuning.geneattributes ga
        INNER JOIN apidb.antismashfeature af
          ON af.na_feature_id = ga.na_feature_id
        WHERE (ga.project_id = '@PROJECT_ID@' OR 'UniDB' = '@PROJECT_ID@')
      )
      SELECT ot.term
           , ot.parentTerm
           , string_agg(ao.org_abbrev, ', ') AS internal
      FROM apidbtuning.organismtree ot
      INNER JOIN antismash_orgs ao
        ON ao.organism = ot.organism
      GROUP BY ot.term, ot.parentTerm
      ORDER BY ot.parentTerm, ot.term
    ]]>
  </sql>
</sqlQuery>

</querySet>

</wdkModel>
79 changes: 79 additions & 0 deletions Model/lib/wdk/model/questions/queries/geneQueries.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5769,6 +5769,85 @@ select distinct ta.gene_source_id
</sql>
</sqlQuery>

<sqlQuery name="GenesBySecondaryMetabolites" includeProjects="FungiDB,UniDB">
  <!-- Transcripts whose gene belongs to an antiSMASH cluster matching the
       selected organism(s), cluster categories and annotations. Selecting the
       annotation term "unknown" also matches genes whose annotation is NULL.

       Steps:
         gene_clusters : one row per transcript / cluster / annotation that
                         passes the filters, with the cluster length.
         ranked        : numbers each transcript's clusters, longest first
                         (ties broken by antismash_cluster_id).
         gene_overlaps : for each transcript, HTML links (JBrowse, 500 bp of
                         padding) to every other matching cluster besides the
                         top-ranked one, ordered by cluster start.
       The final select returns one row per transcript: the top-ranked cluster,
       a 1000 bp padded context window clamped to start at 1, and the overlap
       links or "No" when there are none.

       The project filter is skipped when the site is UniDB, matching
       organismVQ.AntismashOrganisms, so the search is not empty there. -->
  <paramRef ref="organismParams.antismash_organism"/>
  <paramRef ref="geneParams.antismash_category"/>
  <paramRef ref="geneParams.antismash_annotation"/>
  <column name="source_id"/>
  <column name="gene_source_id"/>
  <column name="project_id"/>
  <column name="wdk_weight"/>
  <column name="matched_result"/>
  <column name="category"/>
  <column name="antismash_annotation"/>
  <column name="cluster_start"/>
  <column name="cluster_end"/>
  <column name="sequence_id"/>
  <column name="org_abbrev"/>
  <column name="cluster_context_start"/>
  <column name="cluster_context_end"/>
  <column name="overlapping_clusters"/>
  <sql>
    <![CDATA[
      WITH gene_clusters AS (
        SELECT DISTINCT
               ta.source_id, ta.gene_source_id, ta.project_id, ta.sequence_id,
               ta.org_abbrev,
               ac.antismash_cluster_id, ac.category, af.antismash_annotation,
               ac.cluster_start, ac.cluster_end,
               (ac.cluster_end - ac.cluster_start) AS cluster_length
        FROM apidb.antismashcluster ac
        JOIN apidb.antismashclusterfeature acf ON acf.antismash_cluster_id = ac.antismash_cluster_id
        JOIN apidb.antismashfeature af ON af.antismash_feature_id = acf.antismash_feature_id
        JOIN apidbtuning.transcriptattributes ta ON ta.gene_na_feature_id = af.na_feature_id
        WHERE (ta.project_id = '@PROJECT_ID@' OR 'UniDB' = '@PROJECT_ID@')
          AND ta.org_abbrev IN ($$antismash_organism$$)
          AND ac.category IN ($$antismash_category$$)
          AND (af.antismash_annotation IN ($$antismash_annotation$$)
               OR ('unknown' IN ($$antismash_annotation$$) AND af.antismash_annotation IS NULL)
              )
      ),
      ranked AS (
        SELECT gc.*,
               ROW_NUMBER() OVER (
                 PARTITION BY gc.source_id
                 ORDER BY gc.cluster_length DESC, gc.antismash_cluster_id
               ) AS rn
        FROM gene_clusters gc
      ),
      gene_overlaps AS (
        SELECT r.source_id,
               STRING_AGG(
                 CONCAT(
                   '<a href="@JBROWSE_WEBPAGE_URL@?loc=', gc2.sequence_id, ':',
                   GREATEST(gc2.cluster_start - 500, 1), '..', gc2.cluster_end + 500,
                   '&data=@JBROWSE_SERVICE_URL@/tracks/', gc2.org_abbrev,
                   '&tracks=gene%2Cantibiotics%20and%20Secondary%20Metabolites%20Analysis%20SHell%20(antiSMASH)">',
                   gc2.sequence_id, ':', gc2.cluster_start, '-', gc2.cluster_end, '</a>'
                 ),
                 ', ' ORDER BY gc2.cluster_start
               ) AS overlapping_clusters
        FROM ranked r
        JOIN gene_clusters gc2
          ON gc2.source_id = r.source_id
         AND gc2.antismash_cluster_id != r.antismash_cluster_id
        WHERE r.rn = 1
        GROUP BY r.source_id
      )
      SELECT r.source_id, r.gene_source_id, r.project_id,
             10 AS wdk_weight, 'Y' AS matched_result,
             r.category, r.antismash_annotation, r.cluster_start, r.cluster_end, r.sequence_id,
             r.org_abbrev,
             GREATEST(r.cluster_start - 1000, 1) AS cluster_context_start,
             r.cluster_end + 1000 AS cluster_context_end,
             COALESCE(ov.overlapping_clusters, 'No') AS overlapping_clusters
      FROM ranked r
      LEFT JOIN gene_overlaps ov ON ov.source_id = r.source_id
      WHERE r.rn = 1
    ]]>
  </sql>
</sqlQuery>



</querySet>
Expand Down