Skip to content

Commit bd15f24

Browse files
author
Emrah Akkoyun
committed
new rules added
1 parent d28486e commit bd15f24

15 files changed

Lines changed: 987 additions & 202 deletions

config/config.yml

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
cnfigfile: "config.yml"
22

33
#outputs
4-
workdir: "/cta/users/eakkoyun/WORKFOLDER/PROD/run_010321/phylogeny-snakemake"
5-
query_ids: ["Q6ZUB1", "P10635", "Q12756", "P16278", "P11055", "Q9Y5Z9", "O95278", "P50747", "P20823", "Q9GZU5", "Q9Y6K1", "P33261", "P01266", "P07477", "Q99959", "P17661", "P22033", "P02671", "P31327", "O75503", "O95972", "Q16281", "P51798", "P20930", "Q6NSW7", "Q9HC21"]
4+
workdir: "/cta/users/eakkoyun/WORKFOLDER/TEST/120421_test/phylogeny-snakemake"
5+
query_ids: ["P35520", "P63000"]
6+
#query_ids: ["P16157", "P22105"]
7+
68

79
#blast
810
blastdb_folder: "resources/blastdb"
@@ -25,21 +27,24 @@ trimal_method: "-gappyout"
2527

2628
#tree:
2729
raxml_model: "LG4X"
28-
raxml_seed: "8"
30+
raxml_seed: "2"
2931
raxml_threads: "1"
3032
raxml_tree_number: "20"
3133

3234
#iqtree
3335
iqtree_seed: "1234"
3436

37+
#outliers
38+
max_deviations: "1"
39+
3540
#codeml:
3641
aa_dist: "0"
3742
aa_rate_file: "static/lg_LG.PAML.txt"
3843
seqtype: "2"
3944
verbose: "2"
4045
noisy: "9"
4146
clock: "0"
42-
model: "1"
47+
model: "2"
4348
icode: "0"
4449
Mgene: "0"
4550
fix_alpha: "0"
@@ -53,10 +58,10 @@ cleandata: "0"
5358
method: "1"
5459

5560
#phylas
56-
weights: "0.1,0.5,0,1,2,3,5,mean,median,X,CountNodes_1,CountNodes_2,CountNodes_3,CountNodes_4"
61+
weights: "0.1,0.5,0,1,2,3,5,mean,median,X,CountNodes_1,CountNodes_2,CountNodes_3,CountNodes_4,MinThreshold,MinThreshold_Gauss"
5762
pattern: "CountNodes_1"
5863

5964
#raxmlng_ancestral
6065
raxmlng_ancestral_model: "LG4X"
61-
raxml_ancestral_threads: "12"
66+
raxml_ancestral_threads: "2"
6267

images/rulegraph.svg

Lines changed: 437 additions & 186 deletions
Loading

workflow/Snakefile

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ configfile: "../config/config.yml"
22

33
rule all:
44
input:
5+
expand("{workdir}/results/{query_id}/8_pruned_raxmlng_ancestral_scores/pruned_{query_id}_wol_param_{pattern}.csv",workdir=config["workdir"], query_id=config["query_ids"],pattern=config["pattern"]),
56
expand("{workdir}/results/{query_id}/5_raxmlng_ancestral_scores/{query_id}_wl_param_{pattern}.csv",workdir=config["workdir"], query_id=config["query_ids"],pattern=config["pattern"]),
67

78
include : "rules/query_fasta.smk"
@@ -10,7 +11,12 @@ include : "rules/get_blasthits.smk"
1011
include : "rules/msa.smk"
1112
include : "rules/trim_msa.smk"
1213
include : "rules/ml_tree.smk"
13-
include : "rules/unroot_tree.smk"
1414
include : "rules/remove_gaps.smk"
15+
include : "rules/remove_outliers.smk"
16+
include : "rules/prune.smk"
17+
include : "rules/unroot_tree.smk"
1518
include : "rules/raxmlng_ancestral.smk"
1619
include : "rules/compute_raxml_anc_score.smk"
20+
include : "rules/unroot_pruned_tree.smk"
21+
include : "rules/pruned_raxmlng_ancestral.smk"
22+
include : "rules/pruned_raxmlng_ancestral_score.smk"

workflow/envs/remove_outlier.yml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
name: remove_outlier
2+
channels:
3+
- etetoolkit
4+
- python
5+
- bioconda
6+
- conda-forge
7+
dependencies:
8+
- ete3 =3.1.1
9+
- python =3.7.4
10+
- pandas =1.2.3
11+

workflow/rules/compute_raxml_anc_score.smk

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ rule raxml_anc_score:
77
"{workdir}/results/{query_id}/5_raxmlng_ancestral_scores/{query_id}_wl_param_{pattern}.csv",
88
params:
99
out = "{workdir}/results/{query_id}/5_raxmlng_ancestral_scores/{query_id}",
10-
fasta = "{workdir}/results/{query_id}/2_msa/{query_id}_nogap_msa.fasta",
10+
fasta = "{workdir}/results/{query_id}/2_msa/{query_id}_no_outlier_no_gap.fasta",
1111
query_fasta = "{workdir}/results/{query_id}/1_psiblast/{query_id}.fasta"
1212
log:
1313
"{workdir}/workflow/logs/rules/{query_id}_raxmlanc_{pattern}_compute_score.err"

workflow/rules/prune.smk

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
rule prune:
2+
input:
3+
blastp_out_file = "{workdir}/results/{query_id}/1_psiblast/{query_id}_blasthits.out",
4+
ml_tree = "{workdir}/results/{query_id}/3_mltree/{query_id}.no_outlier.nwk",
5+
msa_file = "{workdir}/results/{query_id}/2_msa/{query_id}_no_outlier_no_gap.fasta",
6+
query_fasta = "{workdir}/results/{query_id}/1_psiblast/{query_id}.fasta",
7+
output:
8+
pruned_tree = "{workdir}/results/{query_id}/6_pruned_msa_tree/{query_id}.pruned.no_outlier",
9+
pruned_msa = "{workdir}/results/{query_id}/6_pruned_msa_tree/{query_id}_pruned_nogap_nooutlier_msa.fasta",
10+
log:
11+
"{workdir}/workflow/logs/rules/{query_id}_pruned.err"
12+
benchmark:
13+
"{workdir}/workflow/logs/benchmarks/{query_id}_pruned.out"
14+
conda:
15+
"../envs/prune.yml"
16+
shell:
17+
"python3 scripts/prune_msa_tree.py {input.blastp_out_file} {config[blast_hit_number]} {config[max_e_value]} {config[min_identity]} {config[max_identity]} {input.ml_tree} {output.pruned_tree} {input.msa_file} {output.pruned_msa} {input.query_fasta}"
18+
workflow/rules/pruned_raxmlng_ancestral.smk

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
2+
rule pruned_raxmlng_ancestral:
3+
input:
4+
pruned_nogap_msa = "{workdir}/results/{query_id}/6_pruned_msa_tree/{query_id}_pruned_nogap_nooutlier_msa.fasta",
5+
pruned_unrooted_tree = "{workdir}/results/{query_id}/6_pruned_msa_tree/{query_id}.pruned.no_outlier_unrooted",
6+
output:
7+
pruned_ancestral_probabilities = "{workdir}/results/{query_id}/7_pruned_raxmlng_ancestral/pruned_{query_id}.raxml.ancestralProbs",
8+
pruned_ancestral_states = "{workdir}/results/{query_id}/7_pruned_raxmlng_ancestral/pruned_{query_id}.raxml.ancestralStates",
9+
pruned_ancestralTree = "{workdir}/results/{query_id}/7_pruned_raxmlng_ancestral/pruned_{query_id}.raxml.ancestralTree",
10+
11+
params:
12+
pruned_raxml_ancestral_out_name = "{workdir}/results/{query_id}/7_pruned_raxmlng_ancestral/pruned_{query_id}",
13+
log:
14+
"{workdir}/workflow/logs/rules/{query_id}_pruned_raxmlng_ancestral.err"
15+
conda:
16+
"../envs/raxml-ng.yml"
17+
benchmark:
18+
"{workdir}/workflow/logs/benchmarks/{query_id}_pruned_raxmlng_ancestral.out"
19+
shell:
20+
"raxml-ng --ancestral --msa {input.pruned_nogap_msa} --tree {input.pruned_unrooted_tree} --model {config[raxmlng_ancestral_model]} --prefix {params.pruned_raxml_ancestral_out_name} --threads {config[raxml_ancestral_threads]}"
workflow/rules/pruned_raxmlng_ancestral_score.smk

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
rule pruned_raxml_anc_score:
2+
input:
3+
tree_file = "{workdir}/results/{query_id}/7_pruned_raxmlng_ancestral/pruned_{query_id}.raxml.ancestralTree",
4+
probabilities = "{workdir}/results/{query_id}/7_pruned_raxmlng_ancestral/pruned_{query_id}.raxml.ancestralProbs",
5+
output:
6+
"{workdir}/results/{query_id}/8_pruned_raxmlng_ancestral_scores/pruned_{query_id}_wol_param_{pattern}.csv",
7+
"{workdir}/results/{query_id}/8_pruned_raxmlng_ancestral_scores/pruned_{query_id}_wl_param_{pattern}.csv",
8+
params:
9+
out = "{workdir}/results/{query_id}/8_pruned_raxmlng_ancestral_scores/pruned_{query_id}",
10+
fasta = "{workdir}/results/{query_id}/6_pruned_msa_tree/{query_id}_pruned_nogap_nooutlier_msa.fasta",
11+
query_fasta = "{workdir}/results/{query_id}/1_psiblast/{query_id}.fasta",
12+
log:
13+
"{workdir}/workflow/logs/rules/{query_id}_pruned_raxmlanc_{pattern}_compute_score.err"
14+
benchmark:
15+
"{workdir}/workflow/logs/benchmarks/{query_id}_pruned_raxmlanc_{pattern}_compute_score.out"
16+
conda:
17+
"../envs/r-base.yml"
18+
shell:
19+
"query=`python scripts/get_query.py {params.query_fasta}` && Rscript scripts/compute_score_RaxmlNg_Final.R {input.tree_file} {input.probabilities} {params.fasta} {params.out} $query {config[weights]} 2>{log}"
20+
workflow/rules/raxmlng_ancestral.smk

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
rule raxmlng_ancestral:
22
input:
3-
nogap_msa = "{workdir}/results/{query_id}/2_msa/{query_id}_nogap_msa.fasta",
4-
unrooted_tree = "{workdir}/results/{query_id}/3_mltree/{query_id}.raxml.bestTree_unrooted",
3+
no_outlier_msa = "{workdir}/results/{query_id}/2_msa/{query_id}_no_outlier_no_gap.fasta",
4+
unrooted_tree = "{workdir}/results/{query_id}/3_mltree/{query_id}.no_outlier.nwk_unrooted",
55
output:
66
ancestral_probabilities = "{workdir}/results/{query_id}/4_raxmlng_ancestral/{query_id}.raxml.ancestralProbs",
77
ancestral_states = "{workdir}/results/{query_id}/4_raxmlng_ancestral/{query_id}.raxml.ancestralStates",
@@ -10,10 +10,13 @@ rule raxmlng_ancestral:
1010
raxml_ancestral_out_name = "{workdir}/results/{query_id}/4_raxmlng_ancestral/{query_id}",
1111
conda:
1212
"../envs/raxml-ng.yml"
13+
log:
14+
"{workdir}/workflow/logs/rules/{query_id}_raxmlng_ancestral.err"
1315
benchmark:
1416
"{workdir}/workflow/logs/benchmarks/{query_id}_raxmlng_ancestral.out"
1517
cache: True
1618
resources:
17-
time_min=7200,cpus=2
19+
time_min=7200,cpus=8
1820
shell:
19-
"raxml-ng --ancestral --msa {input.nogap_msa} --tree {input.unrooted_tree} --model {config[raxmlng_ancestral_model]} --prefix {params.raxml_ancestral_out_name} --threads {resources.cpus}"
21+
"raxml-ng --ancestral --msa {input.no_outlier_msa} --tree {input.unrooted_tree} --model {config[raxmlng_ancestral_model]} --prefix {params.raxml_ancestral_out_name} --threads {resources.cpus}"
22+

workflow/rules/remove_outliers.smk

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
rule remove_outliers:
2+
input:
3+
nogap_msa = "{workdir}/results/{query_id}/2_msa/{query_id}_nogap_msa.fasta",
4+
input_tree = "{workdir}/results/{query_id}/3_mltree/{query_id}.raxml.bestTree"
5+
output:
6+
no_outlier_msa = "{workdir}/results/{query_id}/2_msa/{query_id}_no_outlier_no_gap.fasta",
7+
no_outlier_tree = "{workdir}/results/{query_id}/3_mltree/{query_id}.no_outlier.nwk",
8+
9+
conda:
10+
"../envs/remove_outlier.yml"
11+
log:
12+
"{workdir}/workflow/logs/rules/{query_id}_remove_outliers.err"
13+
benchmark:
14+
"{workdir}/workflow/logs/benchmarks/{query_id}_no_outlier.out"
15+
resources:
16+
time_min=7200,cpus=1
17+
shell:
18+
"python3 scripts/remove_outliers.py {input.input_tree} {input.nogap_msa} {output.no_outlier_tree} {output.no_outlier_msa} {config[max_deviations]} 2> {log}"
19+

0 commit comments

Comments
 (0)