Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 25 additions & 14 deletions workflow/scripts/report/af_time_correlation_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,19 +48,24 @@ variants <- left_join(variants, metadata, by = c("SAMPLE" = "ID")) %>%
CollectionDate
) %>%
mutate(
interval = as.numeric(difftime(CollectionDate, min(CollectionDate), units = "days"))
interval = as.numeric(difftime(
CollectionDate,
min(CollectionDate),
units = "days"
))
)

# Save processed input
log_info("Writing dated and frequency-filled variants")
write_csv(variants, snakemake@output$fmt_variants)
write_tsv(variants, snakemake@output$fmt_variants)

log_info("Calculating correlations")
log_debug("Calculating unique SNPs")
# Get list with all different polymorphisms
unique.snps <- unique(variants$VARIANT_NAME)

# Create an empty dataframe to be filled
log_debug("Initializing empty results")
cor.df <- data.frame(
variant = "",
min_af = 0,
Expand All @@ -71,21 +76,30 @@ cor.df <- data.frame(
) %>%
filter(p.value != 0)

log_debug("Calculating correlation using method = {snakemake@params$cor_method} and exact p-value = {snakemake@params$cor_exact}")
log_info("Calculating correlation with method={snakemake@params$cor_method} and exact={snakemake@params$cor_exact}")
correlations <- lapply(
unique.snps,
function(snp) {
log_debug("Processing {snp}")
# Select SNP
df <- filter(
variants,
VARIANT_NAME == snp
)
# Perform calculation
test <- cor.test(
df$ALT_FREQ,
df$interval,
method = snakemake@params$cor_method,
exact = snakemake@params$cor_exact
test <- tryCatch(
{
cor.test(
df$ALT_FREQ,
df$interval,
method = snakemake@params$cor_method,
exact = snakemake@params$cor_exact
)
},
error = function(e) {
log_warn("Cannot run cor.test for {snp}: {e}")
list(p.value = NA, estimate = NA)
}
)
# Adjust p-value
p.value.adj <- p.adjust(
Expand Down Expand Up @@ -122,8 +136,7 @@ significant.variants <- correlations %>%
) %>%
pull(variant) %>%
unique()

log_info("Significant: {significant.variants}")
log_debug("Significant: {significant.variants}")

log_info("Selecting variants in positions with more than one alternative allele")
mult.alt.variants <- variants %>%
Expand All @@ -137,13 +150,11 @@ mult.alt.variants <- variants %>%
ungroup() %>%
pull(VARIANT_NAME) %>%
unique()

log_info("Mult all: {mult.alt.variants}")
log_debug("Mult all: {mult.alt.variants}")

# Build selected subset to represent
variant.selection <- unique(c(significant.variants, mult.alt.variants))

log_info("Selection: {variant.selection}")
log_debug("Selection: {variant.selection}")

log_info("Writing selected variants subset")
write_lines(variant.selection, snakemake@output$subset)
2 changes: 1 addition & 1 deletion workflow/scripts/report/af_trajectory_panel_plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ log_threshold(INFO)
source(snakemake@params[["design"]])

log_info("Reading formatted variants table")
variants <- read_csv(snakemake@input$fmt_variants)
variants <- read_tsv(snakemake@input$fmt_variants)

log_info("Reading subset of variants to represent")
selected.variants <- read_lines(snakemake@input$subset)
Expand Down
3 changes: 2 additions & 1 deletion workflow/scripts/report/allele_freq_tree_plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@ max.tip.length <- max(
p <- ggtree(tree) %<+% tree_tiplab +
geom_tiplab(aes(label = tip_label)) +
geom_treescale(1.1 * max.tip.length) +
geom_rootedge(0.05 * max.tip.length)
geom_rootedge(0.05 * max.tip.length) +
hexpand(0.2)

ggsave(
filename = snakemake@output$plot,
Expand Down
Loading