From 6851bf644257eb51e33514e68caedfe7fb6d0d3a Mon Sep 17 00:00:00 2001 From: LiaOb21 Date: Fri, 20 Mar 2026 16:56:31 +0000 Subject: [PATCH 01/10] masurca started --- modules/nf-core/masurca/environment.yml | 10 + modules/nf-core/masurca/main.nf | 176 ++++++++++ modules/nf-core/masurca/meta.yml | 77 +++++ modules/nf-core/masurca/tests/main.nf.test | 309 ++++++++++++++++++ .../nf-core/masurca/tests/main.nf.test.snap | 160 +++++++++ 5 files changed, 732 insertions(+) create mode 100644 modules/nf-core/masurca/environment.yml create mode 100644 modules/nf-core/masurca/main.nf create mode 100644 modules/nf-core/masurca/meta.yml create mode 100644 modules/nf-core/masurca/tests/main.nf.test create mode 100644 modules/nf-core/masurca/tests/main.nf.test.snap diff --git a/modules/nf-core/masurca/environment.yml b/modules/nf-core/masurca/environment.yml new file mode 100644 index 000000000000..c949d090cb48 --- /dev/null +++ b/modules/nf-core/masurca/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # TODO nf-core: List required Conda package(s). + # Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). + # For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. + - "bioconda::masurca=4.1.4" diff --git a/modules/nf-core/masurca/main.nf b/modules/nf-core/masurca/main.nf new file mode 100644 index 000000000000..b34ea3eeb8b7 --- /dev/null +++ b/modules/nf-core/masurca/main.nf @@ -0,0 +1,176 @@ +// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) +// https://github.com/nf-core/modules/tree/master/modules/nf-core/ +// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: +// https://nf-co.re/join +// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. +// All other parameters MUST be provided using the "task.ext" directive, see here: +// https://www.nextflow.io/docs/latest/process.html#ext +// where "task.ext" is a string. +// Any parameters that need to be evaluated in the context of a particular sample +// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. +// TODO nf-core: Software that can be piped together SHOULD be added to separate module files +// unless there is a run-time, storage advantage in implementing in this way +// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: +// bwa mem | samtools view -B -T ref.fasta +// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty +// list (`[]`) instead of a file can be used to work around this issue. + +process MASURCA { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/cf/cf6402ed20c3b089ab88cd8884ddace90693501453a515f9188ae681e8ca8556/data': + 'community.wave.seqera.io/library/masurca:4.1.4--d05ef74c4881d55c' }" + + input: + tuple val(meta), path(illumina), path(jump), path(pacbio), path(nanopore), path(other_reads), path(reference_genome) + val fragment_mean + val fragment_stdev + val jump_mean + val jump_stdev + + + output: + tuple val(meta), path("${prefix}/assemble.sh") , emit: script + tuple val(meta), path("${prefix}/CA*/final.genome.scf.fasta"), optional: true, emit: scaffolds + tuple val(meta), path("${prefix}/CA*/final.genome.ctg.fasta"), optional: true, emit: contigs + tuple val(meta), path("${prefix}/flye/assembly.fasta") , optional: true, emit: flye_assembly + tuple val(meta), path("${prefix}/*_masurca_config.txt") , emit: config + tuple val("${task.process}"), val('masurca'), eval("masurca --version | sed 's/version //g'"), topic: versions, emit: versions_masurca + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + //get input reads with absolute paths - illumina are mandatory, jump/pacbio/nanopore are optional + def illumina_reads = meta.single_end ? "\$(readlink -f ${illumina})" : "\$(readlink -f ${illumina[0]}) \$(readlink -f ${illumina[1]})" + def jump_reads = jump ? "\$(readlink -f ${jump[0]}) \$(readlink -f ${jump[1]})" : "" + def pacbio_file = pacbio ? "\$(readlink -f ${pacbio})" : "" + def nanopore_file = nanopore ? "\$(readlink -f ${nanopore})" : "" + def other_reads_file = other_reads ? "\$(readlink -f ${other_reads})" : "" + def reference_genome_file = reference_genome ? "\$(readlink -f ${reference_genome})" : "" + + // Configuration parameters with defaults from task.ext + def extend_jump_reads = task.ext.extend_jump_reads != null ? task.ext.extend_jump_reads : 0 + def graph_kmer_size = task.ext.graph_kmer_size ?: 'auto' + def use_linking_mates = task.ext.use_linking_mates != null ? task.ext.use_linking_mates : 0 + def lhe_coverage = task.ext.lhe_coverage ?: 25 + def mega_reads_one_pass = task.ext.mega_reads_one_pass != null ? task.ext.mega_reads_one_pass : 0 + def limit_jump_coverage = task.ext.limit_jump_coverage ?: 300 + def ca_parameters = task.ext.ca_parameters ?: 'cgwErrorRate=0.15' + def close_gaps = task.ext.close_gaps != null ? task.ext.close_gaps : 1 + def jf_size = task.ext.jf_size ?: 200000000 + def soap_assembly = task.ext.soap_assembly != null ? task.ext.soap_assembly : 0 + def flye_assembly = task.ext.flye_assembly != null ? task.ext.flye_assembly : 0 + """ + echo "DATA" > ${prefix}_masurca_config.txt + echo "#Illumina paired end reads supplied as " >> ${prefix}_masurca_config.txt + echo "#if single-end, do not specify " >> ${prefix}_masurca_config.txt + echo "#MUST HAVE Illumina paired end reads to use MaSuRCA" >> ${prefix}_masurca_config.txt + echo "PE= pe ${fragment_mean} ${fragment_stdev} ${illumina_reads}" >> ${prefix}_masurca_config.txt + + # Jump/mate pair reads (optional) + if [ -n "${jump_reads}" ]; then + echo "#Illumina mate pair reads supplied as " >> ${prefix}_masurca_config.txt + echo "JUMP= sh ${jump_mean} ${jump_stdev} ${jump_reads}" >> ${prefix}_masurca_config.txt + fi + + # PacBio and Nanopore reads handling + # If both exist, concatenate them and supply as NANOPORE (per MaSuRCA docs) + if [ -n "${pacbio_file}" ] && [ -n "${nanopore_file}" ]; then + echo "#if you have both PacBio and Nanopore, supply both as NANOPORE type" >> ${prefix}_masurca_config.txt + cat ${pacbio_file} ${nanopore_file} > ${prefix}_long_reads.fastq.gz + echo "NANOPORE=\$(readlink -f ${prefix}_long_reads.fastq.gz)" >> ${prefix}_masurca_config.txt + elif [ -n "${pacbio_file}" ]; then + echo "#PacBio/CCS reads must be in a single fasta or fastq file with absolute path" >> ${prefix}_masurca_config.txt + echo "PACBIO=${pacbio_file}" >> ${prefix}_masurca_config.txt + elif [ -n "${nanopore_file}" ]; then + echo "#Nanopore reads must be in a single fasta or fastq file with absolute path" >> ${prefix}_masurca_config.txt + echo "NANOPORE=${nanopore_file}" >> ${prefix}_masurca_config.txt + fi + + # Other reads (optional) - Sanger, 454, etc. + if [ -n "${other_reads_file}" ]; then + echo "#Other reads (Sanger, 454, etc) one frg file, concatenate your frg files into one if you have many" >> ${prefix}_masurca_config.txt + echo "OTHER=${other_reads_file}" >> ${prefix}_masurca_config.txt + fi + + # Reference genome (optional) - for synteny-assisted assembly + if [ -n "${reference_genome_file}" ]; then + echo "#synteny-assisted assembly, concatenate all reference genomes into one reference.fa; works for Illumina-only data" >> ${prefix}_masurca_config.txt + echo "REFERENCE=${reference_genome_file}" >> ${prefix}_masurca_config.txt + fi + + echo "END" >> ${prefix}_masurca_config.txt + + + echo "" >> ${prefix}_masurca_config.txt + echo "PARAMETERS" >> ${prefix}_masurca_config.txt + echo "#set this to 1 if your Illumina jumping library reads are shorter than 100bp" >> ${prefix}_masurca_config.txt + echo "EXTEND_JUMP_READS=${extend_jump_reads}" >> ${prefix}_masurca_config.txt + echo "#this is k-mer size for deBruijn graph values between 25 and 127 are supported, auto will compute the optimal size based on the read data and GC content" >> ${prefix}_masurca_config.txt + echo "GRAPH_KMER_SIZE = ${graph_kmer_size}" >> ${prefix}_masurca_config.txt + echo "#set this to 1 for all Illumina-only assemblies" >> ${prefix}_masurca_config.txt + echo "#set this to 0 if you have more than 15x coverage by long reads (Pacbio or Nanopore) or any other long reads/mate pairs (Illumina MP, Sanger, 454, etc)" >> ${prefix}_masurca_config.txt + echo "USE_LINKING_MATES = ${use_linking_mates}" >> ${prefix}_masurca_config.txt + echo "#use at most this much coverage by the longest Pacbio or Nanopore reads, discard the rest of the reads" >> ${prefix}_masurca_config.txt + echo "#can increase this to 30 or 35 if your reads are short (N50<7000bp)" >> ${prefix}_masurca_config.txt + echo "LHE_COVERAGE=${lhe_coverage}" >> ${prefix}_masurca_config.txt + echo "#set to 0 (default) to do two passes of mega-reads for slower, but higher quality assembly, otherwise set to 1" >> ${prefix}_masurca_config.txt + echo "MEGA_READS_ONE_PASS=${mega_reads_one_pass}" >> ${prefix}_masurca_config.txt + echo "#this parameter is useful if you have too many Illumina jumping library mates. Typically set it to 60 for bacteria and 300 for the other organisms" >> ${prefix}_masurca_config.txt + echo "LIMIT_JUMP_COVERAGE = ${limit_jump_coverage}" >> ${prefix}_masurca_config.txt + echo "#these are the additional parameters to Celera Assembler. do not worry about performance, number or processors or batch sizes -- these are computed automatically." >> ${prefix}_masurca_config.txt + echo "#CABOG ASSEMBLY ONLY: set cgwErrorRate=0.25 for bacteria and 0.1<=cgwErrorRate<=0.15 for other organisms." >> ${prefix}_masurca_config.txt + echo "CA_PARAMETERS = ${ca_parameters}" >> ${prefix}_masurca_config.txt + echo "#CABOG ASSEMBLY ONLY: whether to attempt to close gaps in scaffolds with Illumina or long read data" >> ${prefix}_masurca_config.txt + echo "CLOSE_GAPS=${close_gaps}" >> ${prefix}_masurca_config.txt + echo "#number of cpus to use, set this to the number of CPUs/threads per node you will be using" >> ${prefix}_masurca_config.txt + echo "NUM_THREADS = ${task.cpus}" >> ${prefix}_masurca_config.txt + echo "#this is mandatory jellyfish hash size -- a safe value is estimated_genome_size*20" >> ${prefix}_masurca_config.txt + echo "JF_SIZE = ${jf_size}" >> ${prefix}_masurca_config.txt + echo "#ILLUMINA ONLY. Set this to 1 to use SOAPdenovo contigging/scaffolding module." >> ${prefix}_masurca_config.txt + echo "#Assembly will be worse but will run faster. Useful for very large (>=8Gbp) genomes from Illumina-only data" >> ${prefix}_masurca_config.txt + echo "SOAP_ASSEMBLY=${soap_assembly}" >> ${prefix}_masurca_config.txt + echo "#If you are doing Hybrid Illumina paired end + Nanopore/PacBio assembly ONLY (no Illumina mate pairs or OTHER frg files)." >> ${prefix}_masurca_config.txt + echo "#Set this to 1 to use Flye assembler for final assembly of corrected mega-reads." >> ${prefix}_masurca_config.txt + echo "#A lot faster than CABOG, AND QUALITY IS THE SAME OR BETTER." >> ${prefix}_masurca_config.txt + echo "#Works well even when MEGA_READS_ONE_PASS is set to 1." >> ${prefix}_masurca_config.txt + echo "#DO NOT use if you have less than 15x coverage by long reads." >> ${prefix}_masurca_config.txt + echo "FLYE_ASSEMBLY=${flye_assembly}" >> ${prefix}_masurca_config.txt + echo "END" >> ${prefix}_masurca_config.txt + + # Generate assembly script + masurca ${prefix}_masurca_config.txt + + # Create output directory and move files + mkdir -p ${prefix} + mv assemble.sh ${prefix}/ + mv ${prefix}_masurca_config.txt ${prefix}/ + chmod +x ${prefix}/assemble.sh + + # Run the assembly + cd ${prefix} + ./assemble.sh + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo $args + + mkdir -p ${prefix}/CA + mkdir -p ${prefix}/flye + touch ${prefix}/assemble.sh + touch ${prefix}/${prefix}_masurca_config.txt + touch ${prefix}/CA/final.genome.scf.fasta + touch ${prefix}/CA/final.genome.ctg.fasta + touch ${prefix}/flye/assembly.fasta + """ +} diff --git a/modules/nf-core/masurca/meta.yml b/modules/nf-core/masurca/meta.yml new file mode 100644 index 000000000000..45a72409d119 --- /dev/null +++ b/modules/nf-core/masurca/meta.yml @@ -0,0 +1,77 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +# # TODO nf-core: Add a description of the module and list keywords +name: "masurca" +description: write your description here +keywords: + - sort + - example + - genomics +tools: + ## TODO nf-core: Add a description and other details for the software below + - "masurca": + description: "MaSuRCA (Maryland Super-Read Celera Assembler) genome assembly software." + homepage: "https://github.com/alekseyzimin/masurca/blob/v4.1.4/README.md" + documentation: "https://github.com/alekseyzimin/masurca/blob/v4.1.4/README.md" + tool_dev_url: "https://github.com/alekseyzimin/masurca" + doi: "" + licence: ["GPL v3"] + identifier: biotools:masurca + +input: + ### TODO nf-core: Add a description of all of the variables used as input + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: + - edam: "http://edamontology.org/format_2572" # BAM + - edam: "http://edamontology.org/format_2573" # CRAM + - edam: "http://edamontology.org/format_3462" # SAM + +output: + ### TODO nf-core: Add a description of all of the variables used as output + bam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*.bam": + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: + - edam: "http://edamontology.org/format_2572" # BAM + - edam: "http://edamontology.org/format_2573" # CRAM + - edam: "http://edamontology.org/format_3462" # SAM + versions_masurca: + - - "${task.process}": + type: string + description: The name of the process + - "masurca": + type: string + description: The name of the tool + - "masurca --version": + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - masurca: + type: string + description: The name of the tool + - masurca --version: + type: eval + description: The expression to obtain the version of the tool +authors: + - "@LiaOb21" +maintainers: + - "@LiaOb21" diff --git a/modules/nf-core/masurca/tests/main.nf.test b/modules/nf-core/masurca/tests/main.nf.test new file mode 100644 index 000000000000..d6719d65029b --- /dev/null +++ b/modules/nf-core/masurca/tests/main.nf.test @@ -0,0 +1,309 @@ +nextflow_process { + + name "Test Process MASURCA" + script "../main.nf" + process "MASURCA" + + tag "modules" + tag "modules_nfcore" + tag "masurca" + + test("sarscov2 - illumina - single_end") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + [], // no jump reads + [], // no pacbio + [], // no nanopore + [], // no other reads + [] // no reference genome + ] + input[1] = 500 // fragment_mean + input[2] = 50 // fragment_stdev + input[3] = 0 // jump_mean (not used) + input[4] = 0 // jump_stdev (not used) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - illumina - paired_end") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ], + [], // no jump reads + [], // no pacbio + [], // no nanopore + [], // no other reads + [] // no reference genome + ] + input[1] = 500 // fragment_mean + input[2] = 50 // fragment_stdev + input[3] = 0 // jump_mean (not used) + input[4] = 0 // jump_stdev (not used) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.script, + process.out.config, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - illumina - paired_end - with_jump") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ], + [], // no pacbio + [], // no nanopore + [], // no other reads + [] // no reference genome + ] + input[1] = 500 // fragment_mean + input[2] = 50 // fragment_stdev + input[3] = 3600 // jump_mean + input[4] = 200 // jump_stdev + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.script, + process.out.config, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - hybrid - illumina_pacbio") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ], + [], // no jump reads + file(params.test_data['sarscov2']['pacbio']['test_fastq_gz'], checkIfExists: true), + [], // no nanopore + [], // no other reads + [] // no reference genome + ] + input[1] = 500 // fragment_mean + input[2] = 50 // fragment_stdev + input[3] = 0 // jump_mean (not used) + input[4] = 0 // jump_stdev (not used) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.script, + process.out.config, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - hybrid - illumina_nanopore") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ], + [], // no jump reads + [], // no pacbio + file(params.test_data['sarscov2']['nanopore']['test_fastq_gz'], checkIfExists: true), + [], // no other reads + [] // no reference genome + ] + input[1] = 500 // fragment_mean + input[2] = 50 // fragment_stdev + input[3] = 0 // jump_mean (not used) + input[4] = 0 // jump_stdev (not used) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.script, + process.out.config, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - hybrid - illumina_pacbio_nanopore") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ], + [], // no jump reads + file(params.test_data['sarscov2']['pacbio']['test_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['nanopore']['test_fastq_gz'], checkIfExists: true), + [], // no other reads + [] // no reference genome + ] + input[1] = 500 // fragment_mean + input[2] = 50 // fragment_stdev + input[3] = 0 // jump_mean (not used) + input[4] = 0 // jump_stdev (not used) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.script, + process.out.config, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - illumina - with_reference") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ], + [], // no jump reads + [], // no pacbio + [], // no nanopore + [], // no other reads + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + input[1] = 500 // fragment_mean + input[2] = 50 // fragment_stdev + input[3] = 0 // jump_mean (not used) + input[4] = 0 // jump_stdev (not used) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.script, + process.out.config, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - illumina - paired_end - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ], + [], // no jump reads + [], // no pacbio + [], // no nanopore + [], // no other reads + [] // no reference genome + ] + input[1] = 500 // fragment_mean + input[2] = 50 // fragment_stdev + input[3] = 0 // jump_mean (not used) + input[4] = 0 // jump_stdev (not used) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/masurca/tests/main.nf.test.snap b/modules/nf-core/masurca/tests/main.nf.test.snap new file mode 100644 index 000000000000..daf30de492c0 --- /dev/null +++ b/modules/nf-core/masurca/tests/main.nf.test.snap @@ -0,0 +1,160 @@ +{ + "sarscov2 - illumina - paired_end - with_jump": { + "content": [ + [ + + ], + [ + + ], + null + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-03-20T16:53:33.598889584" + }, + "sarscov2 - hybrid - illumina_nanopore": { + "content": [ + [ + + ], + [ + + ], + null + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-03-20T16:53:55.13953957" + }, + "sarscov2 - illumina - single_end": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "config": [ + + ], + "contigs": [ + + ], + "flye_assembly": [ + + ], + "scaffolds": [ + + ], + "script": [ + + ], + "versions_masurca": [ + + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-03-20T16:53:14.230197251" + }, + "sarscov2 - illumina - paired_end": { + "content": [ + [ + + ], + [ + + ], + null + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-03-20T16:53:21.827256455" + }, + "sarscov2 - illumina - paired_end - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "config": [ + + ], + "contigs": [ + + ], + "flye_assembly": [ + + ], + "scaffolds": [ + + ], + "script": [ + + ], + "versions_masurca": [ + + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-03-20T16:54:17.392502942" + }, + "sarscov2 - illumina - with_reference": { + "content": [ + [ + + ], + [ + + ], + null + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-03-20T16:54:09.994377364" + } +} \ No newline at end of file From 40c1b3baf47b949f91d824b62263026093072294 Mon Sep 17 00:00:00 2001 From: LiaOb21 Date: Mon, 23 Mar 2026 17:30:08 +0000 Subject: [PATCH 02/10] still working on it --- modules/nf-core/masurca/main.nf | 19 +- modules/nf-core/masurca/tests/main.nf.test | 345 +++++++++--------- .../nf-core/masurca/tests/main.nf.test.snap | 95 +++-- 3 files changed, 253 insertions(+), 206 deletions(-) diff --git a/modules/nf-core/masurca/main.nf b/modules/nf-core/masurca/main.nf index b34ea3eeb8b7..d3887a32fb7c 100644 --- a/modules/nf-core/masurca/main.nf +++ b/modules/nf-core/masurca/main.nf @@ -33,11 +33,12 @@ process MASURCA { output: - tuple val(meta), path("${prefix}/assemble.sh") , emit: script - tuple val(meta), path("${prefix}/CA*/final.genome.scf.fasta"), optional: true, emit: scaffolds - tuple val(meta), path("${prefix}/CA*/final.genome.ctg.fasta"), optional: true, emit: contigs - tuple val(meta), path("${prefix}/flye/assembly.fasta") , optional: true, emit: flye_assembly - tuple val(meta), path("${prefix}/*_masurca_config.txt") , emit: config + tuple val(meta), path("assemble.sh") , emit: script + tuple val(meta), path("CA*/final.genome.scf.fasta"), optional: true, emit: scaffolds + tuple val(meta), path("CA*/final.genome.ctg.fasta"), optional: true, emit: contigs + tuple val(meta), path("flye/assembly.fasta") , optional: true, emit: flye_assembly + tuple val(meta), path("*_masurca_config.txt") , emit: config + tuple val(meta), path("*-masurca.log") , emit: log tuple val("${task.process}"), val('masurca'), eval("masurca --version | sed 's/version //g'"), topic: versions, emit: versions_masurca when: @@ -48,7 +49,7 @@ process MASURCA { def prefix = task.ext.prefix ?: "${meta.id}" //get input reads with absolute paths - illumina are mandatory, jump/pacbio/nanopore are optional - def illumina_reads = meta.single_end ? "\$(readlink -f ${illumina})" : "\$(readlink -f ${illumina[0]}) \$(readlink -f ${illumina[1]})" + def illumina_reads = [illumina].flatten().join(' ') def jump_reads = jump ? "\$(readlink -f ${jump[0]}) \$(readlink -f ${jump[1]})" : "" def pacbio_file = pacbio ? "\$(readlink -f ${pacbio})" : "" def nanopore_file = nanopore ? "\$(readlink -f ${nanopore})" : "" @@ -156,15 +157,13 @@ process MASURCA { # Run the assembly cd ${prefix} - ./assemble.sh + ./assemble.sh > ${prefix}-masurca.log 2>&1 """ stub: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - """ - echo $args - + """ mkdir -p ${prefix}/CA mkdir -p ${prefix}/flye touch ${prefix}/assemble.sh diff --git a/modules/nf-core/masurca/tests/main.nf.test b/modules/nf-core/masurca/tests/main.nf.test index d6719d65029b..a892883025c0 100644 --- a/modules/nf-core/masurca/tests/main.nf.test +++ b/modules/nf-core/masurca/tests/main.nf.test @@ -15,7 +15,7 @@ nextflow_process { """ input[0] = [ [ id:'test', single_end:true ], - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz", checkIfExists: true) [], // no jump reads [], // no pacbio [], // no nanopore @@ -30,133 +30,22 @@ nextflow_process { } } - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - - test("sarscov2 - illumina - paired_end") { - - when { - process { - """ - input[0] = [ - [ id:'test', single_end:false ], - [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) - ], - [], // no jump reads - [], // no pacbio - [], // no nanopore - [], // no other reads - [] // no reference genome - ] - input[1] = 500 // fragment_mean - input[2] = 50 // fragment_stdev - input[3] = 0 // jump_mean (not used) - input[4] = 0 // jump_stdev (not used) - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot( - process.out.script, - process.out.config, - process.out.versions - ).match() } - ) - } - - } - - test("sarscov2 - illumina - paired_end - with_jump") { - - when { - process { - """ - input[0] = [ - [ id:'test', single_end:false ], - [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) - ], - [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) - ], - [], // no pacbio - [], // no nanopore - [], // no other reads - [] // no reference genome - ] - input[1] = 500 // fragment_mean - input[2] = 50 // fragment_stdev - input[3] = 3600 // jump_mean - input[4] = 200 // jump_stdev - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot( - process.out.script, - process.out.config, - process.out.versions - ).match() } - ) - } - - } - - test("sarscov2 - hybrid - illumina_pacbio") { - - when { - process { - """ - input[0] = [ - [ id:'test', single_end:false ], - [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) - ], - [], // no jump reads - file(params.test_data['sarscov2']['pacbio']['test_fastq_gz'], checkIfExists: true), - [], // no nanopore - [], // no other reads - [] // no reference genome - ] - input[1] = 500 // fragment_mean - input[2] = 50 // fragment_stdev - input[3] = 0 // jump_mean (not used) - input[4] = 0 // jump_stdev (not used) - """ - } - } - then { assertAll( { assert process.success }, { assert snapshot( + process.out.contigs, + process.out.scaffolds, process.out.script, - process.out.config, - process.out.versions + process.out.log, + process.out.versions_masurca ).match() } ) } } - test("sarscov2 - hybrid - illumina_nanopore") { + test("sarscov2 - illumina - paired_end") { when { process { @@ -164,12 +53,12 @@ nextflow_process { input[0] = [ [ id:'test', single_end:false ], [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) ], [], // no jump reads [], // no pacbio - file(params.test_data['sarscov2']['nanopore']['test_fastq_gz'], checkIfExists: true), + [], // no nanopore [], // no other reads [] // no reference genome ] @@ -185,52 +74,175 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( + process.out.contigs, + process.out.scaffolds, process.out.script, - process.out.config, - process.out.versions + process.out.log, + process.out.versions_masurca ).match() } ) } } - test("sarscov2 - hybrid - illumina_pacbio_nanopore") { - - when { - process { - """ - input[0] = [ - [ id:'test', single_end:false ], - [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) - ], - [], // no jump reads - file(params.test_data['sarscov2']['pacbio']['test_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['nanopore']['test_fastq_gz'], checkIfExists: true), - [], // no other reads - [] // no reference genome - ] - input[1] = 500 // fragment_mean - input[2] = 50 // fragment_stdev - input[3] = 0 // jump_mean (not used) - input[4] = 0 // jump_stdev (not used) - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot( - process.out.script, - process.out.config, - process.out.versions - ).match() } - ) - } - - } +// test("sarscov2 - illumina - paired_end - with_jump") { +// +// when { +// process { +// """ +// input[0] = [ +// [ id:'test', single_end:false ], +// [ +// file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), +// file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) +// ], +// [ +// file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test2_1.fastq.gz", checkIfExists: true), +// file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test2_2.fastq.gz", checkIfExists: true) +// ], +// [], // no pacbio +// [], // no nanopore +// [], // no other reads +// [] // no reference genome +// ] +// input[1] = 500 // fragment_mean +// input[2] = 50 // fragment_stdev +// input[3] = 3600 // jump_mean +// input[4] = 200 // jump_stdev +// """ +// } +// } +// +// then { +// assertAll( +// { assert process.success }, +// { assert snapshot( +// process.out.contigs, +// process.out.scaffolds, +// process.out.script, +// process.out.log, +// process.out.versions_masurca +// ).match() } +// ) +// } +// +// } +// +// test("sarscov2 - hybrid - illumina_pacbio") { +// +// when { +// process { +// """ +// input[0] = [ +// [ id:'test', single_end:false ], +// [ +// file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz", checkIfExists: true), +// file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz", checkIfExists: true) +// ], +// [], // no jump reads +// file(params.modules_testdata_base_path + "genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz", checkIfExists: true), +// [], // no nanopore +// [], // no other reads +// [] // no reference genome +// ] +// input[1] = 500 // fragment_mean +// input[2] = 50 // fragment_stdev +// input[3] = 0 // jump_mean (not used) +// input[4] = 0 // jump_stdev (not used) +// """ +// } +// } +// +// then { +// assertAll( +// { assert process.success }, +// { assert snapshot( +// process.out.flye_assembly, +// process.out.script, +// process.out.log, +// process.out.versions_masurca +// ).match() } +// ) +// } +// +// } +// +// test("sarscov2 - hybrid - illumina_nanopore") { +// +// when { +// process { +// """ +// input[0] = [ +// [ id:'test', single_end:false ], +// [ +// file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), +// file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) +// ], +// [], // no jump reads +// [], // no pacbio +// file(params.modules_testdata_base_path + "genomics/sarscov2/nanopore/fastq/test.fastq.gz", checkIfExists: true), +// [], // no other reads +// [] // no reference genome +// ] +// input[1] = 500 // fragment_mean +// input[2] = 50 // fragment_stdev +// input[3] = 0 // jump_mean (not used) +// input[4] = 0 // jump_stdev (not used) +// """ +// } +// } +// +// then { +// assertAll( +// { assert process.success }, +// { assert snapshot( +// process.out.flye_assembly, +// process.out.script, +// process.out.log, +// process.out.versions_masurca +// ).match() } +// ) +// } +// +// } +// +// test("homo_sapiens - hybrid - illumina_pacbio_nanopore") { +// +// when { +// process { +// """ +// input[0] = [ +// [ id:'test', single_end:false ], +// [ +// file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz", checkIfExists: true), +// file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz", checkIfExists: true) +// ], +// [], // no jump reads +// file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_hifi.fastq.gz", checkIfExists: true), +// file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_hifi.fastq.gz", checkIfExists: true), +// [], // no other reads +// [] // no reference genome +// ] +// input[1] = 500 // fragment_mean +// input[2] = 50 // fragment_stdev +// input[3] = 0 // jump_mean (not used) +// input[4] = 0 // jump_stdev (not used) +// """ +// } +// } +// +// then { +// assertAll( +// { assert snapshot( +// process.out.flye_assembly, +// process.out.script, +// process.out.log, +// process.out.versions_masurca +// ).match() } +// ) +// } +// +// } test("sarscov2 - illumina - with_reference") { @@ -240,14 +252,14 @@ nextflow_process { input[0] = [ [ id:'test', single_end:false ], [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz", checkIfExists: true) ], [], // no jump reads [], // no pacbio [], // no nanopore [], // no other reads - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta.gz", checkIfExists: true) ] input[1] = 500 // fragment_mean input[2] = 50 // fragment_stdev @@ -259,11 +271,12 @@ nextflow_process { then { assertAll( - { assert process.success }, { assert snapshot( + process.out.contigs, + process.out.scaffolds, process.out.script, - process.out.config, - process.out.versions + process.out.log, + process.out.versions_masurca ).match() } ) } @@ -300,7 +313,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out)).match() } ) } diff --git a/modules/nf-core/masurca/tests/main.nf.test.snap b/modules/nf-core/masurca/tests/main.nf.test.snap index daf30de492c0..ad490f7042ca 100644 --- a/modules/nf-core/masurca/tests/main.nf.test.snap +++ b/modules/nf-core/masurca/tests/main.nf.test.snap @@ -7,13 +7,21 @@ [ ], - null + [ + + ], + [ + + ], + [ + + ] ], "meta": { "nf-test": "0.9.3", "nextflow": "25.10.3" }, - "timestamp": "2026-03-20T16:53:33.598889584" + "timestamp": "2026-03-23T16:33:30.420661493" }, "sarscov2 - hybrid - illumina_nanopore": { "content": [ @@ -23,13 +31,18 @@ [ ], - null + [ + + ], + [ + + ] ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.3" + "nextflow": "25.10.4" }, - "timestamp": "2026-03-20T16:53:55.13953957" + "timestamp": "2026-03-23T16:39:05.105234025" }, "sarscov2 - illumina - single_end": { "content": [ @@ -86,35 +99,46 @@ [ ], - null + [ + + ], + [ + + ], + [ + + ] ], "meta": { "nf-test": "0.9.3", "nextflow": "25.10.3" }, - "timestamp": "2026-03-20T16:53:21.827256455" + "timestamp": "2026-03-23T16:33:21.946468822" + }, + "sarscov2 - hybrid - illumina_pacbio": { + "content": [ + [ + + ], + [ + + ], + [ + + ], + [ + + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-23T16:38:57.588039397" }, "sarscov2 - illumina - paired_end - stub": { "content": [ { - "0": [ - - ], - "1": [ - - ], - "2": [ - - ], - "3": [ - - ], - "4": [ - - ], - "5": [ - - ], "config": [ ], @@ -123,6 +147,9 @@ ], "flye_assembly": [ + ], + "log": [ + ], "scaffolds": [ @@ -137,9 +164,9 @@ ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.3" + "nextflow": "25.10.4" }, - "timestamp": "2026-03-20T16:54:17.392502942" + "timestamp": "2026-03-23T16:39:42.421134682" }, "sarscov2 - illumina - with_reference": { "content": [ @@ -149,12 +176,20 @@ [ ], - null + [ + + ], + [ + + ], + [ + + ] ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.3" + "nextflow": "25.10.4" }, - "timestamp": "2026-03-20T16:54:09.994377364" + "timestamp": "2026-03-23T16:39:33.587772541" } } \ No newline at end of file From 5c5630906da5d3e67ad97bdad3c4afc38065bf01 Mon Sep 17 00:00:00 2001 From: LiaOb21 Date: Tue, 24 Mar 2026 23:11:28 +0000 Subject: [PATCH 03/10] probably close --- .vscode/extensions.json | 3 - .vscode/settings.json | 8 - modules/nf-core/masurca/main.nf | 46 +-- modules/nf-core/masurca/tests/main.nf.test | 345 +++++++++--------- .../nf-core/masurca/tests/main.nf.test.snap | 337 ++++++++++++++--- 5 files changed, 475 insertions(+), 264 deletions(-) delete mode 100644 .vscode/extensions.json delete mode 100644 .vscode/settings.json diff --git a/.vscode/extensions.json b/.vscode/extensions.json deleted file mode 100644 index 110de1157123..000000000000 --- a/.vscode/extensions.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "recommendations": ["nextflow.nextflow", "nf-core.nf-core-extensionpack"] -} diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index a2676d66e969..000000000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "nextflow.formatting.harshilAlignment": true, - "yaml.schemas": { - "./modules/meta-schema.json": ["modules/nf-core/**/meta.yml"], - "./modules/environment-schema.json": ["modules/nf-core/**/environment.yml"], - "./subworkflows/yaml-schema.json": ["subworkflows/nf-core/**/meta.yml"] - } -} diff --git a/modules/nf-core/masurca/main.nf b/modules/nf-core/masurca/main.nf index d3887a32fb7c..7c1fa4245abd 100644 --- a/modules/nf-core/masurca/main.nf +++ b/modules/nf-core/masurca/main.nf @@ -34,9 +34,7 @@ process MASURCA { output: tuple val(meta), path("assemble.sh") , emit: script - tuple val(meta), path("CA*/final.genome.scf.fasta"), optional: true, emit: scaffolds - tuple val(meta), path("CA*/final.genome.ctg.fasta"), optional: true, emit: contigs - tuple val(meta), path("flye/assembly.fasta") , optional: true, emit: flye_assembly + tuple val(meta), path("CA*/primary.genome.scf.fasta") , emit: scaffolds tuple val(meta), path("*_masurca_config.txt") , emit: config tuple val(meta), path("*-masurca.log") , emit: log tuple val("${task.process}"), val('masurca'), eval("masurca --version | sed 's/version //g'"), topic: versions, emit: versions_masurca @@ -49,12 +47,12 @@ process MASURCA { def prefix = task.ext.prefix ?: "${meta.id}" //get input reads with absolute paths - illumina are mandatory, jump/pacbio/nanopore are optional - def illumina_reads = [illumina].flatten().join(' ') - def jump_reads = jump ? "\$(readlink -f ${jump[0]}) \$(readlink -f ${jump[1]})" : "" - def pacbio_file = pacbio ? "\$(readlink -f ${pacbio})" : "" - def nanopore_file = nanopore ? "\$(readlink -f ${nanopore})" : "" - def other_reads_file = other_reads ? "\$(readlink -f ${other_reads})" : "" - def reference_genome_file = reference_genome ? "\$(readlink -f ${reference_genome})" : "" + def illumina_reads = illumina.collect { it.toRealPath() }.join(' ') + def jump_reads = jump ? jump.collect { it.toRealPath() }.join(' ') : "" + def pacbio_file = pacbio ? pacbio.toRealPath() : "" + def nanopore_file = nanopore ? nanopore.toRealPath() : "" + def other_reads_file = other_reads ? other_reads.toRealPath() : "" + def reference_genome_file = reference_genome ? reference_genome.toRealPath() : "" // Configuration parameters with defaults from task.ext def extend_jump_reads = task.ext.extend_jump_reads != null ? task.ext.extend_jump_reads : 0 @@ -64,10 +62,8 @@ process MASURCA { def mega_reads_one_pass = task.ext.mega_reads_one_pass != null ? task.ext.mega_reads_one_pass : 0 def limit_jump_coverage = task.ext.limit_jump_coverage ?: 300 def ca_parameters = task.ext.ca_parameters ?: 'cgwErrorRate=0.15' - def close_gaps = task.ext.close_gaps != null ? task.ext.close_gaps : 1 + def close_gaps = task.ext.close_gaps != null ? task.ext.close_gaps : 0 def jf_size = task.ext.jf_size ?: 200000000 - def soap_assembly = task.ext.soap_assembly != null ? task.ext.soap_assembly : 0 - def flye_assembly = task.ext.flye_assembly != null ? task.ext.flye_assembly : 0 """ echo "DATA" > ${prefix}_masurca_config.txt echo "#Illumina paired end reads supplied as " >> ${prefix}_masurca_config.txt @@ -86,7 +82,7 @@ process MASURCA { if [ -n "${pacbio_file}" ] && [ -n "${nanopore_file}" ]; then echo "#if you have both PacBio and Nanopore, supply both as NANOPORE type" >> ${prefix}_masurca_config.txt cat ${pacbio_file} ${nanopore_file} > ${prefix}_long_reads.fastq.gz - echo "NANOPORE=\$(readlink -f ${prefix}_long_reads.fastq.gz)" >> ${prefix}_masurca_config.txt + echo "NANOPORE= ${prefix}_long_reads.fastq.gz" >> ${prefix}_masurca_config.txt elif [ -n "${pacbio_file}" ]; then echo "#PacBio/CCS reads must be in a single fasta or fastq file with absolute path" >> ${prefix}_masurca_config.txt echo "PACBIO=${pacbio_file}" >> ${prefix}_masurca_config.txt @@ -137,26 +133,18 @@ process MASURCA { echo "JF_SIZE = ${jf_size}" >> ${prefix}_masurca_config.txt echo "#ILLUMINA ONLY. Set this to 1 to use SOAPdenovo contigging/scaffolding module." >> ${prefix}_masurca_config.txt echo "#Assembly will be worse but will run faster. Useful for very large (>=8Gbp) genomes from Illumina-only data" >> ${prefix}_masurca_config.txt - echo "SOAP_ASSEMBLY=${soap_assembly}" >> ${prefix}_masurca_config.txt + echo "SOAP_ASSEMBLY=0" >> ${prefix}_masurca_config.txt echo "#If you are doing Hybrid Illumina paired end + Nanopore/PacBio assembly ONLY (no Illumina mate pairs or OTHER frg files)." >> ${prefix}_masurca_config.txt echo "#Set this to 1 to use Flye assembler for final assembly of corrected mega-reads." >> ${prefix}_masurca_config.txt echo "#A lot faster than CABOG, AND QUALITY IS THE SAME OR BETTER." >> ${prefix}_masurca_config.txt echo "#Works well even when MEGA_READS_ONE_PASS is set to 1." >> ${prefix}_masurca_config.txt echo "#DO NOT use if you have less than 15x coverage by long reads." >> ${prefix}_masurca_config.txt - echo "FLYE_ASSEMBLY=${flye_assembly}" >> ${prefix}_masurca_config.txt + echo "FLYE_ASSEMBLY=0" >> ${prefix}_masurca_config.txt echo "END" >> ${prefix}_masurca_config.txt # Generate assembly script masurca ${prefix}_masurca_config.txt - # Create output directory and move files - mkdir -p ${prefix} - mv assemble.sh ${prefix}/ - mv ${prefix}_masurca_config.txt ${prefix}/ - chmod +x ${prefix}/assemble.sh - - # Run the assembly - cd ${prefix} ./assemble.sh > ${prefix}-masurca.log 2>&1 """ @@ -164,12 +152,10 @@ process MASURCA { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - mkdir -p ${prefix}/CA - mkdir -p ${prefix}/flye - touch ${prefix}/assemble.sh - touch ${prefix}/${prefix}_masurca_config.txt - touch ${prefix}/CA/final.genome.scf.fasta - touch ${prefix}/CA/final.genome.ctg.fasta - touch ${prefix}/flye/assembly.fasta + mkdir -p CA + touch assemble.sh + touch ${prefix}_masurca_config.txt + touch CA/primary.genome.scf.fasta + touch ${prefix}-masurca.log """ } diff --git a/modules/nf-core/masurca/tests/main.nf.test b/modules/nf-core/masurca/tests/main.nf.test index a892883025c0..1d6a1afe517d 100644 --- a/modules/nf-core/masurca/tests/main.nf.test +++ b/modules/nf-core/masurca/tests/main.nf.test @@ -8,14 +8,14 @@ nextflow_process { tag "modules_nfcore" tag "masurca" - test("sarscov2 - illumina - single_end") { + test("homo_sapiens - illumina - single_end") { when { process { """ input[0] = [ - [ id:'test', single_end:true ], - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz", checkIfExists: true) + [ id:'test'], + [file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz", checkIfExists: true)], [], // no jump reads [], // no pacbio [], // no nanopore @@ -34,7 +34,6 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - process.out.contigs, process.out.scaffolds, process.out.script, process.out.log, @@ -45,16 +44,16 @@ nextflow_process { } - test("sarscov2 - illumina - paired_end") { + test("homo_sapiens - illumina - paired_end") { when { process { """ input[0] = [ - [ id:'test', single_end:false ], + [ id:'test'], [ - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz", checkIfExists: true) ], [], // no jump reads [], // no pacbio @@ -74,7 +73,6 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - process.out.contigs, process.out.scaffolds, process.out.script, process.out.log, @@ -85,181 +83,142 @@ nextflow_process { } -// test("sarscov2 - illumina - paired_end - with_jump") { -// -// when { -// process { -// """ -// input[0] = [ -// [ id:'test', single_end:false ], -// [ -// file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), -// file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) -// ], -// [ -// file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test2_1.fastq.gz", checkIfExists: true), -// file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test2_2.fastq.gz", checkIfExists: true) -// ], -// [], // no pacbio -// [], // no nanopore -// [], // no other reads -// [] // no reference genome -// ] -// input[1] = 500 // fragment_mean -// input[2] = 50 // fragment_stdev -// input[3] = 3600 // jump_mean -// input[4] = 200 // jump_stdev -// """ -// } -// } -// -// then { -// assertAll( -// { assert process.success }, -// { assert snapshot( -// process.out.contigs, -// process.out.scaffolds, -// process.out.script, -// process.out.log, -// process.out.versions_masurca -// ).match() } -// ) -// } -// -// } -// -// test("sarscov2 - hybrid - illumina_pacbio") { -// -// when { -// process { -// """ -// input[0] = [ -// [ id:'test', single_end:false ], -// [ -// file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz", checkIfExists: true), -// file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz", checkIfExists: true) -// ], -// [], // no jump reads -// file(params.modules_testdata_base_path + "genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz", checkIfExists: true), -// [], // no nanopore -// [], // no other reads -// [] // no reference genome -// ] -// input[1] = 500 // fragment_mean -// input[2] = 50 // fragment_stdev -// input[3] = 0 // jump_mean (not used) -// input[4] = 0 // jump_stdev (not used) -// """ -// } -// } -// -// then { -// assertAll( -// { assert process.success }, -// { assert snapshot( -// process.out.flye_assembly, -// process.out.script, -// process.out.log, -// process.out.versions_masurca -// ).match() } -// ) -// } -// -// } -// -// test("sarscov2 - hybrid - illumina_nanopore") { -// -// when { -// process { -// """ -// input[0] = [ -// [ id:'test', single_end:false ], -// [ -// file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), -// file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) -// ], -// [], // no jump reads -// [], // no pacbio -// file(params.modules_testdata_base_path + "genomics/sarscov2/nanopore/fastq/test.fastq.gz", checkIfExists: true), -// [], // no other reads -// [] // no reference genome -// ] -// input[1] = 500 // fragment_mean -// input[2] = 50 // fragment_stdev -// input[3] = 0 // jump_mean (not used) -// input[4] = 0 // jump_stdev (not used) -// """ -// } -// } -// -// then { -// assertAll( -// { assert process.success }, -// { assert snapshot( -// process.out.flye_assembly, -// process.out.script, -// process.out.log, -// process.out.versions_masurca -// ).match() } -// ) -// } -// -// } -// -// test("homo_sapiens - hybrid - illumina_pacbio_nanopore") { -// -// when { -// process { -// """ -// input[0] = [ -// [ id:'test', single_end:false ], -// [ -// file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz", checkIfExists: true), -// file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz", checkIfExists: true) -// ], -// [], // no jump reads -// file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_hifi.fastq.gz", checkIfExists: true), -// file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_hifi.fastq.gz", checkIfExists: true), -// [], // no other reads -// [] // no reference genome -// ] -// input[1] = 500 // fragment_mean -// input[2] = 50 // fragment_stdev -// input[3] = 0 // jump_mean (not used) -// input[4] = 0 // jump_stdev (not used) -// """ -// } -// } -// -// then { -// assertAll( -// { assert snapshot( -// process.out.flye_assembly, -// process.out.script, -// process.out.log, -// process.out.versions_masurca -// ).match() } -// ) -// } -// -// } - - test("sarscov2 - illumina - with_reference") { + test("homo_sapiens - illumina - paired_end - with_jump") { when { process { """ input[0] = [ - [ id:'test', single_end:false ], + [ id:'test'], [ file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz", checkIfExists: true), file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz", checkIfExists: true) ], - [], // no jump reads + [ + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test2_germline_1.fq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test2_germline_2.fq.gz", checkIfExists: true) + ], [], // no pacbio [], // no nanopore [], // no other reads - file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta.gz", checkIfExists: true) + [] // no reference genome + ] + input[1] = 500 // fragment_mean + input[2] = 50 // fragment_stdev + input[3] = 3600 // jump_mean + input[4] = 200 // jump_stdev + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.scaffolds, + process.out.script, + process.out.log, + process.out.versions_masurca + ).match() } + ) + } + + } + + test("genomeassembler - hybrid - illumina_pacbio") { + + when { + process { + """ + input[0] = [ + [ id:'test'], + [ + file("https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/A_thaliana_Col-0_2mb/SR_Col-0_test_data_1.fastq.gz", checkIfExists: true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/A_thaliana_Col-0_2mb/SR_Col-0_test_data_2.fastq.gz", checkIfExists: true) + ], + [], // no jump reads + [file("https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/A_thaliana_Col-0_2mb/HiFi-Col-0_test_data.fastq.gz", checkIfExists: true)], + [], // no nanopore + [], // no other reads + [] // no reference genome + ] + input[1] = 500 // fragment_mean + input[2] = 50 // fragment_stdev + input[3] = 0 // jump_mean (not used) + input[4] = 0 // jump_stdev (not used) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.scaffolds, + process.out.script, + process.out.log, + process.out.versions_masurca + ).match() } + ) + } + + } + + test("genomeassembler - hybrid - illumina_nanopore") { + + when { + process { + """ + input[0] = [ + [ id:'test'], + [ + file("https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/A_thaliana_Col-0_2mb/SR_Col-0_test_data_1.fastq.gz", checkIfExists: true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/A_thaliana_Col-0_2mb/SR_Col-0_test_data_2.fastq.gz", checkIfExists: true) + ], + [], // no jump reads + [], // no pacbio + [file("https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/A_thaliana_Col-0_2mb/ONT-Col-0_test_data.fastq.gz", checkIfExists: true)], + [], // no other reads + [] // no reference genome + ] + input[1] = 500 // fragment_mean + input[2] = 50 // fragment_stdev + input[3] = 0 // jump_mean (not used) + input[4] = 0 // jump_stdev (not used) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.scaffolds, + process.out.script, + process.out.log, + process.out.versions_masurca + ).match() } + ) + } + + } + + test("genomeassembler - hybrid - illumina_pacbio_nanopore") { + + when { + process { + """ + input[0] = [ + [ id:'test'], + [ + file("https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/A_thaliana_Col-0_2mb/SR_Col-0_test_data_1.fastq.gz", checkIfExists: true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/A_thaliana_Col-0_2mb/SR_Col-0_test_data_2.fastq.gz", checkIfExists: true) + ], + [], // no jump reads + [file("https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/A_thaliana_Col-0_2mb/HiFi-Col-0_test_data.fastq.gz", checkIfExists: true)], + [file("https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/A_thaliana_Col-0_2mb/ONT-Col-0_test_data.fastq.gz", checkIfExists: true)], + [], // no other reads + [] // no reference genome ] input[1] = 500 // fragment_mean input[2] = 50 // fragment_stdev @@ -272,7 +231,6 @@ nextflow_process { then { assertAll( { assert snapshot( - process.out.contigs, process.out.scaffolds, process.out.script, process.out.log, @@ -283,6 +241,45 @@ nextflow_process { } +// test("sarscov2 - illumina - with_reference") { +// +// when { +// process { +// """ +// input[0] = [ +// [ id:'test'], +// [ +// file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz", checkIfExists: true), +// file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz", checkIfExists: true) +// ], +// [], // no jump reads +// [], // no pacbio +// [], // no nanopore +// [], // no other reads +// [file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta.gz", checkIfExists: true)] +// ] +// input[1] = 500 // fragment_mean +// input[2] = 50 // fragment_stdev +// input[3] = 0 // jump_mean (not used) +// input[4] = 0 // jump_stdev (not used) +// """ +// } +// } + +// then { +// assertAll( +// { assert snapshot( +// process.out.contigs, +// process.out.scaffolds, +// process.out.script, +// process.out.log, +// process.out.versions_masurca +// ).match() } +// ) +// } +// +// } + test("sarscov2 - illumina - paired_end - stub") { options "-stub" @@ -291,7 +288,7 @@ nextflow_process { process { """ input[0] = [ - [ id:'test', single_end:false ], + [ id:'test'], [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) diff --git a/modules/nf-core/masurca/tests/main.nf.test.snap b/modules/nf-core/masurca/tests/main.nf.test.snap index ad490f7042ca..c9dc908555b6 100644 --- a/modules/nf-core/masurca/tests/main.nf.test.snap +++ b/modules/nf-core/masurca/tests/main.nf.test.snap @@ -17,11 +17,226 @@ ] ], + "timestamp": "2026-03-23T16:33:30.420661493", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.3" - }, - "timestamp": "2026-03-23T16:33:30.420661493" + } + }, + "homo_sapiens - illumina - single_end": { + "content": [ + [ + + ], + [ + + ], + [ + + ], + [ + + ] + ], + "timestamp": "2026-03-24T12:00:30.630209268", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, + "genomeassembler - hybrid - illumina_pacbio_nanopore": { + "content": [ + [ + [ + { + "id": "test" + }, + "primary.genome.scf.fasta:md5,2efaf8caf4ee23f5aefdd52c872e72de" + ] + ], + [ + [ + { + "id": "test" + }, + "assemble.sh:md5,86b0e10f065a071e96b0b51d4ea1e0ed" + ] + ], + [ + [ + { + "id": "test" + }, + "test-masurca.log:md5,2ddf4082e9f6c15411bd71b789e08d03" + ] + ], + [ + [ + "MASURCA", + "masurca", + "4.1.4" + ] + ] + ], + "timestamp": "2026-03-24T23:06:29.815457652", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, + "sarscov2 - illumina - paired_end": { + "content": [ + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ] + ], + "timestamp": "2026-03-23T16:33:21.946468822", + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + } + }, + "sarscov2 - illumina - paired_end - stub": { + "content": [ + { + "config": [ + [ + { + "id": "test" + }, + "test_masurca_config.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test" + }, + "test-masurca.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "scaffolds": [ + [ + { + "id": "test" + }, + "primary.genome.scf.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "script": [ + [ + { + "id": "test" + }, + "assemble.sh:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_masurca": [ + [ + "MASURCA", + "masurca", + "4.1.4" + ] + ] + } + ], + "timestamp": "2026-03-24T21:44:20.998579705", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, + "genomeassembler - hybrid - illumina_pacbio": { + "content": [ + [ + [ + { + "id": "test" + }, + "primary.genome.scf.fasta:md5,410641699039df2186ead1fd773e8ef6" + ] + ], + [ + [ + { + "id": "test" + }, + "assemble.sh:md5,d0b63ee04399206b0119e0923d98a45b" + ] + ], + [ + [ + { + "id": "test" + }, + "test-masurca.log:md5,e9e2395995ae101377dda494723e0fb7" + ] + ], + [ + [ + "MASURCA", + "masurca", + "4.1.4" + ] + ] + ], + "timestamp": "2026-03-24T23:01:43.498317838", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, + "homo_sapiens - illumina - paired_end": { + "content": [ + [ + [ + { + "id": "test" + }, + "primary.genome.scf.fasta:md5,e7578c7d3cbbc2f521cfb8ca723080ab" + ] + ], + [ + [ + { + "id": "test" + }, + "assemble.sh:md5,d13e210a74a167db3def9400dbce80fc" + ] + ], + [ + [ + { + "id": "test" + }, + "test-masurca.log:md5,cd27376502e314e826200e1de0a7d79e" + ] + ], + [ + [ + "MASURCA", + "masurca", + "4.1.4" + ] + ] + ], + "timestamp": "2026-03-24T22:51:36.485831994", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } }, "sarscov2 - hybrid - illumina_nanopore": { "content": [ @@ -38,11 +253,11 @@ ] ], + "timestamp": "2026-03-23T16:39:05.105234025", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.4" - }, - "timestamp": "2026-03-23T16:39:05.105234025" + } }, "sarscov2 - illumina - single_end": { "content": [ @@ -85,13 +300,13 @@ ] } ], + "timestamp": "2026-03-20T16:53:14.230197251", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.3" - }, - "timestamp": "2026-03-20T16:53:14.230197251" + } }, - "sarscov2 - illumina - paired_end": { + "sarscov2 - hybrid - illumina_pacbio": { "content": [ [ @@ -104,69 +319,93 @@ ], [ - ], - [ - ] ], + "timestamp": "2026-03-23T16:38:57.588039397", "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.3" - }, - "timestamp": "2026-03-23T16:33:21.946468822" + "nextflow": "25.10.4" + } }, - "sarscov2 - hybrid - illumina_pacbio": { + "homo_sapiens - illumina - paired_end - with_jump": { "content": [ [ - + [ + { + "id": "test" + }, + "primary.genome.scf.fasta:md5,3beee4f499100edd3f4f02fab8edc1a5" + ] ], [ - + [ + { + "id": "test" + }, + "assemble.sh:md5,c9d61b83d203d2882300164205b79c8a" + ] ], [ - + [ + { + "id": "test" + }, + "test-masurca.log:md5,0b390f436b223beabac17b283b075fc5" + ] ], [ - + [ + "MASURCA", + "masurca", + "4.1.4" + ] ] ], + "timestamp": "2026-03-24T22:59:02.078300748", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.4", "nextflow": "25.10.4" - }, - "timestamp": "2026-03-23T16:38:57.588039397" + } }, - "sarscov2 - illumina - paired_end - stub": { + "genomeassembler - hybrid - illumina_nanopore": { "content": [ - { - "config": [ - - ], - "contigs": [ - - ], - "flye_assembly": [ - - ], - "log": [ - - ], - "scaffolds": [ - - ], - "script": [ - - ], - "versions_masurca": [ - + [ + [ + { + "id": "test" + }, + "primary.genome.scf.fasta:md5,3d495a73c465ed3b339c746d310d36f4" ] - } + ], + [ + [ + { + "id": "test" + }, + "assemble.sh:md5,9342ce732b095ea5f422070f5d9b601d" + ] + ], + [ + [ + { + "id": "test" + }, + "test-masurca.log:md5,c3ec9fabae6523d9074b40cd9713f959" + ] + ], + [ + [ + "MASURCA", + "masurca", + "4.1.4" + ] + ] ], + "timestamp": "2026-03-24T23:03:29.504211443", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.4", "nextflow": "25.10.4" - }, - "timestamp": "2026-03-23T16:39:42.421134682" + } }, "sarscov2 - illumina - with_reference": { "content": [ @@ -186,10 +425,10 @@ ] ], + "timestamp": "2026-03-23T16:39:33.587772541", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.4" - }, - "timestamp": "2026-03-23T16:39:33.587772541" + } } } \ No newline at end of file From c8f84d0ddba4678b35824d7864a0d92ec311c1b8 Mon Sep 17 00:00:00 2001 From: LiaOb21 Date: Wed, 25 Mar 2026 18:25:35 +0000 Subject: [PATCH 04/10] still work in progress --- modules/nf-core/masurca/main.nf | 25 +--- modules/nf-core/masurca/meta.yml | 11 +- modules/nf-core/masurca/tests/main.nf.test | 75 ++++++----- .../nf-core/masurca/tests/main.nf.test.snap | 124 ++++++++++-------- 4 files changed, 116 insertions(+), 119 deletions(-) diff --git a/modules/nf-core/masurca/main.nf b/modules/nf-core/masurca/main.nf index 7c1fa4245abd..f210c0b230ee 100644 --- a/modules/nf-core/masurca/main.nf +++ b/modules/nf-core/masurca/main.nf @@ -1,28 +1,11 @@ -// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) -// https://github.com/nf-core/modules/tree/master/modules/nf-core/ -// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: -// https://nf-co.re/join -// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. -// All other parameters MUST be provided using the "task.ext" directive, see here: -// https://www.nextflow.io/docs/latest/process.html#ext -// where "task.ext" is a string. -// Any parameters that need to be evaluated in the context of a particular sample -// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. -// TODO nf-core: Software that can be piped together SHOULD be added to separate module files -// unless there is a run-time, storage advantage in implementing in this way -// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: -// bwa mem | samtools view -B -T ref.fasta -// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty -// list (`[]`) instead of a file can be used to work around this issue. - process MASURCA { tag "$meta.id" label 'process_high' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/cf/cf6402ed20c3b089ab88cd8884ddace90693501453a515f9188ae681e8ca8556/data': - 'community.wave.seqera.io/library/masurca:4.1.4--d05ef74c4881d55c' }" + 'oras://community.wave.seqera.io/library/coreutils_file_masurca_mummer_perl:73ce913377915362': + 'community.wave.seqera.io/library/coreutils_file_masurca_mummer_perl:93f95b0aad1db22b' }" input: tuple val(meta), path(illumina), path(jump), path(pacbio), path(nanopore), path(other_reads), path(reference_genome) @@ -47,8 +30,8 @@ process MASURCA { def prefix = task.ext.prefix ?: "${meta.id}" //get input reads with absolute paths - illumina are mandatory, jump/pacbio/nanopore are optional - def illumina_reads = illumina.collect { it.toRealPath() }.join(' ') - def jump_reads = jump ? jump.collect { it.toRealPath() }.join(' ') : "" + def illumina_reads = [illumina].flatten().collect { it.toRealPath() }.join(' ') + def jump_reads = jump ? [jump].flatten().collect { it.toRealPath() }.join(' ') : "" def pacbio_file = pacbio ? pacbio.toRealPath() : "" def nanopore_file = nanopore ? nanopore.toRealPath() : "" def other_reads_file = other_reads ? other_reads.toRealPath() : "" diff --git a/modules/nf-core/masurca/meta.yml b/modules/nf-core/masurca/meta.yml index 45a72409d119..69496bb5d4e6 100644 --- a/modules/nf-core/masurca/meta.yml +++ b/modules/nf-core/masurca/meta.yml @@ -1,19 +1,18 @@ # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -# # TODO nf-core: Add a description of the module and list keywords name: "masurca" -description: write your description here +description: The MaSuRCA (Maryland Super Read Cabog Assembler) genome assembly and analysis toolkit keywords: - - sort - - example + - denovo + - assembly + - debruijn - genomics tools: - ## TODO nf-core: Add a description and other details for the software below - "masurca": description: "MaSuRCA (Maryland Super-Read Celera Assembler) genome assembly software." homepage: "https://github.com/alekseyzimin/masurca/blob/v4.1.4/README.md" documentation: "https://github.com/alekseyzimin/masurca/blob/v4.1.4/README.md" tool_dev_url: "https://github.com/alekseyzimin/masurca" - doi: "" + doi: "10.1101/gr.213405.116" licence: ["GPL v3"] identifier: biotools:masurca diff --git a/modules/nf-core/masurca/tests/main.nf.test b/modules/nf-core/masurca/tests/main.nf.test index 1d6a1afe517d..1f9d8f181c7e 100644 --- a/modules/nf-core/masurca/tests/main.nf.test +++ b/modules/nf-core/masurca/tests/main.nf.test @@ -241,44 +241,43 @@ nextflow_process { } -// test("sarscov2 - illumina - with_reference") { -// -// when { -// process { -// """ -// input[0] = [ -// [ id:'test'], -// [ -// file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz", checkIfExists: true), -// file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz", checkIfExists: true) -// ], -// [], // no jump reads -// [], // no pacbio -// [], // no nanopore -// [], // no other reads -// [file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta.gz", checkIfExists: true)] -// ] -// input[1] = 500 // fragment_mean -// input[2] = 50 // fragment_stdev -// input[3] = 0 // jump_mean (not used) -// input[4] = 0 // jump_stdev (not used) -// """ -// } -// } - -// then { -// assertAll( -// { assert snapshot( -// process.out.contigs, -// process.out.scaffolds, -// process.out.script, -// process.out.log, -// process.out.versions_masurca -// ).match() } -// ) -// } -// -// } + test("sarscov2 - illumina - with_reference") { + + when { + process { + """ + input[0] = [ + [ id:'test'], + [ + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz", checkIfExists: true) + ], + [], // no jump reads + [], // no pacbio + [], // no nanopore + [], // no other reads + [file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta.gz", checkIfExists: true)] + ] + input[1] = 500 // fragment_mean + input[2] = 50 // fragment_stdev + input[3] = 0 // jump_mean (not used) + input[4] = 0 // jump_stdev (not used) + """ + } + } + + then { + assertAll( + { assert snapshot( + process.out.scaffolds, + process.out.script, + process.out.log, + process.out.versions_masurca + ).match() } + ) + } + + } test("sarscov2 - illumina - paired_end - stub") { diff --git a/modules/nf-core/masurca/tests/main.nf.test.snap b/modules/nf-core/masurca/tests/main.nf.test.snap index c9dc908555b6..d02445871f0e 100644 --- a/modules/nf-core/masurca/tests/main.nf.test.snap +++ b/modules/nf-core/masurca/tests/main.nf.test.snap @@ -17,32 +17,51 @@ ] ], - "timestamp": "2026-03-23T16:33:30.420661493", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.3" - } + }, + "timestamp": "2026-03-23T16:33:30.420661493" }, "homo_sapiens - illumina - single_end": { "content": [ [ - + [ + { + "id": "test" + }, + "primary.genome.scf.fasta:md5,d3365e7157fb041cb4006a2d11001762" + ] ], [ - + [ + { + "id": "test" + }, + "assemble.sh:md5,72fb3063dc7370a4b2a95b93234bcf0e" + ] ], [ - + [ + { + "id": "test" + }, + "test-masurca.log:md5,2a5a4548af7550a71e43cc82068f415f" + ] ], [ - + [ + "MASURCA", + "masurca", + "4.1.4" + ] ] ], - "timestamp": "2026-03-24T12:00:30.630209268", "meta": { - "nf-test": "0.9.4", + "nf-test": "0.9.3", "nextflow": "25.10.4" - } + }, + "timestamp": "2026-03-25T17:52:08.347930423" }, "genomeassembler - hybrid - illumina_pacbio_nanopore": { "content": [ @@ -51,7 +70,7 @@ { "id": "test" }, - "primary.genome.scf.fasta:md5,2efaf8caf4ee23f5aefdd52c872e72de" + "primary.genome.scf.fasta:md5,b4e3d34bb6c27bc38a5e5aafc9367bb1" ] ], [ @@ -59,7 +78,7 @@ { "id": "test" }, - "assemble.sh:md5,86b0e10f065a071e96b0b51d4ea1e0ed" + "assemble.sh:md5,1e0437f3c91ef409b9b743cc4dc4c3f5" ] ], [ @@ -67,7 +86,7 @@ { "id": "test" }, - "test-masurca.log:md5,2ddf4082e9f6c15411bd71b789e08d03" + "test-masurca.log:md5,ef2bb59b6dd640565bad9825c540b032" ] ], [ @@ -78,11 +97,11 @@ ] ] ], - "timestamp": "2026-03-24T23:06:29.815457652", "meta": { - "nf-test": "0.9.4", + "nf-test": "0.9.3", "nextflow": "25.10.4" - } + }, + "timestamp": "2026-03-25T18:05:57.515112182" }, "sarscov2 - illumina - paired_end": { "content": [ @@ -102,11 +121,11 @@ ] ], - "timestamp": "2026-03-23T16:33:21.946468822", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.3" - } + }, + "timestamp": "2026-03-23T16:33:21.946468822" }, "sarscov2 - illumina - paired_end - stub": { "content": [ @@ -152,11 +171,11 @@ ] } ], - "timestamp": "2026-03-24T21:44:20.998579705", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" - } + }, + "timestamp": "2026-03-24T21:44:20.998579705" }, "genomeassembler - hybrid - illumina_pacbio": { "content": [ @@ -165,7 +184,7 @@ { "id": "test" }, - "primary.genome.scf.fasta:md5,410641699039df2186ead1fd773e8ef6" + "primary.genome.scf.fasta:md5,f15d1bf333065e05cd96d46fe1c45f9f" ] ], [ @@ -173,7 +192,7 @@ { "id": "test" }, - "assemble.sh:md5,d0b63ee04399206b0119e0923d98a45b" + "assemble.sh:md5,dfe0ecc3e08715e5fdca93eed0c8f3bd" ] ], [ @@ -181,7 +200,7 @@ { "id": "test" }, - "test-masurca.log:md5,e9e2395995ae101377dda494723e0fb7" + "test-masurca.log:md5,e2f44ba808dfd6d759b97fb177d02049" ] ], [ @@ -192,11 +211,11 @@ ] ] ], - "timestamp": "2026-03-24T23:01:43.498317838", "meta": { - "nf-test": "0.9.4", + "nf-test": "0.9.3", "nextflow": "25.10.4" - } + }, + "timestamp": "2026-03-25T18:01:40.129969288" }, "homo_sapiens - illumina - paired_end": { "content": [ @@ -205,7 +224,7 @@ { "id": "test" }, - "primary.genome.scf.fasta:md5,e7578c7d3cbbc2f521cfb8ca723080ab" + "primary.genome.scf.fasta:md5,f8e715fa707cf39603cf09b700666d57" ] ], [ @@ -213,7 +232,7 @@ { "id": "test" }, - "assemble.sh:md5,d13e210a74a167db3def9400dbce80fc" + "assemble.sh:md5,c4252fd02352bc365b95ac090c8bda6c" ] ], [ @@ -221,7 +240,7 @@ { "id": "test" }, - "test-masurca.log:md5,cd27376502e314e826200e1de0a7d79e" + "test-masurca.log:md5,6e9580c582a91f814b9870238e9d80ca" ] ], [ @@ -232,11 +251,11 @@ ] ] ], - "timestamp": "2026-03-24T22:51:36.485831994", "meta": { - "nf-test": "0.9.4", + "nf-test": "0.9.3", "nextflow": "25.10.4" - } + }, + "timestamp": "2026-03-25T17:53:33.025513309" }, "sarscov2 - hybrid - illumina_nanopore": { "content": [ @@ -253,11 +272,11 @@ ] ], - "timestamp": "2026-03-23T16:39:05.105234025", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.4" - } + }, + "timestamp": "2026-03-23T16:39:05.105234025" }, "sarscov2 - illumina - single_end": { "content": [ @@ -300,11 +319,11 @@ ] } ], - "timestamp": "2026-03-20T16:53:14.230197251", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.3" - } + }, + "timestamp": "2026-03-20T16:53:14.230197251" }, "sarscov2 - hybrid - illumina_pacbio": { "content": [ @@ -321,11 +340,11 @@ ] ], - "timestamp": "2026-03-23T16:38:57.588039397", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.4" - } + }, + "timestamp": "2026-03-23T16:38:57.588039397" }, "homo_sapiens - illumina - paired_end - with_jump": { "content": [ @@ -334,7 +353,7 @@ { "id": "test" }, - "primary.genome.scf.fasta:md5,3beee4f499100edd3f4f02fab8edc1a5" + "primary.genome.scf.fasta:md5,cae25f87f26c09f972eae5aa1cc65617" ] ], [ @@ -342,7 +361,7 @@ { "id": "test" }, - "assemble.sh:md5,c9d61b83d203d2882300164205b79c8a" + "assemble.sh:md5,a14782d876cbb76684e7599489a84b0b" ] ], [ @@ -350,7 +369,7 @@ { "id": "test" }, - "test-masurca.log:md5,0b390f436b223beabac17b283b075fc5" + "test-masurca.log:md5,7ed0e3502298ef267e4fc61758edb564" ] ], [ @@ -361,11 +380,11 @@ ] ] ], - "timestamp": "2026-03-24T22:59:02.078300748", "meta": { - "nf-test": "0.9.4", + "nf-test": "0.9.3", "nextflow": "25.10.4" - } + }, + "timestamp": "2026-03-25T17:59:29.231599584" }, "genomeassembler - hybrid - illumina_nanopore": { "content": [ @@ -374,7 +393,7 @@ { "id": "test" }, - "primary.genome.scf.fasta:md5,3d495a73c465ed3b339c746d310d36f4" + "primary.genome.scf.fasta:md5,2181552d037a61458d322f732fe814dd" ] ], [ @@ -382,7 +401,7 @@ { "id": "test" }, - "assemble.sh:md5,9342ce732b095ea5f422070f5d9b601d" + "assemble.sh:md5,b8268c6dcb9d7b36b7312967359a73b5" ] ], [ @@ -390,7 +409,7 @@ { "id": "test" }, - "test-masurca.log:md5,c3ec9fabae6523d9074b40cd9713f959" + "test-masurca.log:md5,635979d40fc9bc1e159a10185637bcbe" ] ], [ @@ -401,11 +420,11 @@ ] ] ], - "timestamp": "2026-03-24T23:03:29.504211443", "meta": { - "nf-test": "0.9.4", + "nf-test": "0.9.3", "nextflow": "25.10.4" - } + }, + "timestamp": "2026-03-25T18:03:32.971062838" }, "sarscov2 - illumina - with_reference": { "content": [ @@ -420,15 +439,12 @@ ], [ - ], - [ - ] ], - "timestamp": "2026-03-23T16:39:33.587772541", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.4" - } + }, + "timestamp": "2026-03-25T15:27:36.181316757" } } \ No newline at end of file From d6e1274896a172ce22e5193dad7f5908f99111f5 Mon Sep 17 00:00:00 2001 From: LiaOb21 Date: Wed, 25 Mar 2026 18:26:08 +0000 Subject: [PATCH 05/10] still work in progress --- modules/nf-core/masurca/main.nf | 7 ------- 1 file changed, 7 deletions(-) diff --git a/modules/nf-core/masurca/main.nf b/modules/nf-core/masurca/main.nf index f210c0b230ee..468928e35d46 100644 --- a/modules/nf-core/masurca/main.nf +++ b/modules/nf-core/masurca/main.nf @@ -34,7 +34,6 @@ process MASURCA { def jump_reads = jump ? [jump].flatten().collect { it.toRealPath() }.join(' ') : "" def pacbio_file = pacbio ? pacbio.toRealPath() : "" def nanopore_file = nanopore ? nanopore.toRealPath() : "" - def other_reads_file = other_reads ? other_reads.toRealPath() : "" def reference_genome_file = reference_genome ? reference_genome.toRealPath() : "" // Configuration parameters with defaults from task.ext @@ -74,12 +73,6 @@ process MASURCA { echo "NANOPORE=${nanopore_file}" >> ${prefix}_masurca_config.txt fi - # Other reads (optional) - Sanger, 454, etc. - if [ -n "${other_reads_file}" ]; then - echo "#Other reads (Sanger, 454, etc) one frg file, concatenate your frg files into one if you have many" >> ${prefix}_masurca_config.txt - echo "OTHER=${other_reads_file}" >> ${prefix}_masurca_config.txt - fi - # Reference genome (optional) - for synteny-assisted assembly if [ -n "${reference_genome_file}" ]; then echo "#synteny-assisted assembly, concatenate all reference genomes into one reference.fa; works for Illumina-only data" >> ${prefix}_masurca_config.txt From fd692f5332ec430b2fb7539466f53c84d270e837 Mon Sep 17 00:00:00 2001 From: LiaOb21 Date: Thu, 2 Apr 2026 16:55:18 +0100 Subject: [PATCH 06/10] Add .vscode from master --- .vscode/extensions.json | 3 +++ .vscode/settings.json | 8 ++++++++ 2 files changed, 11 insertions(+) create mode 100644 .vscode/extensions.json create mode 100644 .vscode/settings.json diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 000000000000..110de1157123 --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,3 @@ +{ + "recommendations": ["nextflow.nextflow", "nf-core.nf-core-extensionpack"] +} diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 000000000000..a2676d66e969 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,8 @@ +{ + "nextflow.formatting.harshilAlignment": true, + "yaml.schemas": { + "./modules/meta-schema.json": ["modules/nf-core/**/meta.yml"], + "./modules/environment-schema.json": ["modules/nf-core/**/environment.yml"], + "./subworkflows/yaml-schema.json": ["subworkflows/nf-core/**/meta.yml"] + } +} From 999d62e0e2bc149ae8fb17e286b81973c6795769 Mon Sep 17 00:00:00 2001 From: LiaOb21 Date: Thu, 2 Apr 2026 17:13:31 +0100 Subject: [PATCH 07/10] attempt to set up tests... failed --- modules/nf-core/masurca/main.nf | 22 +- modules/nf-core/masurca/tests/main.nf.test | 156 ++++++------ .../nf-core/masurca/tests/main.nf.test.snap | 228 ++++++------------ 3 files changed, 167 insertions(+), 239 deletions(-) diff --git a/modules/nf-core/masurca/main.nf b/modules/nf-core/masurca/main.nf index 468928e35d46..98aae9b66795 100644 --- a/modules/nf-core/masurca/main.nf +++ b/modules/nf-core/masurca/main.nf @@ -3,12 +3,10 @@ process MASURCA { label 'process_high' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://community.wave.seqera.io/library/coreutils_file_masurca_mummer_perl:73ce913377915362': - 'community.wave.seqera.io/library/coreutils_file_masurca_mummer_perl:93f95b0aad1db22b' }" + container "quay.io/ecoflowucl/masurca@sha256:49509d5c7d5e95e7de38127bb2d9bdd14e0d736e7397928132d05f2611aead05" input: - tuple val(meta), path(illumina), path(jump), path(pacbio), path(nanopore), path(other_reads), path(reference_genome) + tuple val(meta), path(illumina), path(jump), path(pacbio), path(nanopore), path(other_reads) val fragment_mean val fragment_stdev val jump_mean @@ -17,7 +15,7 @@ process MASURCA { output: tuple val(meta), path("assemble.sh") , emit: script - tuple val(meta), path("CA*/primary.genome.scf.fasta") , emit: scaffolds + tuple val(meta), path("*scaffolds.fa.gz") , emit: scaffolds tuple val(meta), path("*_masurca_config.txt") , emit: config tuple val(meta), path("*-masurca.log") , emit: log tuple val("${task.process}"), val('masurca'), eval("masurca --version | sed 's/version //g'"), topic: versions, emit: versions_masurca @@ -34,7 +32,6 @@ process MASURCA { def jump_reads = jump ? [jump].flatten().collect { it.toRealPath() }.join(' ') : "" def pacbio_file = pacbio ? pacbio.toRealPath() : "" def nanopore_file = nanopore ? nanopore.toRealPath() : "" - def reference_genome_file = reference_genome ? reference_genome.toRealPath() : "" // Configuration parameters with defaults from task.ext def extend_jump_reads = task.ext.extend_jump_reads != null ? task.ext.extend_jump_reads : 0 @@ -73,12 +70,6 @@ process MASURCA { echo "NANOPORE=${nanopore_file}" >> ${prefix}_masurca_config.txt fi - # Reference genome (optional) - for synteny-assisted assembly - if [ -n "${reference_genome_file}" ]; then - echo "#synteny-assisted assembly, concatenate all reference genomes into one reference.fa; works for Illumina-only data" >> ${prefix}_masurca_config.txt - echo "REFERENCE=${reference_genome_file}" >> ${prefix}_masurca_config.txt - fi - echo "END" >> ${prefix}_masurca_config.txt @@ -122,6 +113,10 @@ process MASURCA { masurca ${prefix}_masurca_config.txt ./assemble.sh > ${prefix}-masurca.log 2>&1 + + if [ -f CA*/primary.genome.scf.fasta ]; then + gzip -cn CA*/primary.genome.scf.fasta > ${prefix}.scaffolds.fa.gz + fi """ stub: @@ -131,7 +126,8 @@ process MASURCA { mkdir -p CA touch assemble.sh touch ${prefix}_masurca_config.txt - touch CA/primary.genome.scf.fasta + touch ${prefix}.scaffolds.fa.gz touch ${prefix}-masurca.log """ } + \ No newline at end of file diff --git a/modules/nf-core/masurca/tests/main.nf.test b/modules/nf-core/masurca/tests/main.nf.test index 1f9d8f181c7e..3b0ef3bdc67a 100644 --- a/modules/nf-core/masurca/tests/main.nf.test +++ b/modules/nf-core/masurca/tests/main.nf.test @@ -19,8 +19,7 @@ nextflow_process { [], // no jump reads [], // no pacbio [], // no nanopore - [], // no other reads - [] // no reference genome + [] // no other readse ] input[1] = 500 // fragment_mean input[2] = 50 // fragment_stdev @@ -33,11 +32,19 @@ nextflow_process { then { assertAll( { assert process.success }, + { assert path(process.out.scaffolds[0][1]).linesGzip.any { it.contains('AAA') } }, + { assert path(process.out.scaffolds[0][1]).linesGzip.any { it.contains('CCC') } }, + { assert path(process.out.script[0][1]).readLines().any { it.contains("# assemble.sh generated by masurca") } }, + { assert path(process.out.script[0][1]).readLines().any { it.contains("head -n 1 ESTIMATED_GENOME_SIZE.txt") } }, + { assert path(process.out.config[0][1]).readLines().last { it.contains("END") } }, + { assert path(process.out.log[0][1]).readLines().any { it.contains("All done") } }, + { assert path(process.out.log[0][1]).readLines().any { it.contains("primary.genome.scf.fasta") } }, { assert snapshot( - process.out.scaffolds, - process.out.script, - process.out.log, - process.out.versions_masurca + file(process.out.scaffolds[0][1]).name, + file(process.out.script[0][1]).name, + file(process.out.config[0][1]).name, + file(process.out.log[0][1]).name, + process.out.findAll { key, val -> key.startsWith('versions') } ).match() } ) } @@ -58,8 +65,7 @@ nextflow_process { [], // no jump reads [], // no pacbio [], // no nanopore - [], // no other reads - [] // no reference genome + [] // no other reads ] input[1] = 500 // fragment_mean input[2] = 50 // fragment_stdev @@ -72,11 +78,19 @@ nextflow_process { then { assertAll( { assert process.success }, + { assert path(process.out.scaffolds[0][1]).linesGzip.any { it.contains('AAA') } }, + { assert path(process.out.scaffolds[0][1]).linesGzip.any { it.contains('CCC') } }, + { assert path(process.out.script[0][1]).readLines().any { it.contains("# assemble.sh generated by masurca") } }, + { assert path(process.out.script[0][1]).readLines().any { it.contains("head -n 1 ESTIMATED_GENOME_SIZE.txt") } }, + { assert path(process.out.config[0][1]).readLines().last { it.contains("END") } }, + { assert path(process.out.log[0][1]).readLines().any { it.contains("All done") } }, + { assert path(process.out.log[0][1]).readLines().any { it.contains("primary.genome.scf.fasta") } }, { assert snapshot( - process.out.scaffolds, - process.out.script, - process.out.log, - process.out.versions_masurca + file(process.out.scaffolds[0][1]).name, + file(process.out.script[0][1]).name, + file(process.out.config[0][1]).name, + file(process.out.log[0][1]).name, + process.out.findAll { key, val -> key.startsWith('versions') } ).match() } ) } @@ -100,8 +114,7 @@ nextflow_process { ], [], // no pacbio [], // no nanopore - [], // no other reads - [] // no reference genome + [] // no other reads ] input[1] = 500 // fragment_mean input[2] = 50 // fragment_stdev @@ -114,11 +127,19 @@ nextflow_process { then { assertAll( { assert process.success }, + { assert path(process.out.scaffolds[0][1]).linesGzip.any { it.contains('AAA') } }, + { assert path(process.out.scaffolds[0][1]).linesGzip.any { it.contains('CCC') } }, + { assert path(process.out.script[0][1]).readLines().any { it.contains("# assemble.sh generated by masurca") } }, + { assert path(process.out.script[0][1]).readLines().any { it.contains("head -n 1 ESTIMATED_GENOME_SIZE.txt") } }, + { assert path(process.out.config[0][1]).readLines().last { it.contains("END") } }, + { assert path(process.out.log[0][1]).readLines().any { it.contains("All done") } }, + { assert path(process.out.log[0][1]).readLines().any { it.contains("primary.genome.scf.fasta") } }, { assert snapshot( - process.out.scaffolds, - process.out.script, - process.out.log, - process.out.versions_masurca + file(process.out.scaffolds[0][1]).name, + file(process.out.script[0][1]).name, + file(process.out.config[0][1]).name, + file(process.out.log[0][1]).name, + process.out.findAll { key, val -> key.startsWith('versions') } ).match() } ) } @@ -139,8 +160,7 @@ nextflow_process { [], // no jump reads [file("https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/A_thaliana_Col-0_2mb/HiFi-Col-0_test_data.fastq.gz", checkIfExists: true)], [], // no nanopore - [], // no other reads - [] // no reference genome + [] // no other reads ] input[1] = 500 // fragment_mean input[2] = 50 // fragment_stdev @@ -153,11 +173,19 @@ nextflow_process { then { assertAll( { assert process.success }, + { assert path(process.out.scaffolds[0][1]).linesGzip.any { it.contains('AAA') } }, + { assert path(process.out.scaffolds[0][1]).linesGzip.any { it.contains('CCC') } }, + { assert path(process.out.script[0][1]).readLines().any { it.contains("# assemble.sh generated by masurca") } }, + { assert path(process.out.script[0][1]).readLines().any { it.contains("head -n 1 ESTIMATED_GENOME_SIZE.txt") } }, + { assert path(process.out.config[0][1]).readLines().last { it.contains("END") } }, + { assert path(process.out.log[0][1]).readLines().any { it.contains("All done") } }, + { assert path(process.out.log[0][1]).readLines().any { it.contains("primary.genome.scf.fasta") } }, { assert snapshot( - process.out.scaffolds, - process.out.script, - process.out.log, - process.out.versions_masurca + file(process.out.scaffolds[0][1]).name, + file(process.out.script[0][1]).name, + file(process.out.config[0][1]).name, + file(process.out.log[0][1]).name, + process.out.findAll { key, val -> key.startsWith('versions') } ).match() } ) } @@ -178,8 +206,7 @@ nextflow_process { [], // no jump reads [], // no pacbio [file("https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/A_thaliana_Col-0_2mb/ONT-Col-0_test_data.fastq.gz", checkIfExists: true)], - [], // no other reads - [] // no reference genome + [] // no other reads ] input[1] = 500 // fragment_mean input[2] = 50 // fragment_stdev @@ -192,11 +219,19 @@ nextflow_process { then { assertAll( { assert process.success }, + { assert path(process.out.scaffolds[0][1]).linesGzip.any { it.contains('AAA') } }, + { assert path(process.out.scaffolds[0][1]).linesGzip.any { it.contains('CCC') } }, + { assert path(process.out.script[0][1]).readLines().any { it.contains("# assemble.sh generated by masurca") } }, + { assert path(process.out.script[0][1]).readLines().any { it.contains("head -n 1 ESTIMATED_GENOME_SIZE.txt") } }, + { assert path(process.out.config[0][1]).readLines().last { it.contains("END") } }, + { assert path(process.out.log[0][1]).readLines().any { it.contains("All done") } }, + { assert path(process.out.log[0][1]).readLines().any { it.contains("primary.genome.scf.fasta") } }, { assert snapshot( - process.out.scaffolds, - process.out.script, - process.out.log, - process.out.versions_masurca + file(process.out.scaffolds[0][1]).name, + file(process.out.script[0][1]).name, + file(process.out.config[0][1]).name, + file(process.out.log[0][1]).name, + process.out.findAll { key, val -> key.startsWith('versions') } ).match() } ) } @@ -217,46 +252,7 @@ nextflow_process { [], // no jump reads [file("https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/A_thaliana_Col-0_2mb/HiFi-Col-0_test_data.fastq.gz", checkIfExists: true)], [file("https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/A_thaliana_Col-0_2mb/ONT-Col-0_test_data.fastq.gz", checkIfExists: true)], - [], // no other reads - [] // no reference genome - ] - input[1] = 500 // fragment_mean - input[2] = 50 // fragment_stdev - input[3] = 0 // jump_mean (not used) - input[4] = 0 // jump_stdev (not used) - """ - } - } - - then { - assertAll( - { assert snapshot( - process.out.scaffolds, - process.out.script, - process.out.log, - process.out.versions_masurca - ).match() } - ) - } - - } - - test("sarscov2 - illumina - with_reference") { - - when { - process { - """ - input[0] = [ - [ id:'test'], - [ - file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz", checkIfExists: true) - ], - [], // no jump reads - [], // no pacbio - [], // no nanopore - [], // no other reads - [file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta.gz", checkIfExists: true)] + [] // no other reads ] input[1] = 500 // fragment_mean input[2] = 50 // fragment_stdev @@ -268,11 +264,20 @@ nextflow_process { then { assertAll( + { assert process.success }, + { assert path(process.out.scaffolds[0][1]).linesGzip.any { it.contains('AAA') } }, + { assert path(process.out.scaffolds[0][1]).linesGzip.any { it.contains('CCC') } }, + { assert path(process.out.script[0][1]).readLines().any { it.contains("# assemble.sh generated by masurca") } }, + { assert path(process.out.script[0][1]).readLines().any { it.contains("head -n 1 ESTIMATED_GENOME_SIZE.txt") } }, + { assert path(process.out.config[0][1]).readLines().last { it.contains("END") } }, + { assert path(process.out.log[0][1]).readLines().any { it.contains("All done") } }, + { assert path(process.out.log[0][1]).readLines().any { it.contains("primary.genome.scf.fasta") } }, { assert snapshot( - process.out.scaffolds, - process.out.script, - process.out.log, - process.out.versions_masurca + file(process.out.scaffolds[0][1]).name, + file(process.out.script[0][1]).name, + file(process.out.config[0][1]).name, + file(process.out.log[0][1]).name, + process.out.findAll { key, val -> key.startsWith('versions') } ).match() } ) } @@ -295,8 +300,7 @@ nextflow_process { [], // no jump reads [], // no pacbio [], // no nanopore - [], // no other reads - [] // no reference genome + [] // no other reads ] input[1] = 500 // fragment_mean input[2] = 50 // fragment_stdev diff --git a/modules/nf-core/masurca/tests/main.nf.test.snap b/modules/nf-core/masurca/tests/main.nf.test.snap index d02445871f0e..e3ed40e66900 100644 --- a/modules/nf-core/masurca/tests/main.nf.test.snap +++ b/modules/nf-core/masurca/tests/main.nf.test.snap @@ -17,91 +17,55 @@ ] ], + "timestamp": "2026-03-23T16:33:30.420661493", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.3" - }, - "timestamp": "2026-03-23T16:33:30.420661493" + } }, "homo_sapiens - illumina - single_end": { "content": [ - [ - [ - { - "id": "test" - }, - "primary.genome.scf.fasta:md5,d3365e7157fb041cb4006a2d11001762" - ] - ], - [ - [ - { - "id": "test" - }, - "assemble.sh:md5,72fb3063dc7370a4b2a95b93234bcf0e" - ] - ], - [ - [ - { - "id": "test" - }, - "test-masurca.log:md5,2a5a4548af7550a71e43cc82068f415f" - ] - ], - [ - [ - "MASURCA", - "masurca", - "4.1.4" + "test.scaffolds.fa.gz", + "assemble.sh", + "test_masurca_config.txt", + "test-masurca.log", + { + "versions_masurca": [ + [ + "MASURCA", + "masurca", + "4.1.4" + ] ] - ] + } ], + "timestamp": "2026-04-02T17:02:36.723413139", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.4", "nextflow": "25.10.4" - }, - "timestamp": "2026-03-25T17:52:08.347930423" + } }, "genomeassembler - hybrid - illumina_pacbio_nanopore": { "content": [ - [ - [ - { - "id": "test" - }, - "primary.genome.scf.fasta:md5,b4e3d34bb6c27bc38a5e5aafc9367bb1" - ] - ], - [ - [ - { - "id": "test" - }, - "assemble.sh:md5,1e0437f3c91ef409b9b743cc4dc4c3f5" - ] - ], - [ - [ - { - "id": "test" - }, - "test-masurca.log:md5,ef2bb59b6dd640565bad9825c540b032" - ] - ], - [ - [ - "MASURCA", - "masurca", - "4.1.4" + "test.scaffolds.fa.gz", + "assemble.sh", + "test_masurca_config.txt", + "test-masurca.log", + { + "versions_masurca": [ + [ + "MASURCA", + "masurca", + "4.1.4" + ] ] - ] + } ], + "timestamp": "2026-04-02T17:12:19.186769917", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.4", "nextflow": "25.10.4" - }, - "timestamp": "2026-03-25T18:05:57.515112182" + } }, "sarscov2 - illumina - paired_end": { "content": [ @@ -121,11 +85,11 @@ ] ], + "timestamp": "2026-03-23T16:33:21.946468822", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.3" - }, - "timestamp": "2026-03-23T16:33:21.946468822" + } }, "sarscov2 - illumina - paired_end - stub": { "content": [ @@ -171,51 +135,33 @@ ] } ], + "timestamp": "2026-03-24T21:44:20.998579705", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" - }, - "timestamp": "2026-03-24T21:44:20.998579705" + } }, "genomeassembler - hybrid - illumina_pacbio": { "content": [ - [ - [ - { - "id": "test" - }, - "primary.genome.scf.fasta:md5,f15d1bf333065e05cd96d46fe1c45f9f" - ] - ], - [ - [ - { - "id": "test" - }, - "assemble.sh:md5,dfe0ecc3e08715e5fdca93eed0c8f3bd" - ] - ], - [ - [ - { - "id": "test" - }, - "test-masurca.log:md5,e2f44ba808dfd6d759b97fb177d02049" - ] - ], - [ - [ - "MASURCA", - "masurca", - "4.1.4" + "test.scaffolds.fa.gz", + "assemble.sh", + "test_masurca_config.txt", + "test-masurca.log", + { + "versions_masurca": [ + [ + "MASURCA", + "masurca", + "4.1.4" + ] ] - ] + } ], + "timestamp": "2026-04-02T17:06:10.293289505", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.4", "nextflow": "25.10.4" - }, - "timestamp": "2026-03-25T18:01:40.129969288" + } }, "homo_sapiens - illumina - paired_end": { "content": [ @@ -251,11 +197,11 @@ ] ] ], + "timestamp": "2026-03-25T17:53:33.025513309", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.4" - }, - "timestamp": "2026-03-25T17:53:33.025513309" + } }, "sarscov2 - hybrid - illumina_nanopore": { "content": [ @@ -272,11 +218,11 @@ ] ], + "timestamp": "2026-03-23T16:39:05.105234025", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.4" - }, - "timestamp": "2026-03-23T16:39:05.105234025" + } }, "sarscov2 - illumina - single_end": { "content": [ @@ -319,11 +265,11 @@ ] } ], + "timestamp": "2026-03-20T16:53:14.230197251", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.3" - }, - "timestamp": "2026-03-20T16:53:14.230197251" + } }, "sarscov2 - hybrid - illumina_pacbio": { "content": [ @@ -340,11 +286,11 @@ ] ], + "timestamp": "2026-03-23T16:38:57.588039397", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.4" - }, - "timestamp": "2026-03-23T16:38:57.588039397" + } }, "homo_sapiens - illumina - paired_end - with_jump": { "content": [ @@ -380,51 +326,33 @@ ] ] ], + "timestamp": "2026-03-25T17:59:29.231599584", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.4" - }, - "timestamp": "2026-03-25T17:59:29.231599584" + } }, "genomeassembler - hybrid - illumina_nanopore": { "content": [ - [ - [ - { - "id": "test" - }, - "primary.genome.scf.fasta:md5,2181552d037a61458d322f732fe814dd" - ] - ], - [ - [ - { - "id": "test" - }, - "assemble.sh:md5,b8268c6dcb9d7b36b7312967359a73b5" - ] - ], - [ - [ - { - "id": "test" - }, - "test-masurca.log:md5,635979d40fc9bc1e159a10185637bcbe" - ] - ], - [ - [ - "MASURCA", - "masurca", - "4.1.4" + "test.scaffolds.fa.gz", + "assemble.sh", + "test_masurca_config.txt", + "test-masurca.log", + { + "versions_masurca": [ + [ + "MASURCA", + "masurca", + "4.1.4" + ] ] - ] + } ], + "timestamp": "2026-04-02T17:08:18.247500418", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.4", "nextflow": "25.10.4" - }, - "timestamp": "2026-03-25T18:03:32.971062838" + } }, "sarscov2 - illumina - with_reference": { "content": [ @@ -441,10 +369,10 @@ ] ], + "timestamp": "2026-03-25T15:27:36.181316757", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.4" - }, - "timestamp": "2026-03-25T15:27:36.181316757" + } } } \ No newline at end of file From 20d7f8f9f9337281419be0f38c9b81d2da6f25d0 Mon Sep 17 00:00:00 2001 From: LiaOb21 Date: Thu, 2 Apr 2026 18:00:52 +0100 Subject: [PATCH 08/10] syntax error fixed --- modules/nf-core/masurca/tests/main.nf.test | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/nf-core/masurca/tests/main.nf.test b/modules/nf-core/masurca/tests/main.nf.test index 3b0ef3bdc67a..47b342d943b7 100644 --- a/modules/nf-core/masurca/tests/main.nf.test +++ b/modules/nf-core/masurca/tests/main.nf.test @@ -36,7 +36,7 @@ nextflow_process { { assert path(process.out.scaffolds[0][1]).linesGzip.any { it.contains('CCC') } }, { assert path(process.out.script[0][1]).readLines().any { it.contains("# assemble.sh generated by masurca") } }, { assert path(process.out.script[0][1]).readLines().any { it.contains("head -n 1 ESTIMATED_GENOME_SIZE.txt") } }, - { assert path(process.out.config[0][1]).readLines().last { it.contains("END") } }, + { assert path(process.out.config[0][1]).readLines().last().contains("END") }, { assert path(process.out.log[0][1]).readLines().any { it.contains("All done") } }, { assert path(process.out.log[0][1]).readLines().any { it.contains("primary.genome.scf.fasta") } }, { assert snapshot( @@ -82,7 +82,7 @@ nextflow_process { { assert path(process.out.scaffolds[0][1]).linesGzip.any { it.contains('CCC') } }, { assert path(process.out.script[0][1]).readLines().any { it.contains("# assemble.sh generated by masurca") } }, { assert path(process.out.script[0][1]).readLines().any { it.contains("head -n 1 ESTIMATED_GENOME_SIZE.txt") } }, - { assert path(process.out.config[0][1]).readLines().last { it.contains("END") } }, + { assert path(process.out.config[0][1]).readLines().last().contains("END") }, { assert path(process.out.log[0][1]).readLines().any { it.contains("All done") } }, { assert path(process.out.log[0][1]).readLines().any { it.contains("primary.genome.scf.fasta") } }, { assert snapshot( @@ -131,7 +131,7 @@ nextflow_process { { assert path(process.out.scaffolds[0][1]).linesGzip.any { it.contains('CCC') } }, { assert path(process.out.script[0][1]).readLines().any { it.contains("# assemble.sh generated by masurca") } }, { assert path(process.out.script[0][1]).readLines().any { it.contains("head -n 1 ESTIMATED_GENOME_SIZE.txt") } }, - { assert path(process.out.config[0][1]).readLines().last { it.contains("END") } }, + { assert path(process.out.config[0][1]).readLines().last().contains("END") }, { assert path(process.out.log[0][1]).readLines().any { it.contains("All done") } }, { assert path(process.out.log[0][1]).readLines().any { it.contains("primary.genome.scf.fasta") } }, { assert snapshot( @@ -177,7 +177,7 @@ nextflow_process { { assert path(process.out.scaffolds[0][1]).linesGzip.any { it.contains('CCC') } }, { assert path(process.out.script[0][1]).readLines().any { it.contains("# assemble.sh generated by masurca") } }, { assert path(process.out.script[0][1]).readLines().any { it.contains("head -n 1 ESTIMATED_GENOME_SIZE.txt") } }, - { assert path(process.out.config[0][1]).readLines().last { it.contains("END") } }, + { assert path(process.out.config[0][1]).readLines().last().contains("END") }, { assert path(process.out.log[0][1]).readLines().any { it.contains("All done") } }, { assert path(process.out.log[0][1]).readLines().any { it.contains("primary.genome.scf.fasta") } }, { assert snapshot( @@ -223,7 +223,7 @@ nextflow_process { { assert path(process.out.scaffolds[0][1]).linesGzip.any { it.contains('CCC') } }, { assert path(process.out.script[0][1]).readLines().any { it.contains("# assemble.sh generated by masurca") } }, { assert path(process.out.script[0][1]).readLines().any { it.contains("head -n 1 ESTIMATED_GENOME_SIZE.txt") } }, - { assert path(process.out.config[0][1]).readLines().last { it.contains("END") } }, + { assert path(process.out.config[0][1]).readLines().last().contains("END") }, { assert path(process.out.log[0][1]).readLines().any { it.contains("All done") } }, { assert path(process.out.log[0][1]).readLines().any { it.contains("primary.genome.scf.fasta") } }, { assert snapshot( @@ -269,7 +269,7 @@ nextflow_process { { assert path(process.out.scaffolds[0][1]).linesGzip.any { it.contains('CCC') } }, { assert path(process.out.script[0][1]).readLines().any { it.contains("# assemble.sh generated by masurca") } }, { assert path(process.out.script[0][1]).readLines().any { it.contains("head -n 1 ESTIMATED_GENOME_SIZE.txt") } }, - { assert path(process.out.config[0][1]).readLines().last { it.contains("END") } }, + { assert path(process.out.config[0][1]).readLines().last().contains("END") }, { assert path(process.out.log[0][1]).readLines().any { it.contains("All done") } }, { assert path(process.out.log[0][1]).readLines().any { it.contains("primary.genome.scf.fasta") } }, { assert snapshot( From cc2154d6a40edadfe1697016a97658039ef6e535 Mon Sep 17 00:00:00 2001 From: LiaOb21 Date: Thu, 2 Apr 2026 22:55:01 +0100 Subject: [PATCH 09/10] tests work locally --- modules/nf-core/masurca/environment.yml | 3 - modules/nf-core/masurca/main.nf | 13 +- modules/nf-core/masurca/meta.yml | 112 +++++-- modules/nf-core/masurca/tests/main.nf.test | 31 +- .../nf-core/masurca/tests/main.nf.test.snap | 282 ++---------------- 5 files changed, 138 insertions(+), 303 deletions(-) diff --git a/modules/nf-core/masurca/environment.yml b/modules/nf-core/masurca/environment.yml index c949d090cb48..e29097684bfe 100644 --- a/modules/nf-core/masurca/environment.yml +++ b/modules/nf-core/masurca/environment.yml @@ -4,7 +4,4 @@ channels: - conda-forge - bioconda dependencies: - # TODO nf-core: List required Conda package(s). - # Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). - # For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. - "bioconda::masurca=4.1.4" diff --git a/modules/nf-core/masurca/main.nf b/modules/nf-core/masurca/main.nf index 98aae9b66795..4261580a1173 100644 --- a/modules/nf-core/masurca/main.nf +++ b/modules/nf-core/masurca/main.nf @@ -3,10 +3,10 @@ process MASURCA { label 'process_high' conda "${moduleDir}/environment.yml" - container "quay.io/ecoflowucl/masurca@sha256:49509d5c7d5e95e7de38127bb2d9bdd14e0d736e7397928132d05f2611aead05" + container "ecoflowucl/masurca:v4.1.4" input: - tuple val(meta), path(illumina), path(jump), path(pacbio), path(nanopore), path(other_reads) + tuple val(meta), path(illumina), path(jump), path(pacbio), path(nanopore) val fragment_mean val fragment_stdev val jump_mean @@ -108,10 +108,10 @@ process MASURCA { echo "#DO NOT use if you have less than 15x coverage by long reads." >> ${prefix}_masurca_config.txt echo "FLYE_ASSEMBLY=0" >> ${prefix}_masurca_config.txt echo "END" >> ${prefix}_masurca_config.txt - + # Generate assembly script masurca ${prefix}_masurca_config.txt - + ./assemble.sh > ${prefix}-masurca.log 2>&1 if [ -f CA*/primary.genome.scf.fasta ]; then @@ -122,12 +122,11 @@ process MASURCA { stub: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - """ + """ mkdir -p CA touch assemble.sh touch ${prefix}_masurca_config.txt - touch ${prefix}.scaffolds.fa.gz + echo "" | gzip > ${prefix}.scaffolds.fa.gz touch ${prefix}-masurca.log """ } - \ No newline at end of file diff --git a/modules/nf-core/masurca/meta.yml b/modules/nf-core/masurca/meta.yml index 69496bb5d4e6..ee721e6bed4a 100644 --- a/modules/nf-core/masurca/meta.yml +++ b/modules/nf-core/masurca/meta.yml @@ -1,6 +1,6 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "masurca" -description: The MaSuRCA (Maryland Super Read Cabog Assembler) genome assembly and analysis toolkit +description: The MaSuRCA (Maryland Super Read Cabog Assembler) genome assembly + and analysis toolkit keywords: - denovo - assembly @@ -13,52 +13,114 @@ tools: documentation: "https://github.com/alekseyzimin/masurca/blob/v4.1.4/README.md" tool_dev_url: "https://github.com/alekseyzimin/masurca" doi: "10.1101/gr.213405.116" - licence: ["GPL v3"] + licence: + - "GPL v3" identifier: biotools:masurca - input: - ### TODO nf-core: Add a description of all of the variables used as input - - meta: type: map description: | Groovy Map containing sample information e.g. `[ id:'sample1' ]` - - bam: + - illumina: type: file - description: Sorted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" + description: | + Input paired-end FastQ files (R1 and R2). + pattern: "*.{fastq.gz,fastq,fq.gz,fq}" + ontologies: + - edam: http://edamontology.org/format_1930 + - jump: + type: file + description: | + Jump/mate-pair FastQ files. + pattern: "*.{fastq.gz,fastq,fq.gz,fq}" ontologies: - - edam: "http://edamontology.org/format_2572" # BAM - - edam: "http://edamontology.org/format_2573" # CRAM - - edam: "http://edamontology.org/format_3462" # SAM - + - edam: http://edamontology.org/format_1930 + - pacbio: + type: file + description: | + PacBio FastQ files. + pattern: "*.{fastq.gz,fastq,fq.gz,fq}" + ontologies: + - edam: http://edamontology.org/format_1930 + - nanopore: + type: file + description: | + Nanopore FastQ files. + pattern: "*.{fastq.gz,fastq,fq.gz,fq}" + ontologies: + - edam: http://edamontology.org/format_1930 + - fragment_mean: + type: integer + description: Mean fragment size for Illumina paired-end reads + - fragment_stdev: + type: integer + description: Standard deviation of fragment size for Illumina paired-end + reads + - jump_mean: + type: integer + description: Mean fragment size for jump/mate-pair reads + - jump_stdev: + type: integer + description: Standard deviation of fragment size for jump/mate-pair reads output: - ### TODO nf-core: Add a description of all of the variables used as output - bam: + script: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "assemble.sh": + type: file + description: MaSuRCA assembly script + pattern: "assemble.sh" + ontologies: [] + scaffolds: - - meta: type: map description: | Groovy Map containing sample information e.g. `[ id:'sample1' ]` - - "*.bam": + - "*scaffolds.fa.gz": type: file - description: Sorted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" + description: Assembled scaffolds in FASTA format + pattern: "*-scaffolds.fa.gz" ontologies: - - edam: "http://edamontology.org/format_2572" # BAM - - edam: "http://edamontology.org/format_2573" # CRAM - - edam: "http://edamontology.org/format_3462" # SAM + - edam: http://edamontology.org/data_0925 + - edam: http://edamontology.org/format_1929 + - edam: http://edamontology.org/format_3989 + config: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*_masurca_config.txt": + type: file + description: MaSuRCA configuration file + pattern: "*_masurca_config.txt" + ontologies: [] + log: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*-masurca.log": + type: file + description: MaSuRCA assembly log file + pattern: "*-masurca.log" + ontologies: [] versions_masurca: - - - "${task.process}": + - - ${task.process}: type: string description: The name of the process - - "masurca": + - masurca: type: string description: The name of the tool - - "masurca --version": + - masurca --version | sed 's/version //g': type: eval description: The expression to obtain the version of the tool - topics: versions: - - ${task.process}: @@ -67,7 +129,7 @@ topics: - masurca: type: string description: The name of the tool - - masurca --version: + - masurca --version | sed 's/version //g': type: eval description: The expression to obtain the version of the tool authors: diff --git a/modules/nf-core/masurca/tests/main.nf.test b/modules/nf-core/masurca/tests/main.nf.test index 47b342d943b7..c4c201d3b5c0 100644 --- a/modules/nf-core/masurca/tests/main.nf.test +++ b/modules/nf-core/masurca/tests/main.nf.test @@ -18,8 +18,7 @@ nextflow_process { [file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz", checkIfExists: true)], [], // no jump reads [], // no pacbio - [], // no nanopore - [] // no other readse + [] // no nanopore ] input[1] = 500 // fragment_mean input[2] = 50 // fragment_stdev @@ -64,8 +63,7 @@ nextflow_process { ], [], // no jump reads [], // no pacbio - [], // no nanopore - [] // no other reads + [] // no nanopore ] input[1] = 500 // fragment_mean input[2] = 50 // fragment_stdev @@ -113,8 +111,7 @@ nextflow_process { file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test2_germline_2.fq.gz", checkIfExists: true) ], [], // no pacbio - [], // no nanopore - [] // no other reads + [] // no nanopore ] input[1] = 500 // fragment_mean input[2] = 50 // fragment_stdev @@ -159,8 +156,7 @@ nextflow_process { ], [], // no jump reads [file("https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/A_thaliana_Col-0_2mb/HiFi-Col-0_test_data.fastq.gz", checkIfExists: true)], - [], // no nanopore - [] // no other reads + [] // no nanopore ] input[1] = 500 // fragment_mean input[2] = 50 // fragment_stdev @@ -205,8 +201,7 @@ nextflow_process { ], [], // no jump reads [], // no pacbio - [file("https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/A_thaliana_Col-0_2mb/ONT-Col-0_test_data.fastq.gz", checkIfExists: true)], - [] // no other reads + [file("https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/A_thaliana_Col-0_2mb/ONT-Col-0_test_data.fastq.gz", checkIfExists: true)] ] input[1] = 500 // fragment_mean input[2] = 50 // fragment_stdev @@ -251,8 +246,7 @@ nextflow_process { ], [], // no jump reads [file("https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/A_thaliana_Col-0_2mb/HiFi-Col-0_test_data.fastq.gz", checkIfExists: true)], - [file("https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/A_thaliana_Col-0_2mb/ONT-Col-0_test_data.fastq.gz", checkIfExists: true)], - [] // no other reads + [file("https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/A_thaliana_Col-0_2mb/ONT-Col-0_test_data.fastq.gz", checkIfExists: true)] ] input[1] = 500 // fragment_mean input[2] = 50 // fragment_stdev @@ -299,8 +293,7 @@ nextflow_process { ], [], // no jump reads [], // no pacbio - [], // no nanopore - [] // no other reads + [] // no nanopore ] input[1] = 500 // fragment_mean input[2] = 50 // fragment_stdev @@ -313,10 +306,16 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(sanitizeOutput(process.out)).match() } + { assert snapshot( + file(process.out.scaffolds[0][1]).name, + file(process.out.script[0][1]).name, + file(process.out.config[0][1]).name, + file(process.out.log[0][1]).name, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } ) } } -} \ No newline at end of file +} diff --git a/modules/nf-core/masurca/tests/main.nf.test.snap b/modules/nf-core/masurca/tests/main.nf.test.snap index e3ed40e66900..be7eb9380be1 100644 --- a/modules/nf-core/masurca/tests/main.nf.test.snap +++ b/modules/nf-core/masurca/tests/main.nf.test.snap @@ -1,28 +1,4 @@ { - "sarscov2 - illumina - paired_end - with_jump": { - "content": [ - [ - - ], - [ - - ], - [ - - ], - [ - - ], - [ - - ] - ], - "timestamp": "2026-03-23T16:33:30.420661493", - "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.3" - } - }, "homo_sapiens - illumina - single_end": { "content": [ "test.scaffolds.fa.gz", @@ -67,65 +43,13 @@ "nextflow": "25.10.4" } }, - "sarscov2 - illumina - paired_end": { - "content": [ - [ - - ], - [ - - ], - [ - - ], - [ - - ], - [ - - ] - ], - "timestamp": "2026-03-23T16:33:21.946468822", - "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.3" - } - }, "sarscov2 - illumina - paired_end - stub": { "content": [ + "test.scaffolds.fa.gz", + "assemble.sh", + "test_masurca_config.txt", + "test-masurca.log", { - "config": [ - [ - { - "id": "test" - }, - "test_masurca_config.txt:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "log": [ - [ - { - "id": "test" - }, - "test-masurca.log:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "scaffolds": [ - [ - { - "id": "test" - }, - "primary.genome.scf.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "script": [ - [ - { - "id": "test" - }, - "assemble.sh:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], "versions_masurca": [ [ "MASURCA", @@ -135,7 +59,7 @@ ] } ], - "timestamp": "2026-03-24T21:44:20.998579705", + "timestamp": "2026-04-02T20:15:28.52290314", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -165,170 +89,45 @@ }, "homo_sapiens - illumina - paired_end": { "content": [ - [ - [ - { - "id": "test" - }, - "primary.genome.scf.fasta:md5,f8e715fa707cf39603cf09b700666d57" - ] - ], - [ - [ - { - "id": "test" - }, - "assemble.sh:md5,c4252fd02352bc365b95ac090c8bda6c" - ] - ], - [ - [ - { - "id": "test" - }, - "test-masurca.log:md5,6e9580c582a91f814b9870238e9d80ca" - ] - ], - [ - [ - "MASURCA", - "masurca", - "4.1.4" - ] - ] - ], - "timestamp": "2026-03-25T17:53:33.025513309", - "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.4" - } - }, - "sarscov2 - hybrid - illumina_nanopore": { - "content": [ - [ - - ], - [ - - ], - [ - - ], - [ - - ] - ], - "timestamp": "2026-03-23T16:39:05.105234025", - "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.4" - } - }, - "sarscov2 - illumina - single_end": { - "content": [ + "test.scaffolds.fa.gz", + "assemble.sh", + "test_masurca_config.txt", + "test-masurca.log", { - "0": [ - - ], - "1": [ - - ], - "2": [ - - ], - "3": [ - - ], - "4": [ - - ], - "5": [ - - ], - "config": [ - - ], - "contigs": [ - - ], - "flye_assembly": [ - - ], - "scaffolds": [ - - ], - "script": [ - - ], "versions_masurca": [ - + [ + "MASURCA", + "masurca", + "4.1.4" + ] ] } ], - "timestamp": "2026-03-20T16:53:14.230197251", + "timestamp": "2026-04-02T17:23:14.003329552", "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.3" - } - }, - "sarscov2 - hybrid - illumina_pacbio": { - "content": [ - [ - - ], - [ - - ], - [ - - ], - [ - - ] - ], - "timestamp": "2026-03-23T16:38:57.588039397", - "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.4", "nextflow": "25.10.4" } }, "homo_sapiens - illumina - paired_end - with_jump": { "content": [ - [ - [ - { - "id": "test" - }, - "primary.genome.scf.fasta:md5,cae25f87f26c09f972eae5aa1cc65617" - ] - ], - [ - [ - { - "id": "test" - }, - "assemble.sh:md5,a14782d876cbb76684e7599489a84b0b" - ] - ], - [ - [ - { - "id": "test" - }, - "test-masurca.log:md5,7ed0e3502298ef267e4fc61758edb564" - ] - ], - [ - [ - "MASURCA", - "masurca", - "4.1.4" + "test.scaffolds.fa.gz", + "assemble.sh", + "test_masurca_config.txt", + "test-masurca.log", + { + "versions_masurca": [ + [ + "MASURCA", + "masurca", + "4.1.4" + ] ] - ] + } ], - "timestamp": "2026-03-25T17:59:29.231599584", + "timestamp": "2026-04-02T17:33:32.958497714", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.4", "nextflow": "25.10.4" } }, @@ -353,26 +152,5 @@ "nf-test": "0.9.4", "nextflow": "25.10.4" } - }, - "sarscov2 - illumina - with_reference": { - "content": [ - [ - - ], - [ - - ], - [ - - ], - [ - - ] - ], - "timestamp": "2026-03-25T15:27:36.181316757", - "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.4" - } } } \ No newline at end of file From c3c4faf5b05f48f808618bac1a66a4de4f637263 Mon Sep 17 00:00:00 2001 From: LiaOb21 Date: Tue, 7 Apr 2026 12:36:01 +0100 Subject: [PATCH 10/10] moved task.ext to input due to linting errors --- modules/nf-core/masurca/main.nf | 27 ++++++---- modules/nf-core/masurca/meta.yml | 29 ++++++++++ modules/nf-core/masurca/tests/main.nf.test | 63 ++++++++++++++++++++++ 3 files changed, 110 insertions(+), 9 deletions(-) diff --git a/modules/nf-core/masurca/main.nf b/modules/nf-core/masurca/main.nf index 4261580a1173..d4890e07fdea 100644 --- a/modules/nf-core/masurca/main.nf +++ b/modules/nf-core/masurca/main.nf @@ -11,6 +11,15 @@ process MASURCA { val fragment_stdev val jump_mean val jump_stdev + val extend_jump_reads + val graph_kmer_size + val use_linking_mates + val lhe_coverage + val mega_reads_one_pass + val limit_jump_coverage + val ca_parameters + val close_gaps + val jf_size output: @@ -34,15 +43,15 @@ process MASURCA { def nanopore_file = nanopore ? nanopore.toRealPath() : "" // Configuration parameters with defaults from task.ext - def extend_jump_reads = task.ext.extend_jump_reads != null ? task.ext.extend_jump_reads : 0 - def graph_kmer_size = task.ext.graph_kmer_size ?: 'auto' - def use_linking_mates = task.ext.use_linking_mates != null ? task.ext.use_linking_mates : 0 - def lhe_coverage = task.ext.lhe_coverage ?: 25 - def mega_reads_one_pass = task.ext.mega_reads_one_pass != null ? task.ext.mega_reads_one_pass : 0 - def limit_jump_coverage = task.ext.limit_jump_coverage ?: 300 - def ca_parameters = task.ext.ca_parameters ?: 'cgwErrorRate=0.15' - def close_gaps = task.ext.close_gaps != null ? task.ext.close_gaps : 0 - def jf_size = task.ext.jf_size ?: 200000000 + // def extend_jump_reads = task.ext.extend_jump_reads != null ? task.ext.extend_jump_reads : 0 + // def graph_kmer_size = task.ext.graph_kmer_size ?: 'auto' + // def use_linking_mates = task.ext.use_linking_mates != null ? task.ext.use_linking_mates : 0 + // def lhe_coverage = task.ext.lhe_coverage ?: 25 + // def mega_reads_one_pass = task.ext.mega_reads_one_pass != null ? task.ext.mega_reads_one_pass : 0 + // def limit_jump_coverage = task.ext.limit_jump_coverage ?: 300 + // def ca_parameters = task.ext.ca_parameters ?: 'cgwErrorRate=0.15' + // def close_gaps = task.ext.close_gaps != null ? task.ext.close_gaps : 0 + // def jf_size = task.ext.jf_size ?: 200000000 """ echo "DATA" > ${prefix}_masurca_config.txt echo "#Illumina paired end reads supplied as " >> ${prefix}_masurca_config.txt diff --git a/modules/nf-core/masurca/meta.yml b/modules/nf-core/masurca/meta.yml index ee721e6bed4a..f96165adb2fd 100644 --- a/modules/nf-core/masurca/meta.yml +++ b/modules/nf-core/masurca/meta.yml @@ -63,6 +63,35 @@ input: - jump_stdev: type: integer description: Standard deviation of fragment size for jump/mate-pair reads + - extend_jump_reads: + type: boolean + description: "Whether to extend jump reads (default: 0 - false; use 1 for true)" + - graph_kmer_size: + type: string + description: "K-mer size for the de Bruijn graph (default: 'auto', it can be an + integer or 'auto')" + - use_linking_mates: + type: boolean + description: "Whether to use linking mates (default: 0 - false; use 1 for true)" + - lhe_coverage: + type: integer + description: "LHE coverage (default: 25)" + - mega_reads_one_pass: + type: boolean + description: "Whether to perform one pass of mega-reads (default: 0 - false; use + 1 for true)" + - limit_jump_coverage: + type: integer + description: "Limit for jump read coverage (default: 300)" + - ca_parameters: + type: string + description: "Parameters for the Celera Assembler (default: 'cgwErrorRate=0.15')" + - close_gaps: + type: boolean + description: "Whether to close gaps (default: 0 - false; use 1 for true)" + - jf_size: + type: integer + description: "Jellyfish hash size (default: 200000000)" output: script: - - meta: diff --git a/modules/nf-core/masurca/tests/main.nf.test b/modules/nf-core/masurca/tests/main.nf.test index c4c201d3b5c0..dffba62d12ad 100644 --- a/modules/nf-core/masurca/tests/main.nf.test +++ b/modules/nf-core/masurca/tests/main.nf.test @@ -24,6 +24,15 @@ nextflow_process { input[2] = 50 // fragment_stdev input[3] = 0 // jump_mean (not used) input[4] = 0 // jump_stdev (not used) + input[5] = 0 // extend_jump_reads + input[6] = 'auto' // graph_kmer_size + input[7] = 0 // use_linking_mates + input[8] = 25 // lhe_coverage + input[9] = 0 // mega_reads_one_pass + input[10] = 300 // limit_jump_coverage + input[11] = 'cgwErrorRate=0.15' // ca_parameters + input[12] = 0 // close_gaps + input[13] = 200000000 // jf_size """ } } @@ -69,6 +78,15 @@ nextflow_process { input[2] = 50 // fragment_stdev input[3] = 0 // jump_mean (not used) input[4] = 0 // jump_stdev (not used) + input[5] = 0 // extend_jump_reads + input[6] = 'auto' // graph_kmer_size + input[7] = 0 // use_linking_mates + input[8] = 25 // lhe_coverage + input[9] = 0 // mega_reads_one_pass + input[10] = 300 // limit_jump_coverage + input[11] = 'cgwErrorRate=0.15' // ca_parameters + input[12] = 0 // close_gaps + input[13] = 200000000 // jf_size """ } } @@ -117,6 +135,15 @@ nextflow_process { input[2] = 50 // fragment_stdev input[3] = 3600 // jump_mean input[4] = 200 // jump_stdev + input[5] = 0 // extend_jump_reads + input[6] = 'auto' // graph_kmer_size + input[7] = 0 // use_linking_mates + input[8] = 25 // lhe_coverage + input[9] = 0 // mega_reads_one_pass + input[10] = 300 // limit_jump_coverage + input[11] = 'cgwErrorRate=0.15' // ca_parameters + input[12] = 0 // close_gaps + input[13] = 200000000 // jf_size """ } } @@ -162,6 +189,15 @@ nextflow_process { input[2] = 50 // fragment_stdev input[3] = 0 // jump_mean (not used) input[4] = 0 // jump_stdev (not used) + input[5] = 0 // extend_jump_reads + input[6] = 'auto' // graph_kmer_size + input[7] = 0 // use_linking_mates + input[8] = 25 // lhe_coverage + input[9] = 0 // mega_reads_one_pass + input[10] = 300 // limit_jump_coverage + input[11] = 'cgwErrorRate=0.15' // ca_parameters + input[12] = 0 // close_gaps + input[13] = 200000000 // jf_size """ } } @@ -207,6 +243,15 @@ nextflow_process { input[2] = 50 // fragment_stdev input[3] = 0 // jump_mean (not used) input[4] = 0 // jump_stdev (not used) + input[5] = 0 // extend_jump_reads + input[6] = 'auto' // graph_kmer_size + input[7] = 0 // use_linking_mates + input[8] = 25 // lhe_coverage + input[9] = 0 // mega_reads_one_pass + input[10] = 300 // limit_jump_coverage + input[11] = 'cgwErrorRate=0.15' // ca_parameters + input[12] = 0 // close_gaps + input[13] = 200000000 // jf_size """ } } @@ -252,6 +297,15 @@ nextflow_process { input[2] = 50 // fragment_stdev input[3] = 0 // jump_mean (not used) input[4] = 0 // jump_stdev (not used) + input[5] = 0 // extend_jump_reads + input[6] = 'auto' // graph_kmer_size + input[7] = 0 // use_linking_mates + input[8] = 25 // lhe_coverage + input[9] = 0 // mega_reads_one_pass + input[10] = 300 // limit_jump_coverage + input[11] = 'cgwErrorRate=0.15' // ca_parameters + input[12] = 0 // close_gaps + input[13] = 200000000 // jf_size """ } } @@ -299,6 +353,15 @@ nextflow_process { input[2] = 50 // fragment_stdev input[3] = 0 // jump_mean (not used) input[4] = 0 // jump_stdev (not used) + input[5] = 0 // extend_jump_reads + input[6] = 'auto' // graph_kmer_size + input[7] = 0 // use_linking_mates + input[8] = 25 // lhe_coverage + input[9] = 0 // mega_reads_one_pass + input[10] = 300 // limit_jump_coverage + input[11] = 'cgwErrorRate=0.15' // ca_parameters + input[12] = 0 // close_gaps + input[13] = 200000000 // jf_size """ } }