Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions modules/nf-core/seqkit/sample/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment definition for the seqkit/sample module.
# Channels listed for resolving the dependency below.
channels:
- conda-forge
- bioconda
dependencies:
# seqkit is pinned to an exact version and taken explicitly from bioconda;
# the container tags in this module's main.nf carry the same 2.13.0 version.
- bioconda::seqkit=2.13.0
54 changes: 54 additions & 0 deletions modules/nf-core/seqkit/sample/main.nf
Comment thread
emmcauley marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// Sample sequences from a FASTA/FASTQ file with `seqkit sample`.
//
// Input:  tuple (meta, fastx) - meta map (its `id` is used for the tag and the
//         default output prefix) and the FASTA/FASTQ file to sample from.
// Output: fastx           - tuple (meta, "<prefix>.<extension>")
//         versions_seqkit - (process name, 'seqkit', version string), also
//                           published on the `versions` topic.
//
// The output extension is normalised: inputs whose name ends in
// .fasta/.fa/.fas/.fna/.fsa (optionally followed by .gz) produce "fasta", every
// other input produces "fastq"; ".gz" is appended when the input name ends in
// ".gz". How many sequences are sampled is controlled entirely by
// `task.ext.args`, which is passed through to the seqkit command line.
process SEQKIT_SAMPLE {
    tag "${meta.id}"
    label 'process_low'

    conda "${moduleDir}/environment.yml"
    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
        ? 'oras://community.wave.seqera.io/library/seqkit:2.13.0--205358a3675c7775'
        : 'community.wave.seqera.io/library/seqkit:2.13.0--05c0a96bf9fb2751'}"

    input:
    tuple val(meta), path(fastx)

    output:
    tuple val(meta), path("${prefix}.${extension}"), emit: fastx
    tuple val("${task.process}"), val('seqkit'), eval("seqkit version | sed 's/^.*v//'"), emit: versions_seqkit, topic: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // `prefix` and `extension` are deliberately assigned without `def`:
    // the output block above refers to them by name.
    prefix = task.ext.prefix ?: "${meta.id}"
    // Anything that does not look like FASTA is treated as FASTQ.
    // Keep this expression identical to the one in the stub section below.
    def is_fasta = ("${fastx}" ==~ /.+\.(fasta|fa|fas|fna|fsa)(\.gz)?/)
    def is_gzipped = fastx.toString().endsWith('.gz')
    extension = (is_fasta ? "fasta" : "fastq") + (is_gzipped ? ".gz" : "")
    // seqkit would be asked to write onto the staged input file; refuse early.
    if ("${prefix}.${extension}" == "${fastx}") {
        error("Input and output names are the same, use \"task.ext.prefix\" to disambiguate!")
    }
    """
    seqkit \\
        sample \\
        --threads ${task.cpus} \\
        ${args} \\
        ${fastx} \\
        -o ${prefix}.${extension}
    """

    stub:
    prefix = task.ext.prefix ?: "${meta.id}"
    // Same extension logic as the script section, so the stub declares the
    // same output file name as a real run would.
    def is_fasta = ("${fastx}" ==~ /.+\.(fasta|fa|fas|fna|fsa)(\.gz)?/)
    def is_gzipped = fastx.toString().endsWith('.gz')
    extension = (is_fasta ? "fasta" : "fastq") + (is_gzipped ? ".gz" : "")
    if ("${prefix}.${extension}" == "${fastx}") {
        error("Input and output names are the same, use \"task.ext.prefix\" to disambiguate!")
    }
    // A ".gz" placeholder is produced by piping through gzip rather than by touch.
    def create_cmd = is_gzipped ? "echo '' | gzip >" : "touch"
    """
    ${create_cmd} ${prefix}.${extension}
    """
}
69 changes: 69 additions & 0 deletions modules/nf-core/seqkit/sample/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
name: "seqkit_sample"
description: Sample sequences from FASTA/FASTQ files by number or proportion
keywords:
  - genomics
  - fasta
  - fastq
  - sample
  - subset
  - seqkit
tools:
  - "seqkit":
      description: "A cross-platform and ultrafast toolkit for FASTA/Q file manipulation"
      homepage: "https://bioinf.shenwei.me/seqkit/"
      documentation: "https://bioinf.shenwei.me/seqkit/usage/"
      tool_dev_url: "https://github.com/shenwei356/seqkit"
      doi: "10.1371/journal.pone.0163962"
      licence: ["MIT"]
      identifier: biotools:seqkit
input:
  - - meta:
        type: map
        description: |
          Groovy Map containing sample information
          e.g. `[ id:'sample1' ]`
    - fastx:
        type: file
        description: Input FASTA or FASTQ file
        # Lists every FASTA suffix that main.nf recognises (including .fna),
        # plus the FASTQ suffixes.
        pattern: "*.{fsa,fas,fa,fna,fasta,fastq,fq}{,.gz}"
        ontologies:
          - edam: http://edamontology.org/format_1929 # FASTA
          - edam: http://edamontology.org/format_1930 # FASTQ
output:
  fastx:
    - - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1' ]`
      - ${prefix}.${extension}:
          type: file
          description: Sampled output FASTA or FASTQ file
          pattern: "*.{fasta,fasta.gz,fastq,fastq.gz}"
          ontologies:
            - edam: http://edamontology.org/format_1929 # FASTA
            - edam: http://edamontology.org/format_1930 # FASTQ
  versions_seqkit:
    - - ${task.process}:
          type: string
          description: The name of the process
      - seqkit:
          type: string
          description: The name of the tool
      - seqkit version | sed 's/^.*v//':
          type: string
          description: The expression to obtain the version of the tool
topics:
  versions:
    - - ${task.process}:
          type: string
          description: The name of the process
      - seqkit:
          type: string
          description: The name of the tool
      - seqkit version | sed 's/^.*v//':
          type: string
          description: The expression to obtain the version of the tool
authors:
  - "@emmcauley"
maintainers:
  - "@emmcauley"
103 changes: 103 additions & 0 deletions modules/nf-core/seqkit/sample/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// nf-test suite for SEQKIT_SAMPLE.
//
// Every case loads ./nextflow.config, which forwards `params.module_args` to the
// process as `ext.args`; the fixed `--rand-seed` keeps the sampled output (and
// therefore the snapshot checksums) reproducible between runs.
nextflow_process {

    name "Test Process SEQKIT_SAMPLE"
    script "../main.nf"
    process "SEQKIT_SAMPLE"

    tag "modules"
    tag "modules_nfcore"
    tag "seqkit"
    tag "seqkit/sample"

    // Gzipped FASTQ input: output is expected to be named test.fastq.gz.
    test("sarscov2 - fastq_gz - proportion") {
        config './nextflow.config'
        when {
            params {
                module_args = '--proportion 0.5 --rand-seed 11'
            }
            process {
                """
                input[0] = [
                    [ id:'test' ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
                ]
                """
            }
        }

        then {
            // Assert success explicitly: match() on its own records a fresh
            // snapshot when none exists, which would let a failed run pass.
            assertAll(
                { assert process.success },
                { assert snapshot(sanitizeOutput(process.out)).match() }
            )
        }

    }

    // Uncompressed FASTA input: output is expected to be named test.fasta.
    test("sarscov2 - genome_fasta - proportion") {
        config './nextflow.config'
        when {
            params {
                module_args = '--proportion 0.5 --rand-seed 11'
            }
            process {
                """
                input[0] = [
                    [ id:'test' ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(sanitizeOutput(process.out)).match() }
            )
        }

    }

    // meta.id 'test_1' makes the derived output name equal to the input name
    // (test_1.fastq.gz), which the process must reject.
    test("file_name_conflict - fail_with_error") {
        config './nextflow.config'
        when {
            params {
                module_args = '--proportion 0.5 --rand-seed 11'
            }
            process {
                """
                input[0] = [
                    [ id:'test_1' ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.failed },
                { assert process.errorReport.contains("Input and output names are the same") }
            )
        }

    }

    // Stub run: only the placeholder output and the version tuple are checked.
    test("sarscov2 - fastq_gz - proportion - stub") {
        config './nextflow.config'
        options "-stub"

        when {
            params {
                module_args = '--proportion 0.5 --rand-seed 11'
            }
            process {
                """
                input[0] = [
                    [ id:'test' ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(sanitizeOutput(process.out)).match() }
            )
        }

    }

}
80 changes: 80 additions & 0 deletions modules/nf-core/seqkit/sample/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
{
"sarscov2 - fastq_gz - proportion": {
"content": [
{
"fastx": [
[
{
"id": "test"
},
"test.fastq.gz:md5,8074b48d72bb56223c77080e7a0e251a"
]
],
"versions_seqkit": [
[
"SEQKIT_SAMPLE",
"seqkit",
"2.13.0"
]
]
}
],
"meta": {
"nf-test": "0.9.3",
"nextflow": "25.10.2"
},
"timestamp": "2026-04-08T11:58:29.490916"
},
"sarscov2 - genome_fasta - proportion": {
"content": [
{
"fastx": [
[
{
"id": "test"
},
"test.fasta:md5,483f4a5dfe60171c86ee9b7e6dff908b"
]
],
"versions_seqkit": [
[
"SEQKIT_SAMPLE",
"seqkit",
"2.13.0"
]
]
}
],
"meta": {
"nf-test": "0.9.3",
"nextflow": "25.10.2"
},
"timestamp": "2026-04-08T11:58:48.033786"
},
"sarscov2 - fastq_gz - proportion - stub": {
"content": [
{
"fastx": [
[
{
"id": "test"
},
"test.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
]
],
"versions_seqkit": [
[
"SEQKIT_SAMPLE",
"seqkit",
"2.13.0"
]
]
}
],
"meta": {
"nf-test": "0.9.3",
"nextflow": "25.10.2"
},
"timestamp": "2026-04-08T11:59:23.617655"
}
}
5 changes: 5 additions & 0 deletions modules/nf-core/seqkit/sample/tests/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Test-only configuration: forwards the `module_args` parameter set by each
// nf-test case to the SEQKIT_SAMPLE process as its `ext.args`.
process {
withName: 'SEQKIT_SAMPLE' {
// main.nf reads this as `task.ext.args` and inserts it into the seqkit command line
ext.args = params.module_args
}
}
Loading