From 3a43b42282fc4c5872ab684b39de32f843a40e7b Mon Sep 17 00:00:00 2001 From: afshinfard Date: Wed, 22 Feb 2023 15:03:30 -0800 Subject: [PATCH 01/12] working prototype --- include/btllib/indexlr.hpp | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/include/btllib/indexlr.hpp b/include/btllib/indexlr.hpp index 1d64391c..b3fcbd7d 100644 --- a/include/btllib/indexlr.hpp +++ b/include/btllib/indexlr.hpp @@ -100,6 +100,7 @@ class Indexlr bool forward = false; std::string seq; std::string qual; + bool valid = true; }; using HashedKmer = Minimizer; @@ -232,7 +233,8 @@ class Indexlr static void filter_kmer_qual(Indexlr::HashedKmer& hk, const std::string& kmer_qual, - size_t q); + size_t q, + const std::string& filter_mode = "mask"); static size_t calc_kmer_quality(const std::string& qual); static void calc_minimizer( @@ -488,11 +490,16 @@ Indexlr::filter_hashed_kmer(Indexlr::HashedKmer& hk, inline void Indexlr::filter_kmer_qual(Indexlr::HashedKmer& hk, const std::string& kmer_qual, - size_t q) + size_t q, + const std::string& filter_mode) { if (calc_kmer_quality(kmer_qual) < q) { - hk.min_hash = std::numeric_limits::max(); - } + if (filter_mode == "mask") { + hk.min_hash = std::numeric_limits::max(); + } else if (filter_mode == "drop") { + hk.valid = false; + } + } } inline size_t @@ -546,7 +553,10 @@ Indexlr::calc_minimizer( if (ssize_t(min_current->pos) > min_pos_prev && min_current->min_hash != std::numeric_limits::max()) { min_pos_prev = ssize_t(min_current->pos); - minimizers.push_back(*min_current); + + if (min_current->valid) { // if the kmer is valid (not suppressed by filters ) + minimizers.push_back(*min_current); + } } } @@ -576,7 +586,7 @@ Indexlr::minimize(const std::string& seq, const std::string& qual) const hk, filter_in(), filter_out(), filter_in_bf.get(), filter_out_bf.get()); if (q > 0) { - filter_kmer_qual(hk, qual.substr(nh.get_pos(), k), q); + filter_kmer_qual(hk, qual.substr(nh.get_pos(), k), q, "drop"); } if (idx + 1 >= w) { From d5f0ff29bd4318a95ae069c919c46e67167affb1 Mon Sep 17 00:00:00 2001 From: afshinfard Date: Thu, 23 Feb 2023 14:55:46 -0800 Subject: [PATCH 02/12] drop and partAvg --- include/btllib/indexlr.hpp | 65 +++++++++++++++++++++++++++----------- recipes/indexlr.cpp | 15 +++++++-- 2 files changed, 59 insertions(+), 21 deletions(-) diff --git a/include/btllib/indexlr.hpp b/include/btllib/indexlr.hpp index b3fcbd7d..43498e55 100644 --- a/include/btllib/indexlr.hpp +++ b/include/btllib/indexlr.hpp @@ -52,12 +52,18 @@ class Indexlr static const unsigned LONG_MODE = 64; /** Include read sequence Phred score along with minimizer information. */ static const unsigned QUAL = 128; + /** Exclude kmers with quality score below the threshold. */ + static const unsigned Q_DROP = 256; + /** Consider only 10% of the base quality scores for averaging. */ + static const unsigned PART_AVG = 512; }; bool output_id() const { return bool(~flags & Flag::NO_ID); } bool output_bx() const { return bool(flags & Flag::BX); } bool output_seq() const { return bool(flags & Flag::SEQ); } bool output_qual() const { return bool(flags & Flag::QUAL); } + bool q_drop() const { return bool(flags & Flag::Q_DROP); } + bool part_avg() const { return bool(flags & Flag::PART_AVG); } bool filter_in() const { return bool(flags & Flag::FILTER_IN); } bool filter_out() const { return bool(flags & Flag::FILTER_OUT); } bool short_mode() const { return bool(flags & Flag::SHORT_MODE); } @@ -229,13 +235,15 @@ class Indexlr bool filter_in, bool filter_out, const BloomFilter& filter_in_bf, - const BloomFilter& filter_out_bf); + const BloomFilter& filter_out_bf, + bool drop); static void filter_kmer_qual(Indexlr::HashedKmer& hk, const std::string& kmer_qual, size_t q, - const std::string& filter_mode = "mask"); - static size_t calc_kmer_quality(const std::string& qual); + bool drop, + bool partial); + static size_t calc_kmer_quality(const std::string& qual, bool partial = false); static void calc_minimizer( const std::vector& hashed_kmers_buffer, @@ -468,21 +476,35 @@ Indexlr::filter_hashed_kmer(Indexlr::HashedKmer& hk, bool filter_in, bool filter_out, const BloomFilter& filter_in_bf, - const BloomFilter& filter_out_bf) + const BloomFilter& filter_out_bf, + bool drop) { if (filter_in && filter_out) { std::vector tmp; tmp = { hk.min_hash }; if (!filter_in_bf.contains(tmp) || filter_out_bf.contains(tmp)) { - hk.min_hash = std::numeric_limits::max(); + //hk.min_hash = std::numeric_limits::max(); + if (drop) { + hk.valid = false; + } else { + hk.min_hash = std::numeric_limits::max(); + } } } else if (filter_in) { if (!filter_in_bf.contains({ hk.min_hash })) { - hk.min_hash = std::numeric_limits::max(); + if (drop) { + hk.valid = false; + } else { + hk.min_hash = std::numeric_limits::max(); + } } } else if (filter_out) { if (filter_out_bf.contains({ hk.min_hash })) { - hk.min_hash = std::numeric_limits::max(); + if (drop) { + hk.valid = false; + } else { + hk.min_hash = std::numeric_limits::max(); + } } } } @@ -491,19 +513,20 @@ inline void Indexlr::filter_kmer_qual(Indexlr::HashedKmer& hk, const std::string& kmer_qual, size_t q, - const std::string& filter_mode) + bool drop, + bool partial) { - if (calc_kmer_quality(kmer_qual) < q) { - if (filter_mode == "mask") { - hk.min_hash = std::numeric_limits::max(); - } else if (filter_mode == "drop") { + if (calc_kmer_quality(kmer_qual, partial) < q) { + if (drop) { hk.valid = false; + } else { + hk.min_hash = std::numeric_limits::max(); } } } inline size_t -Indexlr::calc_kmer_quality(const std::string& qual) +Indexlr::calc_kmer_quality(const std::string& qual, bool partial) { // convert the quality scores to integers std::vector qual_ints; @@ -512,12 +535,16 @@ Indexlr::calc_kmer_quality(const std::string& qual) for (auto c : qual) { qual_ints.push_back(c - thirty_three); } - // calculate the mean (potential improvement: use other statistics) + // sort the quality scores + std::sort(qual_ints.begin(), qual_ints.end()); + + // calculate the mean quality score (consider only 10% of the lowest scores if partial) size_t sum = 0; - for (auto q : qual_ints) { - sum += q; + size_t n = (partial ? qual_ints.size() / 10 : qual_ints.size()); + for (size_t i = 0; i < n; ++i) { + sum += qual_ints[i]; } - return (sum / qual_ints.size()); + return sum / n; } inline void @@ -583,10 +610,10 @@ Indexlr::minimize(const std::string& seq, const std::string& qual) const output_qual() ? qual.substr(nh.get_pos(), k) : ""); filter_hashed_kmer( - hk, filter_in(), filter_out(), filter_in_bf.get(), filter_out_bf.get()); + hk, filter_in(), filter_out(), filter_in_bf.get(), filter_out_bf.get(), q_drop()); if (q > 0) { - filter_kmer_qual(hk, qual.substr(nh.get_pos(), k), q, "drop"); + filter_kmer_qual(hk, qual.substr(nh.get_pos(), k), q, q_drop(), part_avg()); } if (idx + 1 >= w) { diff --git a/recipes/indexlr.cpp b/recipes/indexlr.cpp index f381952f..26df38c4 100644 --- a/recipes/indexlr.cpp +++ b/recipes/indexlr.cpp @@ -43,12 +43,14 @@ print_usage() std::cerr << "Usage: " << PROGNAME << " -k K -w W [-q Q] [-r repeat_bf_path] [-s solid_bf_path] [--id] " - "[--bx] [--pos] [--seq] [--qual]" + "[--bx] [--pos] [--seq] [--qual] [--filter-mode]" "[-o FILE] FILE...\n\n" " -k K Use K as k-mer size.\n" " -w W Use W as sliding-window size.\n" " -q Q Filter kmers with average quality (Phred score) lower " "than Q [0]. \n" + " --q-drop Drop filtered kemrs instead of disqualifying from generating minimizers. \n" + " --part-avg Consider only 1/10 of the (lowest value) base quality scores for averaging. \n" " --id Include input sequence ids in the output. " "(Default if --bx is not provided)\n" " --bx Include input sequence barcodes in the output.\n" @@ -96,11 +98,12 @@ main(int argc, char* argv[]) bool verbose = false; unsigned k = 0, w = 0, t = DEFAULT_THREADS; size_t q = 0; + std::string filter_mode(""); bool w_set = false; bool k_set = false; bool q_set = false; int with_id = 0, with_bx = 0, with_len = 0, with_pos = 0, with_strand = 0, - with_seq = 0, with_qual = 0; + with_seq = 0, with_qual = 0, with_q_drop = 0, with_part_avg = 0; std::unique_ptr repeat_bf, solid_bf; bool with_repeat = false, with_solid = false; int long_mode = 0; @@ -113,6 +116,8 @@ main(int argc, char* argv[]) { "pos", no_argument, &with_pos, 1 }, { "strand", no_argument, &with_strand, 1 }, { "seq", no_argument, &with_seq, 1 }, + { "q-drop", no_argument, &with_q_drop, 1 }, + { "part-avg", no_argument, &with_part_avg, 1 }, { "qual", no_argument, &with_qual, 1 }, { "long", no_argument, &long_mode, 1 }, { "help", no_argument, &help, 1 }, @@ -231,6 +236,12 @@ main(int argc, char* argv[]) if (bool(with_qual)) { flags |= btllib::Indexlr::Flag::QUAL; } + if (bool(with_q_drop)) { + flags |= btllib::Indexlr::Flag::Q_DROP; + } + if (bool(with_part_avg)) { + flags |= btllib::Indexlr::Flag::PART_AVG; + } if (bool(long_mode)) { flags |= btllib::Indexlr::Flag::LONG_MODE; } else { From 508a303749a5fc17a4bbd6069e0e7614845ef3ac Mon Sep 17 00:00:00 2001 From: afshinfard Date: Thu, 23 Feb 2023 14:59:34 -0800 Subject: [PATCH 03/12] rm temp code --- recipes/indexlr.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/indexlr.cpp b/recipes/indexlr.cpp index 26df38c4..2dbf6a08 100644 --- a/recipes/indexlr.cpp +++ b/recipes/indexlr.cpp @@ -43,7 +43,7 @@ print_usage() std::cerr << "Usage: " << PROGNAME << " -k K -w W [-q Q] [-r repeat_bf_path] [-s solid_bf_path] [--id] " - "[--bx] [--pos] [--seq] [--qual] [--filter-mode]" + "[--bx] [--pos] [--seq] [--qual] [--q-drop]" "[-o FILE] FILE...\n\n" " -k K Use K as k-mer size.\n" " -w W Use W as sliding-window size.\n" From 33dc3a2658182f279babd5b9a80519d73f16a762 Mon Sep 17 00:00:00 2001 From: afshinfard Date: Thu, 23 Feb 2023 15:00:33 -0800 Subject: [PATCH 04/12] rm old code --- recipes/indexlr.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/recipes/indexlr.cpp b/recipes/indexlr.cpp index 2dbf6a08..18e266e1 100644 --- a/recipes/indexlr.cpp +++ b/recipes/indexlr.cpp @@ -98,7 +98,6 @@ main(int argc, char* argv[]) bool verbose = false; unsigned k = 0, w = 0, t = DEFAULT_THREADS; size_t q = 0; - std::string filter_mode(""); bool w_set = false; bool k_set = false; bool q_set = false; From 96f5a1cfac69d8453e6877f41af550a9aff4b608 Mon Sep 17 00:00:00 2001 From: afshinfard Date: Thu, 23 Feb 2023 15:20:11 -0800 Subject: [PATCH 05/12] happy clang --- include/btllib/indexlr.hpp | 9 +++++---- recipes/indexlr.cpp | 6 ++++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/include/btllib/indexlr.hpp b/include/btllib/indexlr.hpp index 43498e55..5c33ef98 100644 --- a/include/btllib/indexlr.hpp +++ b/include/btllib/indexlr.hpp @@ -483,7 +483,6 @@ Indexlr::filter_hashed_kmer(Indexlr::HashedKmer& hk, std::vector tmp; tmp = { hk.min_hash }; if (!filter_in_bf.contains(tmp) || filter_out_bf.contains(tmp)) { - //hk.min_hash = std::numeric_limits::max(); if (drop) { hk.valid = false; } else { @@ -538,13 +537,13 @@ Indexlr::calc_kmer_quality(const std::string& qual, bool partial) // sort the quality scores std::sort(qual_ints.begin(), qual_ints.end()); - // calculate the mean quality score (consider only 10% of the lowest scores if partial) + // calculate the mean quality score size_t sum = 0; size_t n = (partial ? qual_ints.size() / 10 : qual_ints.size()); for (size_t i = 0; i < n; ++i) { sum += qual_ints[i]; } - return sum / n; + return sum / n; } inline void @@ -610,7 +609,9 @@ Indexlr::minimize(const std::string& seq, const std::string& qual) const output_qual() ? qual.substr(nh.get_pos(), k) : ""); filter_hashed_kmer( - hk, filter_in(), filter_out(), filter_in_bf.get(), filter_out_bf.get(), q_drop()); + hk, filter_in(), filter_out(), + filter_in_bf.get(), filter_out_bf.get(), + q_drop()); if (q > 0) { filter_kmer_qual(hk, qual.substr(nh.get_pos(), k), q, q_drop(), part_avg()); diff --git a/recipes/indexlr.cpp b/recipes/indexlr.cpp index 18e266e1..79cc5b35 100644 --- a/recipes/indexlr.cpp +++ b/recipes/indexlr.cpp @@ -49,8 +49,10 @@ print_usage() " -w W Use W as sliding-window size.\n" " -q Q Filter kmers with average quality (Phred score) lower " "than Q [0]. \n" - " --q-drop Drop filtered kemrs instead of disqualifying from generating minimizers. \n" - " --part-avg Consider only 1/10 of the (lowest value) base quality scores for averaging. \n" + " --q-drop Drop filtered kemrs instead of disqualifying them" + "from generating minimizers. \n" + " --part-avg Consider only 1/10 of the (lowest value) base quality " + "scores for averaging. \n" " --id Include input sequence ids in the output. " "(Default if --bx is not provided)\n" " --bx Include input sequence barcodes in the output.\n" From 0f3b6f907cadfe64fe07179ad9780e85ad4ec8c1 Mon Sep 17 00:00:00 2001 From: afshinfard Date: Thu, 23 Feb 2023 15:51:41 -0800 Subject: [PATCH 06/12] happy clang --- include/btllib/indexlr.hpp | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/include/btllib/indexlr.hpp b/include/btllib/indexlr.hpp index 5c33ef98..63ff63a2 100644 --- a/include/btllib/indexlr.hpp +++ b/include/btllib/indexlr.hpp @@ -243,7 +243,8 @@ class Indexlr size_t q, bool drop, bool partial); - static size_t calc_kmer_quality(const std::string& qual, bool partial = false); + static size_t calc_kmer_quality(const std::string& qual, + bool partial = false); static void calc_minimizer( const std::vector& hashed_kmers_buffer, @@ -521,7 +522,7 @@ Indexlr::filter_kmer_qual(Indexlr::HashedKmer& hk, } else { hk.min_hash = std::numeric_limits::max(); } - } + } } inline size_t @@ -579,8 +580,7 @@ Indexlr::calc_minimizer( if (ssize_t(min_current->pos) > min_pos_prev && min_current->min_hash != std::numeric_limits::max()) { min_pos_prev = ssize_t(min_current->pos); - - if (min_current->valid) { // if the kmer is valid (not suppressed by filters ) + if (min_current->valid) { // if the kmer is valid (not suppressed by filters) minimizers.push_back(*min_current); } } @@ -608,13 +608,12 @@ Indexlr::minimize(const std::string& seq, const std::string& qual) const output_seq() ? seq.substr(nh.get_pos(), k) : "", output_qual() ? qual.substr(nh.get_pos(), k) : ""); - filter_hashed_kmer( - hk, filter_in(), filter_out(), - filter_in_bf.get(), filter_out_bf.get(), - q_drop()); + filter_hashed_kmer(hk, filter_in(), filter_out(), + filter_in_bf.get(), filter_out_bf.get(), q_drop()); if (q > 0) { - filter_kmer_qual(hk, qual.substr(nh.get_pos(), k), q, q_drop(), part_avg()); + filter_kmer_qual(hk, qual.substr(nh.get_pos(), k), + q, q_drop(), part_avg()); } if (idx + 1 >= w) { From 69e9b3cfc4a3d4c375300850e014496dcc4b422a Mon Sep 17 00:00:00 2001 From: afshinfard Date: Thu, 2 Mar 2023 10:33:57 -0800 Subject: [PATCH 07/12] clang-formatted --- include/btllib/indexlr.hpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/include/btllib/indexlr.hpp b/include/btllib/indexlr.hpp index 63ff63a2..7172568a 100644 --- a/include/btllib/indexlr.hpp +++ b/include/btllib/indexlr.hpp @@ -580,7 +580,8 @@ Indexlr::calc_minimizer( if (ssize_t(min_current->pos) > min_pos_prev && min_current->min_hash != std::numeric_limits::max()) { min_pos_prev = ssize_t(min_current->pos); - if (min_current->valid) { // if the kmer is valid (not suppressed by filters) + if (min_current->valid) { + // if the kmer is valid (not suppressed by filters) minimizers.push_back(*min_current); } } @@ -608,12 +609,16 @@ Indexlr::minimize(const std::string& seq, const std::string& qual) const output_seq() ? seq.substr(nh.get_pos(), k) : "", output_qual() ? qual.substr(nh.get_pos(), k) : ""); - filter_hashed_kmer(hk, filter_in(), filter_out(), - filter_in_bf.get(), filter_out_bf.get(), q_drop()); + filter_hashed_kmer(hk, + filter_in(), + filter_out(), + filter_in_bf.get(), + filter_out_bf.get(), + q_drop()); if (q > 0) { - filter_kmer_qual(hk, qual.substr(nh.get_pos(), k), - q, q_drop(), part_avg()); + filter_kmer_qual( + hk, qual.substr(nh.get_pos(), k), q, q_drop(), part_avg()); } if (idx + 1 >= w) { From 98a60cf6ae61d5e5d47420ca2dcc5020f5d907d1 Mon Sep 17 00:00:00 2001 From: afshinfard Date: Thu, 2 Mar 2023 11:20:10 -0800 Subject: [PATCH 08/12] clang-tidy --- include/btllib/indexlr.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/btllib/indexlr.hpp b/include/btllib/indexlr.hpp index 7172568a..eecd64cf 100644 --- a/include/btllib/indexlr.hpp +++ b/include/btllib/indexlr.hpp @@ -531,6 +531,7 @@ Indexlr::calc_kmer_quality(const std::string& qual, bool partial) // convert the quality scores to integers std::vector qual_ints; const int thirty_three = 33; + const size_t ten = 10; qual_ints.reserve(qual.size()); for (auto c : qual) { qual_ints.push_back(c - thirty_three); @@ -540,7 +541,7 @@ Indexlr::calc_kmer_quality(const std::string& qual, bool partial) // calculate the mean quality score size_t sum = 0; - size_t n = (partial ? qual_ints.size() / 10 : qual_ints.size()); + size_t n = (partial ? qual_ints.size() / ten : qual_ints.size()); for (size_t i = 0; i < n; ++i) { sum += qual_ints[i]; } From 207226e1bf7e15c861e121ff568b60c1c095fb8f Mon Sep 17 00:00:00 2001 From: afshinfard Date: Thu, 2 Mar 2023 11:58:55 -0800 Subject: [PATCH 09/12] happy mac tests --- include/btllib/indexlr.hpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/btllib/indexlr.hpp b/include/btllib/indexlr.hpp index eecd64cf..97fa0663 100644 --- a/include/btllib/indexlr.hpp +++ b/include/btllib/indexlr.hpp @@ -542,6 +542,13 @@ Indexlr::calc_kmer_quality(const std::string& qual, bool partial) // calculate the mean quality score size_t sum = 0; size_t n = (partial ? qual_ints.size() / ten : qual_ints.size()); + if (n == 0) { + if (qual_ints.size() != 0) { + n = 1; + } else { + return 0; + } + } for (size_t i = 0; i < n; ++i) { sum += qual_ints[i]; } From 89473a4aebdc6331ff457aa5fd660aad84f5768b Mon Sep 17 00:00:00 2001 From: afshinfard Date: Thu, 2 Mar 2023 12:30:46 -0800 Subject: [PATCH 10/12] clang formatted --- include/btllib/indexlr.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/btllib/indexlr.hpp b/include/btllib/indexlr.hpp index 97fa0663..40a77f2f 100644 --- a/include/btllib/indexlr.hpp +++ b/include/btllib/indexlr.hpp @@ -543,7 +543,7 @@ Indexlr::calc_kmer_quality(const std::string& qual, bool partial) size_t sum = 0; size_t n = (partial ? qual_ints.size() / ten : qual_ints.size()); if (n == 0) { - if (qual_ints.size() != 0) { + if (!qual_ints.empty()) { n = 1; } else { return 0; From 0819ec48d289b84df66a5533251a113296774fc9 Mon Sep 17 00:00:00 2001 From: Amirhossein Afshinfard <31241012+aafshinfard@users.noreply.github.com> Date: Thu, 2 Mar 2023 16:44:02 -0800 Subject: [PATCH 11/12] Update recipes/indexlr.cpp Co-authored-by: Lauren Coombe --- recipes/indexlr.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/indexlr.cpp b/recipes/indexlr.cpp index 79cc5b35..9765d975 100644 --- a/recipes/indexlr.cpp +++ b/recipes/indexlr.cpp @@ -49,7 +49,7 @@ print_usage() " -w W Use W as sliding-window size.\n" " -q Q Filter kmers with average quality (Phred score) lower " "than Q [0]. \n" - " --q-drop Drop filtered kemrs instead of disqualifying them" + " --q-drop Drop filtered kmers instead of disqualifying them" "from generating minimizers. \n" " --part-avg Consider only 1/10 of the (lowest value) base quality " "scores for averaging. \n" From 86bfb9865749e5ef8e29f192d25fdf6446b483bb Mon Sep 17 00:00:00 2001 From: Amirhossein Afshinfard <31241012+aafshinfard@users.noreply.github.com> Date: Thu, 2 Mar 2023 16:44:13 -0800 Subject: [PATCH 12/12] Update recipes/indexlr.cpp Co-authored-by: Lauren Coombe --- recipes/indexlr.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/indexlr.cpp b/recipes/indexlr.cpp index 9765d975..8fac8c49 100644 --- a/recipes/indexlr.cpp +++ b/recipes/indexlr.cpp @@ -43,7 +43,7 @@ print_usage() std::cerr << "Usage: " << PROGNAME << " -k K -w W [-q Q] [-r repeat_bf_path] [-s solid_bf_path] [--id] " - "[--bx] [--pos] [--seq] [--qual] [--q-drop]" + "[--bx] [--pos] [--seq] [--qual] [--q-drop] [--part-avg]" "[-o FILE] FILE...\n\n" " -k K Use K as k-mer size.\n" " -w W Use W as sliding-window size.\n"