Fixed hard-coded 24-region bug: the number of equations is now taken from the number of primers instead of being fixed at 24

Julius Booth · Julius Booth · commit 83b92bf044e4 · 2019-05-02T12:20:42.000-07:00
diff --git a/bin/prince b/bin/prince
@@ -13,6 +13,8 @@ from prince import __version__
 
 DEFAULT_K = 25
 DEFAULT_BOOST_OUTPUT = resource_filename('prince.resources', 'training_data_w_extensions.txt')
+DEFAULT_PRIMERS = resource_filename('prince.resources', 'TB_primers_extended.json')
+DEFAULT_TEMPLATES = resource_filename('prince.resources', 'templates.fasta')
 
 def main():
     parser = argparse.ArgumentParser(description='Prince Options.')
@@ -21,7 +23,7 @@ def main():
                         help="output file for training data / training data used to predict copy numbers for queries")
     parser.add_argument('-to', '--target_output', default="results/predictions.csv",
                         help="output file for query copy number predictions")
-    parser.add_argument('-tmp','--templates', default="templates.fasta",
+    parser.add_argument('-tmp','--templates', default=DEFAULT_TEMPLATES,
                 help="VNTR templates. Default is for M.TB")
     parser.add_argument('-tf', '--target_file', default=None,
                 help="target genome names in a text file")
@@ -31,7 +33,7 @@ def main():
                 help="Kmer size used during read recruitment.")
     parser.add_argument('-cn', '--copynumber', default=1,type=int,
                 help="Copy number for training genome.")
-    parser.add_argument('-p', '--primers', default="TB_primers_extended.json",
+    parser.add_argument('-p', '--primers', default=DEFAULT_PRIMERS,
                 help="Flanking sequences used in coverage adjustments")
     parser.add_argument('-np', '--num_procs', default=1,type=int,
                 help="Number of cores for parallel processing.")
@@ -44,12 +46,12 @@ def main():
     if prince_options.k != DEFAULT_K and prince_options.boost_output == DEFAULT_BOOST_OUTPUT:
         warnings.warn("Warning: Target kmer size does not equal training settings. May lead to inaccurate predictions.")
     
-    with open(resource_filename('prince.resources', prince_options.primers)) as primers:
+    with open(prince_options.primers) as primers:
         primers=json.load(primers)
 
     #Template data initialized
         
-    templates = list(SeqIO.parse(resource_filename('prince.resources', prince_options.templates), "fasta"))
+    templates = list(SeqIO.parse(prince_options.templates, "fasta"))
     templateNames = [t.id for t in templates]
     templates = [str(t.seq) for t in templates]
 
diff --git a/prince/match_score.py b/prince/match_score.py
@@ -146,10 +146,10 @@ def compute_match_score(query, template_obj, kmerLength, primers):
 
     #Run reads through Fine Filtering to get score for each template
     matchScore, flanking_coverage = fine_filtering(template_obj, recruitedReads, kmerLength, primers)
-    print(matchScore)
-    print(flanking_coverage)
+    print("VNTR Coverage:     ", matchScore)
+    print("Flanking Coverage: ", flanking_coverage)
     matchScore = [score/float(1+flanking_coverage[i]) for i,score in enumerate(matchScore)]
-    print(matchScore)
+    print("Adjusted Coverage: ", matchScore)
     print("\n")
     return matchScore, filename
 
diff --git a/prince/predict.py b/prince/predict.py
@@ -14,9 +14,9 @@ def get_X_and_Y(data,template):
             Y.append(cn)
     return(X,Y)
 
-def get_equations(data):
+def get_equations(data, number_of_equations):
     equations = []
-    for t in range(24):
+    for t in range(number_of_equations):
         X,Y = get_X_and_Y(data,t)
         X = np.array(X, dtype=np.float64)
         Y = np.array(Y, dtype=np.float64)
diff --git a/prince/query_sample.py b/prince/query_sample.py
@@ -6,6 +6,8 @@
 import multiprocessing as mp
 
 def test_target(opts, template_obj, primers):
+    NUM_LOCI = len(primers)
+
     # Get the query paths
     with open(opts.target_file) as file:
             queries = [line.rstrip("\n") for line in file]
@@ -18,7 +20,7 @@ def test_target(opts, template_obj, primers):
     
     # Write results
     data = get_data(opts.boost_output)
-    equations = get_equations(data)
+    equations = get_equations(data, NUM_LOCI)
     with open(opts.target_output,'a+') as file:
         if os.path.getsize(opts.target_output) == 0:
             file.write("Templates,")
diff --git a/prince/resources/TB_primers.json b/prince/resources/TB_primers.json