-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathfilter_fasta.py
More file actions
executable file
·71 lines (54 loc) · 1.73 KB
/
filter_fasta.py
File metadata and controls
executable file
·71 lines (54 loc) · 1.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/usr/bin/env python
"""Input files are expected to be in fasta format. The script will traverse all files
in the input dir, so the input dir should contain only fasta files. The taxon list
should be a line-delimited text file containing the names of tips as they
correspond to those in the fasta alignments."""
import sys
import os
def filter(ifname, taxa_approved): #, ofname, taxa_approved):
ofname = ifname + "." + "filtered"
print "\nfiltering " + ifname + " > " + ifname + ".filtered"
tempofname = ofname + ".temp"
infile = open(ifname, "rb")
outfile = open(tempofname, "wb")
ntax = 0
OnValidTaxon = False
firstline = True
for line in infile:
if line[0] == '>':
cur_taxon = line.split()[0].strip('_')
if cur_taxon.strip('>') in taxa_approved:
outfile.write(cur_taxon + "\n")
ntax += 1
OnValidTaxon = True
else:
OnValidTaxon = False
elif OnValidTaxon:
outfile.write(line)
outfile.close()
if ntax == 0:
os.remove(tempofname)
print "No acceptable taxa found. No output saved."
else:
os.rename(tempofname,ofname)
print "Retained " + str(ntaxfound) + " taxa."
if __name__ == "__main__":
if len(sys.argv) < 3:
print "usage: filterfasta <fastafile> <acceptednamesfile>"
sys.exit(0)
ipath = sys.argv[1]
taxlistpath = sys.argv[2]
taxlist = open(taxlistpath, "rU")
global taxa_approved
taxa_approved = [taxon.strip() for taxon in taxlist.readlines()]
if os.path.isdir(ipath):
usedir = True
if ipath[len(ipath) - 1] != '/':
ipath += '/'
ifiles = os.listdir(ipath)
for ifname in ifiles:
if ifname[0] != '.':
filter(ipath + ifname, taxa_approved) # ipath + ofname, taxa_approved) #ifname)
else:
filter(ipath, taxa_approved)
# print "single case not yet implemented"