-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathadd_expression.py
More file actions
executable file
·85 lines (67 loc) · 2.28 KB
/
add_expression.py
File metadata and controls
executable file
·85 lines (67 loc) · 2.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/python
# -*- coding: latin-1 -*-
#
# Simon H. Rasmussen
def extract_gene_ID(meta_info):
for l in meta_info.split(";"):
ll = l.split("=")
if ll[0] == "ID":
this_id = ll[1]
return this_id.split(":")[0]
elif ll[0] == "Name":
this_id = ll[1]
return this_id.split(":")[0]
def mk_anno(expr_file, f2_name,cols):
'''
Extracts annotations from a gff3 file,
adds the expression value and produces a bed-file.
'''
f1 = open(expr_file)
f2 = open(f2_name)
current_transcript = ""
current_gene = ""
exon_nr = 0
boundary_site_dic = {}
ex_dic = {}
i = 0
for l in f1:
fields = l.split()
if "gene_ID" == fields[0]:
i = i + 1
continue
# assignment
ex_ID = fields[0]
vals = []
val_mean = 0
for c in cols.split(","):
val_mean = val_mean + float(fields[int(c)])
vals.append(float(fields[int(c)]))
# Calc mean
val_mean = val_mean/len(vals)
ex_dic[ex_ID] = val_mean
for l in f2:
fields = l.split()
# assignment
chrom = fields[0]
start = (fields[1])
end = (fields[2])
# gene ID
ID = fields[3]
# transcript ID
ID = fields[3].split("_")[0].split(".")[0]
strand = fields[5]
try:
expression = "{0:.2f}".format(ex_dic[ID])
print "\t".join([chrom, start, end, fields[3], expression, strand])
except:
print "\t".join([chrom, start, end, fields[3], "NA", strand])
# Regions
# gene, 3utr, 5utr, cds, all, ncrna, lncrna
if __name__ == "__main__":
from optparse import OptionParser
parser = OptionParser()
parser.add_option("-1", action="store", type="string", dest="f1",default="", help="expression file")
parser.add_option("-2", action="store", type="string", dest="f2",default="", help="Annotation file")
parser.add_option("-c", action="store", type="string", dest="cols",default="", help="Columns to consider in expression file, 1-based. supple comma-separated list, more than 1 value will mean expression is averaged")
(options, args) = parser.parse_args()
mk_anno(options.f1, options.f2,options.cols)