-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmatchbedsnpsstogenes.py
More file actions
70 lines (63 loc) · 2.21 KB
/
matchbedsnpsstogenes.py
File metadata and controls
70 lines (63 loc) · 2.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#requires interlap
#pip3 install interlap
#python3 matchpeakstogenes.py
from interlap import InterLap
import pandas as pd
#size of distance outside the peak to look
windowsize = 10000
#extract name from gff defline
def def_parse(astr):
dd = {}
a = astr.split(';')
for b in a:
c = b.split('=')
if len(c) < 2: continue
dd[c[0]] = c[1]
return dd
chr_tree = {}
fh = open("Zm-B73-REFERENCE-NAM-5.0_Zm00001eb.1.gff3") # file downloaded from https://download.maizegdb.org/Zm-B73-REFERENCE-NAM-5.0/
for x in fh:
if x[0] == '#': continue
y = x.strip().split('\t')
if len(y) < 2: continue
if y[2] != 'gene': continue
mychr = y[0]
mystart = int(y[3])
mystop = int(y[4])
dd = def_parse(y[-1])
myname = dd['ID']
if not mychr in chr_tree: chr_tree[mychr] = InterLap()
chr_tree[mychr].add((mystart,mystop,{"name":myname,"strand":y[6]}))
fh = open("WW-MM_bQTL.bed") # file containing bQTL
for x in fh:
y = x.strip().split('\t')
mychr = y[0].split('-')[1]
if not mychr in chr_tree:
continue
gene_dist_dict = {}
upstream_genes = chr_tree[mychr].find((int(y[1])-windowsize,int(y[1])))
for g in upstream_genes:
mystart,mystop,myinfodict = g
if not myinfodict["strand"] == '-': continue
gene_dist_dict[myinfodict["name"]] = int(y[1]) - mystop
if gene_dist_dict[myinfodict["name"]] < 0:
gene_dist_dict[myinfodict["name"]] = 0
downstream_genes = chr_tree[mychr].find((int(y[1]),int(y[1])+windowsize))
for g in downstream_genes:
mystart,mystop,myinfodict = g
if not myinfodict["strand"] == '+': continue
gene_dist_dict[myinfodict["name"]] = mystart - int(y[1])
if gene_dist_dict[myinfodict["name"]] < 0:
gene_dist_dict[myinfodict["name"]] = 0
plist = y[:]
if len(gene_dist_dict) == 0:
plist.extend(["intergenic","intergenic"])
else:
gene_list = list(gene_dist_dict)
gene_list.sort(key=lambda a:gene_dist_dict[a])
if gene_dist_dict[gene_list[0]] == 0:
plist.append("intragenic")
else:
plist.append(str(gene_dist_dict[gene_list[0]]))
plist.append(gene_list[0])
print("\t".join(plist))