-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbrowserShotsFromSessionTextFile.py
More file actions
executable file
·88 lines (74 loc) · 3.42 KB
/
browserShotsFromSessionTextFile.py
File metadata and controls
executable file
·88 lines (74 loc) · 3.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/bin/env python
import argparse
import requests
from bs4 import BeautifulSoup
import re
import sys
import os
def get_browsershot(server_url, session_file, position_string, ofname):
    """Download the PDF rendering of a browser session and save it to a file.

    The function requests ``<server_url>/cgi-bin/hgTracks`` with the session
    file passed as ``hgS_loadUrlName`` and ``hgt.psOutput=on``, scans the
    returned HTML for a link containing "pdf" whose parent element mentions
    "the current browser graphic in PDF", downloads that link and writes the
    bytes to *ofname*.

    Args:
        server_url: Base URL of the browser server (no trailing slash).
        session_file: Value sent as ``hgS_loadUrlName``; inserted into the URL
            verbatim (it is not URL-encoded here).
        position_string: Extra query text appended verbatim to the URL; the
            callers in this file pass ``"&position=chr:start-end"``.
        ofname: Path of the output PDF file.

    Returns:
        None. If the final PDF download does not return an OK status, the
        response headers are printed and no output file is written.

    Raises:
        SystemExit: with status 1 when the hgTracks page request fails or no
            PDF link can be found in the returned page.
    """
    # The first query parameter is introduced with "?", every later one must
    # be joined with "&"; otherwise the server sees a single garbled value.
    # NOTE(review): the fixed hgsid for the genometest server comes from the
    # original code; the reason for it is not visible here.
    if "genometest.gs.washington.edu" in server_url:
        query_start = "?hgsid=100000&"
    else:
        query_start = "?"
    url = "".join([
        server_url,
        "/cgi-bin/hgTracks",
        query_start,
        "hgS_doLoadUrl=submit&hgS_loadUrlName=",
        session_file,
        "&hgt.psOutput=on&pix=2000",
        position_string,
    ])
    page = requests.get(url)
    print(url)
    if page.status_code != requests.codes.ok:
        print("Invalid page URL: %s\n" % url)
        print("Make sure your session file is globally readable and in a web-accessible directory\n")
        sys.exit(1)

    # Locate the link to the generated PDF inside the returned HTML page.
    soup = BeautifulSoup(page.text, "html.parser")
    relative_url = None
    for entry in soup.find_all(href=re.compile("pdf")):
        if entry.parent.find(text=re.compile("the current browser graphic in PDF")) is not None:
            relative_url = entry.get("href")
            break
    if relative_url is None:
        print("Could not find browsershot pdf at %s\n" % url)
        print("Make sure your session file is globally readable and in a web-accessible directory\n")
        sys.exit(1)

    # The link is relative ("../..."); rebase it onto the server root.
    pdf_url = server_url + relative_url.replace("../", "/")
    print(ofname)

    # Download before opening the output file so that a failed request does
    # not leave an empty (or truncated) file behind.
    response = requests.get(pdf_url)
    if response.status_code == requests.codes.ok:
        with open(ofname, "wb") as outfile:
            outfile.write(response.content)
    else:
        print(response.headers)
def parse_regions(lines):
    """Split region-table lines into lists of fields.

    Blank (whitespace-only) lines are skipped. Every other line must hold at
    least five whitespace-separated fields: chr, start, end, output file name
    and session file. Extra fields are kept in the returned list.

    Args:
        lines: Iterable of text lines (for example an open file).

    Returns:
        A list with one list of string fields per non-blank line.

    Raises:
        ValueError: if a non-blank line has fewer than five fields.
    """
    regions = []
    for line_number, line in enumerate(lines, start=1):
        fields = line.split()
        if not fields:
            continue
        if len(fields) < 5:
            raise ValueError(
                "line %d: expected 5 fields (chr, start, end, ofname, sessionname), got %d"
                % (line_number, len(fields))
            )
        regions.append(fields)
    return regions


def highlight_session_lines(lines, highlight_region):
    """Return session-file lines with the highlight set to *highlight_region*.

    The value of the ``db`` variable (first line whose first token is exactly
    ``db``) is prepended to the region as ``<db>.<region>``, wherever that
    line sits in the file. Every line whose first token is exactly
    ``highlight`` is replaced; if there is none, a highlight line is appended
    so the requested highlight is not silently dropped.

    Args:
        lines: Iterable of session-file lines, newline-terminated.
        highlight_region: Region text such as ``"chr1:100-200#ffeda0"``.

    Returns:
        A new list of lines; the input is not modified.
    """
    lines = list(lines)

    # Match the variable name exactly: a prefix test would also hit other
    # variables that merely start with "db" (e.g. "dbSnp153").
    db = None
    for line in lines:
        fields = line.split()
        if len(fields) >= 2 and fields[0] == "db":
            db = fields[1]
            break
    if db is not None:
        highlight_region = db + "." + highlight_region
    highlight_line = "highlight " + highlight_region + "\n"

    rewritten = []
    replaced = False
    for line in lines:
        fields = line.split()
        if fields and fields[0] == "highlight":
            rewritten.append(highlight_line)
            replaced = True
        else:
            rewritten.append(line)

    if not replaced:
        # Make sure the appended variable starts on its own line.
        if rewritten and not rewritten[-1].endswith("\n"):
            rewritten[-1] += "\n"
        rewritten.append(highlight_line)
    return rewritten


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--regions_file", type=argparse.FileType("r"), help="Gets browsershots for multiple regions. Must be a path to tab-delimited file with list of region, outfile name, and session name (chr,start,end,ofname,sessionname).")
    parser.add_argument("--region", default=None, nargs=5, metavar=("chr", "start", "end", "outfile", "session_file"), help="Get browsershot for single region.")
    parser.add_argument("--region_highlight", default=None, nargs=3, metavar=("chr", "start", "end"), help="Highlight a region in the browsershot.")
    parser.add_argument("--tempSessionFile", default=None, help="path to a tempSessionFile.")
    parser.add_argument("--server_url", default="https://genome.gs.washington.edu", help="Server URL (May require username and password)")
    args = parser.parse_args()

    # Fail with a usage message instead of a TypeError further down.
    if args.regions_file is None and args.region is None:
        parser.error("one of --regions_file or --region is required")

    if args.regions_file is not None:
        # Batch mode: one browser shot per line of the regions file.
        # --regions_file takes precedence when --region is also given.
        with args.regions_file as regions_fh:
            try:
                regions = parse_regions(regions_fh)
            except ValueError as err:
                parser.error("%s: %s" % (regions_fh.name, err))
        for region in regions:
            position_string = "&position=%s:%s-%s" % (region[0], region[1], region[2])
            outfile = region[3]
            session_file = os.path.abspath(region[4])
            get_browsershot(args.server_url, session_file, position_string, outfile)
    else:
        # Single-region mode.
        position = "%s:%s-%s" % tuple(args.region[0:3])
        outfile = args.region[3]
        session_file = os.path.abspath(args.region[4])
        position_string = "&position=" + position
        # The highlight is only applied when a temp session file path is also
        # supplied; --region_highlight alone is ignored.
        if args.region_highlight is not None and args.tempSessionFile is not None:
            region_highlight = "%s:%s-%s" % tuple(args.region_highlight) + "#ffeda0"
            # Read everything before opening the output so the rewrite is
            # safe even if both paths name the same file.
            with open(session_file) as fin:
                session_lines = fin.readlines()
            with open(args.tempSessionFile, "w") as fout:
                fout.writelines(highlight_session_lines(session_lines, region_highlight))
            # Use an absolute path, consistent with the other session paths.
            session_file = os.path.abspath(args.tempSessionFile)
        get_browsershot(args.server_url, session_file, position_string, outfile)