-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathReadUIRL.py
More file actions
32 lines (30 loc) · 1.13 KB
/
ReadUIRL.py
File metadata and controls
32 lines (30 loc) · 1.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# Example file to export images from a web page
import csv
import re
from urllib.parse import urljoin, urlsplit

import requests
from bs4 import BeautifulSoup
# Prefix prepended to the second CSV column to build each page URL.
baseURL = "http://www.avajava.com/tutorials/lessons/"

# Seconds to wait on any single HTTP request before giving up, so one
# unresponsive server cannot hang the whole export.
REQUEST_TIMEOUT = 30

# Captures the final path segment when it is a .jpg/.gif/.png file name.
IMAGE_NAME_PATTERN = re.compile(r'/([\w_-]+[.](jpg|gif|png))$')


def image_filename(url):
    """Return the local file name to use for an image URL.

    Only the path component of the URL is inspected, so a query string or
    fragment does not prevent a match.

    Args:
        url: Absolute image URL.

    Returns:
        The last path segment (for example ``"logo.png"``) when it ends in
        ``.jpg``, ``.gif`` or ``.png``; otherwise ``None``.
    """
    match = IMAGE_NAME_PATTERN.search(urlsplit(url).path)
    return match.group(1) if match else None


def download_page_images(page_url):
    """Download the supported images referenced by one web page.

    Each ``<img>`` whose resolved URL ends in a supported image file name
    is written to the current working directory under that file name.
    Images without a ``src``, with an unsupported name, or whose download
    fails are skipped; a failed page fetch skips the whole page. Failures
    are reported with ``print`` instead of aborting the run.

    Args:
        page_url: Absolute URL of the HTML page to scan.
    """
    try:
        page = requests.get(page_url, timeout=REQUEST_TIMEOUT)
        page.raise_for_status()
    except requests.RequestException as err:
        print(f'Skipping page {page_url}: {err}')
        return

    soup = BeautifulSoup(page.text, 'html.parser')
    for img in soup.find_all('img'):
        src = img.get('src')
        if not src:
            continue

        # urljoin handles absolute, root-relative, sibling-relative and
        # protocol-relative references; plain concatenation does not.
        image_url = urljoin(page_url, src)
        filename = image_filename(image_url)
        if filename is None:
            continue

        # Fetch before opening the file so a failed download does not
        # leave an empty file behind.
        try:
            image = requests.get(image_url, timeout=REQUEST_TIMEOUT)
            image.raise_for_status()
        except requests.RequestException as err:
            print(f'Skipping image {image_url}: {err}')
            continue

        with open(filename, 'wb') as f:
            f.write(image.content)


def main(csv_path="export.csv"):
    """Read the CSV export and download the images of every listed page.

    The first row is treated as a header and echoed. For every following
    row, the first two columns are echoed and the second column is
    appended to ``baseURL`` to form the page URL to scan. Rows with fewer
    than two columns are ignored.

    Args:
        csv_path: Path of the comma-separated input file.
    """
    # newline='' is what the csv module expects for files it reads.
    with open(csv_path, newline='') as source_file:
        reader = csv.reader(source_file, delimiter=',')
        header = next(reader, None)
        if header is None:
            return
        print(f'Column names are {", ".join(header)}')

        for row in reader:
            if len(row) < 2:
                continue
            print(f'\t{row[0]} and {row[1]}')
            download_page_images(baseURL + row[1])


# Run the export only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
# This is good