-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstream_loader.py
More file actions
65 lines (48 loc) · 1.77 KB
/
stream_loader.py
File metadata and controls
65 lines (48 loc) · 1.77 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/python3
# -*- coding: UTF-8 -*-
import logging
import threading
from queue import Queue
import os
import urllib.request
class Downloader(threading.Thread):
    """Worker thread that downloads every URL it takes from a shared queue.

    The worker loops forever, so it never finishes on its own; it is meant
    to be run as a daemon thread while the producer waits on the queue.

    Attributes:
        number: Identifier used to tag this worker's log messages.
        queue: Queue of URL strings to download.
    """

    # Number of bytes requested from the response per read.
    CHUNK_SIZE = 1024

    def __init__(self, number: int, queue: Queue):
        super().__init__()
        self.number = number
        self.queue = queue

    def run(self):
        """Take URLs from the queue forever and download each one.

        A failed download is logged with its traceback and skipped. The
        queue item is marked done in every case; otherwise a single failure
        would kill this worker and leave ``queue.join()`` blocked forever.
        """
        while True:
            url = self.queue.get()
            try:
                logging.info('Thread %s: got URL - %s', self.number, url)
                self.download_file(url)
            except Exception:
                logging.exception(
                    'Thread %s: failed to download %s', self.number, url
                )
            finally:
                self.queue.task_done()

    def download_file(self, url: str):
        """Download *url* into the current working directory.

        The local file name is the last path component of *url*, as returned
        by ``os.path.basename``. The response is streamed to disk in
        ``CHUNK_SIZE``-byte reads.

        Args:
            url: Address to fetch; anything ``urllib.request.urlopen`` accepts.

        Raises:
            urllib.error.URLError: If the URL cannot be opened.
            OSError: If the local file cannot be created or written.
        """
        file_name = os.path.basename(url)
        # The context manager closes the response even when writing fails.
        with urllib.request.urlopen(url) as handle:
            logging.info(
                'Thread %s: starts to downloading file: %s',
                self.number, file_name,
            )
            with open(file_name, 'wb') as f:
                # read() returns b'' at end of stream, which stops iter().
                for chunk in iter(lambda: handle.read(self.CHUNK_SIZE), b''):
                    f.write(chunk)
            logging.info(
                'Thread %s: file %s was downloaded', self.number, file_name
            )
def main(urls):
    """Download every URL in *urls* concurrently, one worker thread per URL.

    Starts one ``Downloader`` per URL, enqueues all URLs, then blocks on
    ``queue.join()``, which returns once a worker has called ``task_done()``
    for every enqueued URL.

    Args:
        urls: Iterable of URL strings to download.
    """
    # Materialize once so any iterable (not only a sized sequence) is accepted.
    urls = list(urls)
    queue = Queue()
    for number in range(len(urls)):
        worker = Downloader(number, queue)
        # Workers loop forever; as daemons they do not keep the process alive.
        # The attribute replaces setDaemon(), deprecated since Python 3.10.
        worker.daemon = True
        worker.start()
    for url in urls:
        queue.put(url)
    # Wait until every queued URL has been processed.
    queue.join()
if __name__ == '__main__':
    # Files fetched when this module is executed as a script.
    urls = [
        'http://www.irs.gov/pub/irs-pdf/f1040.pdf',
        'https://www.irs.gov/pub/irs-prior/f1040a--2015.pdf',
        'https://www.irs.gov/pub/irs-prior/i1040a--2015.pdf',
        'https://www.irs.gov/pub/irs-prior/i1040a--2016.pdf',
        'https://www.irs.gov/pub/irs-prior/i1040a--2017.pdf',
    ]
    # Emit INFO-level records prefixed with an HH:MM:SS timestamp.
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s: %(message)s',
        datefmt='%H:%M:%S',
    )
    main(urls)