UpdatesChecker/main.py at master · caleberocha/UpdatesChecker · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
import requests as r
from bs4 import BeautifulSoup as bs
import re
from datetime import datetime, timezone
import os
import os.path
from glob import glob
from urllib.parse import unquote as decode_uri
import json
from errors import *


def get_filename(url):
    """Return the last path segment of *url*, without query string or fragment."""
    # Drop everything from the first "?" onwards, then from the first "#".
    without_query, _, _ = url.partition("?")
    without_fragment, _, _ = without_query.partition("#")
    # Whatever follows the final "/" is the file name (may be empty).
    _, _, filename = without_fragment.rpartition("/")
    return filename


def convert_date(date):
    """
    Parse a GMT date string into a timezone-aware datetime in the local zone.

    Example of an accepted value: 'Tue, 02 Apr 2019 07:52:22 GMT'.

    :param date: Date string in the format shown above.

    :returns: Aware datetime for the same instant, expressed in the system's local timezone.
    """
    parsed = datetime.strptime(date, "%a, %d %b %Y %H:%M:%S GMT")
    # The string is GMT, so tag it as UTC before converting to local time.
    as_utc = parsed.replace(tzinfo=timezone.utc)
    return as_utc.astimezone()


def convert_timestamp_to_date(timestamp):
    """
    Convert a POSIX timestamp into a timezone-aware datetime in the local zone.

    :param timestamp: Seconds since the epoch (int or float), e.g. a value
        returned by os.path.getmtime.

    :returns: Aware datetime for that instant, expressed in the system's local timezone.
    """
    # Build the instant in UTC and convert once. Attaching the *current* local
    # UTC offset to a naive local time would shift timestamps that fall in a
    # different daylight-saving period than today.
    return datetime.fromtimestamp(timestamp, tz=timezone.utc).astimezone()


def get_properties(url):
    """
    Fetch the response headers of *url* and derive the download's properties.

    The request is a streamed GET with a 5 second timeout. Each property falls
    back to a default when its header is missing or cannot be parsed.

    :param url: URL of the file to inspect.

    :returns: A dict with the keys:
        - "url": the URL reported by the response object;
        - "date": Last-Modified as an aware datetime, or datetime.min (UTC);
        - "length": Content-Length as an int, or -1;
        - "name": filename from Content-Disposition, or the last path segment
          of the response URL.
    """
    props = {"name": None, "date": None, "length": None, "url": None}

    with r.get(url, stream=True, timeout=5) as c:
        props["url"] = c.url
        try:
            props["date"] = convert_date(c.headers["last-modified"])
        except (KeyError, AttributeError, ValueError):
            # ValueError: header present but not in the expected date format.
            props["date"] = datetime.min.replace(tzinfo=timezone.utc)

        try:
            props["length"] = int(c.headers["content-length"])
        except (KeyError, AttributeError, ValueError):
            # ValueError: header present but not a plain integer.
            props["length"] = -1

        try:
            props["name"] = re.search(
                r'filename="?(.+?)(?:"|;|$)', c.headers["content-disposition"]
            ).group(1)
        except (KeyError, AttributeError):
            # KeyError: no header; AttributeError: no filename= part matched.
            props["name"] = get_filename(c.url)

    return props


def get_downloadurl(url, search_type=None, search=None, ssl_verify=True):
    r"""
    Find and return the direct download URL of a program.

    :param url: URL of the page to search. If the response's Content-Type
        starts with "application", it is treated as a direct link and returned as is.

    :param search_type: Search mode. The available options are selector, selector_re and find.

    :param search: A dict whose content depends on search_type.

    :param ssl_verify: Passed to the HTTP request as its ``verify`` argument.

    :returns: The href of the first matching element. None when the response
        is neither "application" nor "text" content, or when search_type or
        search is None.

    :raises UrlNotFoundError: When the page was searched but no link matched.

    If search_type is selector, search must contain the key selector.
    Example: {'selector': 'a.external-link[target=_self]'}

    If search_type is selector_re, search must contain the keys selector, attr and pattern,
    where attr is the attribute to test ('text' or 'string' tests the tag's text)
    and pattern is a regular expression searched for in that value.
    Example: {'selector': 'a.external-link[target=_self]', 'attr': 'href', 'pattern': r'Receitanet-[0-9]\.[0-9][0-9]\.exe'}

    If search_type is find, search must contain BeautifulSoup find_all filters.
    Example: {'name': 'a', 'string': '\nWindows Off-line'}
    """
    # Same timeout as get_properties, so an unresponsive server cannot hang the run.
    with r.get(url, stream=True, verify=ssl_verify, timeout=5) as rs:
        # A missing Content-Type header is treated like an unsupported type.
        content_type = rs.headers.get("content-type", "")
        if content_type.startswith("application"):
            return url

        if (
            not content_type.startswith("text")
            or search_type is None
            or search is None
        ):
            return None

        site = bs(rs.content, features="html.parser")

        if search_type == "selector":
            for tag in site.select(search["selector"]):
                if tag.has_attr("href"):
                    return tag["href"]
        elif search_type == "selector_re":
            attr = search["attr"]
            pattern = re.compile(search["pattern"])
            for tag in site.select(search["selector"]):
                # Only elements carrying a link can be a result.
                if not tag.has_attr("href"):
                    continue
                if attr in ("text", "string"):
                    value = tag.string
                else:
                    value = tag.get(attr)
                # Multi-valued attributes (e.g. class) come back as a list.
                if isinstance(value, list):
                    value = " ".join(value)
                # Skip tags without the attribute or without a single text
                # node instead of aborting the whole search.
                if value is None:
                    continue
                if pattern.search(value):
                    return tag["href"]
        elif search_type == "find":
            for tag in site.find_all(**search):
                if tag.has_attr("href"):
                    return tag["href"]

    raise UrlNotFoundError('Não foi encontrada URL de download')


def find_update(current_file, update_url):
    """
    Compare the newest local file matching *current_file* with the remote download.

    :param current_file: Path or glob pattern of the installed file.
    :param update_url: Direct download URL of the candidate update.

    :returns: A dict with the keys "current" and "update".
        "current" is None when no local file matches; otherwise it holds the
        name, date and length of the most recently modified match.
        "update" holds the remote properties when no local file exists, when
        the remote date is newer, or when the lengths differ; otherwise None.

    :raises UrlNotAvailableError: When get_properties returns None.
    """
    remote = get_properties(update_url)
    if remote is None:
        raise UrlNotAvailableError("URL de atualização não disponível")

    candidates = glob(current_file)
    if not candidates:
        return {"current": None, "update": remote}

    # Most recently modified match; on ties the first one in glob order wins.
    newest = max(candidates, key=os.path.getmtime)
    local_length = os.path.getsize(newest)
    local_date = convert_timestamp_to_date(os.path.getmtime(newest))
    local = {"name": newest, "date": local_date, "length": local_length}

    # NOTE(review): a remote length of -1 (unknown) never equals the local
    # size, so such downloads are always reported as updates - confirm intended.
    outdated = remote["date"] > local_date or remote["length"] != local_length

    return {"current": local, "update": remote if outdated else None}


def check_update(
    name,
    filepath,
    update_page,
    search_url_type=None,
    search_url_params=None,
    ssl_verify=True,
):
    """
    Check one program for an update and print the result to stdout.

    :param name: Display name printed before the check; nothing is printed when None.
    :param filepath: Path or glob pattern of the installed file. Environment
        variables are expanded and the path is made absolute.
    :param update_page: URL of the download page, or a direct download link.
    :param search_url_type: Search mode forwarded to get_downloadurl.
    :param search_url_params: Search parameters forwarded to get_downloadurl.
    :param ssl_verify: Forwarded to get_downloadurl.

    :returns: Always None; the outcome is only printed. Any exception raised
        while resolving or inspecting the download is printed as "ERRO: ...".
    """
    if name is not None:
        print("Verificando " + name)

    try:
        d_url = get_downloadurl(update_page, search_url_type, search_url_params, ssl_verify)
        if d_url is None:
            # get_downloadurl returns None when the page cannot be searched;
            # report that clearly instead of failing later with a TypeError.
            print("ERRO: URL de atualização não disponível")
            return None
        d_url = d_url.strip()
        # Anchored match: only a link that *starts* with a scheme is absolute.
        # A relative link that merely contains "http://" (e.g. in its query
        # string) must still be joined to the page URL.
        if not re.match(r"(?:https?|ftp)://.+", d_url):
            d_url = update_page + "/" + d_url
        filepath = os.path.abspath(os.path.expandvars(filepath))
        upd = find_update(filepath, d_url)
    except Exception as e:
        # Top-level boundary for one program: report and move on.
        print("ERRO: {}".format(e))
        return None

    if upd["current"] is None and upd["update"] is None:
        print("Algo de errado não está certo")
        return None

    if upd["current"] is not None:
        print(
            "Programa encontrado: {}, {}, {}".format(
                upd["current"]["name"],
                upd["current"]["date"].strftime("%d/%m/%Y %H:%M:%S"),
                upd["current"]["length"],
            )
        )
    else:
        print("Programa não encontrado")

    if upd["update"] is not None:
        print(
            "Atualização disponível: {}, {}, {}, {}".format(
                decode_uri(upd["update"]["name"]),
                upd["update"]["date"].strftime("%d/%m/%Y %H:%M:%S"),
                upd["update"]["length"],
                upd["update"]["url"],
            )
        )
    elif upd["current"] is not None:
        print("Programa atualizado")

    print()


if __name__ == "__main__":
    # Load the list of programs to check. JSON text is UTF-8, so do not rely
    # on the platform's default encoding when reading it.
    with open("programs.json", "r", encoding="utf-8") as f:
        updates_to_check = json.load(f)

    for uc in updates_to_check["programs"]:
        # "search_type", "search_params" and "ssl_verify" are optional; the
        # fallbacks match check_update's own parameter defaults.
        check_update(
            uc["name"],
            updates_to_check["root_directory"] + "/" + uc["file"],
            uc["url"],
            uc.get("search_type"),
            uc.get("search_params"),
            uc.get("ssl_verify", True),
        )


# print("Verificando LibreOffice")
# upd_page = get_downloadurl(
#     "https://tdf.c3sl.ufpr.br/libreoffice/stable/",
#     "selector_re",
#     {
#         "selector": "tr:nth-last-child(2) > td:nth-child(2) > a",
#         "attr": "href",
#         "pattern": r"([0-9]?)\.([0-9]?)\.([0-9]?)",
#     },
# )
# if upd_page is None:
#     print("URL de atualização não disponível")
# else:
#     upd_page = "https://tdf.c3sl.ufpr.br/libreoffice/stable/" + upd_page + "win/x86"
#     check_update(
#         None,
#         updates_to_check["root_directory"] + "/LibreOffice/LibreOffice_*_Win_x86.msi",
#         upd_page,
#         "selector_re",
#         {"selector": "a", "attr": "href", "pattern": r"Win_x86.msi$"},
#     )