server-metadata/availabilities.py at main · hapi-server/server-metadata · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
# Usage:
#   python availabilities.py [server_id1,server_id2,...]

import os
import pandas

import utilrsw

from datetime import datetime, timedelta
from hapiclient import hapitime2datetime
from hapimeta import logger, data_dir, cli, server_error, server_error_write

log = logger('availabilities')

# Debugging switches
debug_layout   = False # Save and print layout before/after tight_layout()
debug_svglinks = False # Passed as debug= to utilrsw.svg.svglinks()
n_datasets     = None  # For debugging, only process the first n_datasets datasets

lines_per_plot = 50             # Number of time range bars per plot
savefig_fmts   = ['svg', 'png'] # File formats to save. 'png' and 'svg' are supported.

# Figure size is specified in pixels and converted to inches using dpi.
dpi        = 300
fig_width  = 3840/dpi # 3840 pixels, in inches
fig_height = 2160/dpi # 2160 pixels, in inches

catalogs_all_file = f'{data_dir}/catalogs-all.pkl'           # Input file
base_dir          = os.path.join(data_dir, 'availabilities') # Base directory

def write(fname, data, logger=None):
  """Write `data` to `fname` using utilrsw.write(), logging the attempt.

  A failure is logged as an error and the exception is re-raised so the
  caller still sees it.
  """
  try:
    log.info(f"Writing {fname}")
    utilrsw.write(fname, data, logger=logger)
  except Exception as err:
    log.error(f"Error writing {fname}: {err}")
    raise


def plot(server, server_url, server_dir, title, datasets, starts, stops,
         lines_per_plot=lines_per_plot,
         fig_width=fig_width, fig_height=fig_height):
  """Draw one time range bar per dataset and save the bars as paged figures.

  Parameters
  ----------
  server : str
    Server id; used in output file names and in the link attached to bars.
  server_url : str
    Server URL; used in the link attached to dataset labels.
  server_dir : str
    Directory under which the "svg" and "png" subdirectories are written.
  title : str
    Title text; " | page/n_pages" is appended on each page.
  datasets : list of str
    Dataset ids, one per bar.
  starts, stops : list of datetime
    Start and stop of each dataset. They are compared with naive datetimes,
    so they must be naive too. Values outside [1960-01-01, now + 5 years]
    are clipped for plotting and marked with an arrow.
  lines_per_plot : int
    Number of bars per page.
  fig_width, fig_height : float
    Figure size in inches.

  Returns
  -------
  list of str
    "{server}.{page}" for each page written (no directory or extension).
    Empty if `datasets` is empty.

  The input lists are not modified.
  """
  import math

  # Work on copies: times are clipped below, and the caller's lists should
  # not change as a side effect of plotting.
  datasets = list(datasets)
  starts = list(starts)
  stops = list(stops)

  if len(datasets) == 0:
    # Nothing to draw. Without this guard the paging code below would
    # reference a loop variable that was never assigned.
    return []

  import matplotlib.pyplot as plt
  # The following is needed to prevent Matplotlib from writing
  # text as paths. If text is written as paths, the SVG file will not
  # be searchable using CTRL+F.
  plt.rcParams['svg.fonttype'] = 'none'
  plt.rcParams['font.family'] = 'times new roman'

  from datetick import datetick

  # Written as escapes so the characters survive editors and tools that
  # normalize whitespace or re-encode the file.
  special_chars = {
    'ts': '\u2009',       # Unicode thin space
    'rarrow': '\u2192 ',  # Unicode right arrow (followed by a space)
    'larrow': '\u2190'    # Unicode left arrow
  }

  colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

  def newfig():
    # Only one figure is kept open at a time; savefig() and config() act on
    # the current figure.
    plt.close('all')
    fig, ax = plt.subplots()
    fig.set_figheight(fig_height)
    fig.set_figwidth(fig_width)
    return fig, ax

  def config(ax, starts_min, stops_max, title=None, left_margin=None, right_margin=None):

    if title is not None:
      ax.text(0.5, 1.0, title, transform=ax.transAxes, va='top', ha='center', fontsize=10, backgroundcolor='white',)
    ax.set_xlim([starts_min, stops_max])
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_visible(False)
    ax.grid(axis='x', which='minor', alpha=0.5, linestyle=':')
    ax.grid(axis='x', which='major', color='k', alpha=0.5)
    ax.set_yticks(ticks=[])
    datetick('x')
    if left_margin is not None and right_margin is not None:
      plt.subplots_adjust(left=left_margin, right=right_margin)
    plt.subplots_adjust(top=1.0, bottom=0.03)

  def savefig(fn):
    # Saves the current figure in each enabled format and returns the file
    # name stem shared by all formats.

    if 'svg' in savefig_fmts:
      _fname = os.path.join(server_dir, "svg", f"{server}.{fn}.svg")
      os.makedirs(os.path.dirname(_fname), exist_ok=True)
      log.info(f'Writing {_fname}')
      plt.savefig(f"{_fname}")
      # Post-process the SVG using the gid attributes set in draw()
      # (presumably converting them to hyperlinks; see utilrsw.svg.svglinks).
      utilrsw.svg.svglinks(_fname, link_attribs={'target': '_blank'}, debug=debug_svglinks)

    if 'png' in savefig_fmts:
      _fname = os.path.join(server_dir, "png", f"{server}.{fn}.png")
      os.makedirs(os.path.dirname(_fname), exist_ok=True)
      log.info(f'Writing {_fname}')
      plt.savefig(f"{_fname}", dpi=dpi)

    return f"{server}.{fn}"

  def draw(ax, n, max_len=None):
    # The links use the dataset id itself rather than a cleaned-up copy of
    # the display label, so ids containing label characters are not altered.
    dataset_id = datasets[n].strip()
    gid_bar = f"https://hapi-server.org/servers/#server={server}&dataset={dataset_id}"
    gid_txt = f"https://hapi-server.org/plot/?server={server_url}&dataset={dataset_id}&format=gallery&usecache=true&usedatacache=true&mode=thumb"

    y = lines_per_plot - n
    color = colors[n % len(colors)]
    # Color is set explicitly so the line matches the bar even when
    # lines_per_plot is not a multiple of the number of cycle colors.
    ax.plot([starts[n], stops[n]], [y, y], gid=gid_bar, linewidth=0.5, color=color)
    rect = plt.Rectangle(
              (starts[n], y - 0.5),
              stops[n] - starts[n],
              0.8,
              color=color, alpha=1, gid=gid_bar)
    rect.set_linewidth(0)
    ax.add_patch(rect)

    if max_len is None:
      label = labels[n].rstrip()
    else:
      # Pad to a common width so the layout pass sees the widest label.
      label = f'{labels[n]:{max_len}s}'

    text_kwargs = {
      #'family': 'monospace', # Causes extra right padding in SVG
      'color': color,
      'verticalalignment': 'center',
      'size': 8,
      'gid': gid_txt,
      'bbox': dict(facecolor='white', alpha=0.5, pad=0, lw=0)
    }
    ax.text(stops[n], y, label, **text_kwargs)
    if start_text[n] is not None:
      text_kwargs['horizontalalignment'] = 'right'
      ax.text(starts[n], y, start_text[n], **text_kwargs)

  n_plots = math.ceil(len(datasets)/lines_per_plot)
  pad = max(1, math.ceil(math.log10(n_plots + 1)))  # Digits in page number
  stops_max = datetime.now() + timedelta(days=5*365)
  starts_min = datetime(1960, 1, 1, 0, 0, 0)

  # Build display labels and clip time ranges to [starts_min, stops_max].
  labels = []
  start_text = []
  for ds, dataset_id in enumerate(datasets):
    label = f"{special_chars['ts']}{dataset_id}"
    if stops[ds] > stops_max:
      stops[ds] = stops_max
      label = f"{special_chars['rarrow']}{label}"
    if starts[ds] < starts_min:
      starts[ds] = starts_min
      start_text.append(special_chars['larrow'])
    else:
      start_text.append(None)
    labels.append(label)
  max_len = max(len(label) for label in labels)

  # Layout pass: draw every dataset in one figure and let tight_layout()
  # choose margins. These margins are reused on every page so that all pages
  # have the same horizontal extent.
  fig, ax = newfig()
  for n in range(len(datasets)):
    draw(ax, n, max_len=max_len)

  config(ax, starts_min, stops_max)
  if debug_layout:
    left, bottom, width, height = ax.get_position().bounds
    savefig('all-before-tight-layout')
    print(f"Left margin: {left}")
    print(f"Bottom margin: {bottom}")
    print(f"Width: {width}")
    print(f"Height: {height}")
  fig.tight_layout()
  left, bottom, width, height = ax.get_position().bounds
  if debug_layout:
    savefig('all-after-tight-layout')
    print(f"Left margin: {left}")
    print(f"Bottom margin: {bottom}")
    print(f"Width: {width}")
    print(f"Height: {height}")
  # subplots_adjust() takes the position of the right edge of the axes, which
  # is the left edge plus the width found by tight_layout().
  left_margin = left
  right_margin = width + left

  # Page pass: a page is saved when it is full or when the last dataset has
  # been drawn.
  files = []
  fig, ax = newfig()
  n_last = len(datasets) - 1
  for n in range(len(datasets)):
    draw(ax, n)
    if (n + 1) % lines_per_plot == 0 or n == n_last:
      fn = len(files) + 1
      fn_padded = f"{fn:0{pad}d}"
      title_ = title + f" | {fn}/{n_plots}"
      config(ax, starts_min, stops_max, title_, left_margin, right_margin)
      files.append(savefig(fn_padded))
      if n != n_last:
        fig, ax = newfig()

  plt.close('all')

  return files


def html(files, server_dir, server):
  """Write an HTML page per enabled format that embeds all plot pages.

  Parameters
  ----------
  files : list of str
    File name stems as returned by plot(); "{stem}.svg" and "{stem}.png" are
    read from the "svg" and "png" subdirectories of `server_dir`.
  server_dir : str
    Directory containing the "svg" and "png" subdirectories.
  server : str
    Server id; substituted for TITLE in the template and used as the name
    of the HTML file ("{server}.html") written next to the images.

  The SVG page inlines the SVG markup (so text is searchable) and includes
  search instructions. The PNG page embeds each image as a base64 data URI.
  Nothing is written for a format that is not in savefig_fmts or if `files`
  is empty.
  """
  import base64
  import re

  # Page template. TITLE, SEARCH and DIVS are placeholders filled in below.
  html_content = """
  <!DOCTYPE html>
  <html lang="en">
  <script>
  function searchKey() {
    if (navigator.platform.toUpperCase().startsWith("MAC")) {
      return "⌘+F";
    }
    return "Ctrl+F";
  }
  </script>
  <head>
    <style>
      /* Force scrollbar to show on OS-X (so user knows it is scrollable */
      /* https://simurai.com/blog/2011/07/26/webkit-scrollbar */
      /* Needed here for when this page is in an iframe */
      body::-webkit-scrollbar {
        -webkit-appearance: none;
        width: 7px;
        height: 7px;
      }
      body::-webkit-scrollbar-thumb {
          border-radius: 4px;
          background-color: rgba(0,0,0,.5);
          box-shadow: 0 0 1px rgba(255,255,255,.5);
      }
    </style>
    <link rel="icon" href="data:image/x-icon;base64,AAABAAEAEBAQAAEABAAoAQAAFgAAACgAAAAQAAAAIAAAAAEABAAAAAAAgAAAAAAAAAAAAAAAEAAAAAAAAAAAAAAA/4QAAA0ODwAASP8Ab/8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACIiIgAAAAAAAAAAAAAAAAAAAAAAAAAAADMzMzMzMwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAARERERAAAAAAAAAAAAAAAAAAAAAAAAAAAEREREREREREAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD//wAA//8AAAP/AAD//wAA//8AAAAPAAD//wAA//8AAP//AAAA/wAA//8AAP//AAAAAAAA//8AAP//AAD//wAA">
    <script async src="https://www.googletagmanager.com/gtag/js?id=G-5X7EXZ3BBW"></script><script>window.dataLayer = window.dataLayer || [];function gtag(){dataLayer.push(arguments);} gtag("js", new Date());gtag("config", "G-5X7EXZ3BBW");</script>
    <meta http-equiv="Content-type" content="text/html;charset=UTF-8">
    <meta name="keywords" content="TITLE HAPI Heliophysics Data Availability UI">
    <meta name="description"
      content="HAPI Server Availability for TITLE; https://github.com/hapi-server/servers">
    <meta name="keywords" content="TITLE">
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>TITLE</title>
  </head>
  <body>
      Time range of datasets available from the <a href="https://hapi-server.org/servers/#server=TITLE" target="_blank">TITLE</a> HAPI server.
      <a href="https://hapi-server.org/meta/availabilities/TITLE/TITLE.csv" target="_blank">Time range data</a> |
      <a href="https://hapi-server.org/meta/availabilities/TITLE/" target="_blank">Plot files</a> |
      <a href="https://github.com/hapi-server/server-metadata" target="_blank">Plot generation code</a>
    SEARCH
    DIVS
  </body>
  </html>
  """

  search = """
    <br>
    <b>Search:</b>
    <ul style="margin-top:0.2em; margin-bottom:0.2em; padding-inline-start: 1.5em;">
      <li>Use <script>document.write(searchKey());</script> to search for a dataset.</li>
      <li>Click a dataset name to view information about dataset.</li>
      <li>Click a bar to view plots of parameters in dataset.</li>
    </ul>
  """

  # Remove leading two spaces from each line
  html_content = "\n".join([line[2:] for line in html_content.split("\n")])
  html_content = html_content[1:] # Remove first line break

  def fill(search_html, divs):
    # Substitute all placeholders in a single pass over the template.
    # Sequential str.replace() calls would also rewrite placeholder words
    # that occur inside already-inserted content (e.g., a dataset id
    # containing "SEARCH" in the inlined SVG, or in base64 image data).
    values = {"TITLE": server, "SEARCH": search_html, "DIVS": divs}
    return re.sub("TITLE|SEARCH|DIVS", lambda m: values[m.group(0)], html_content)

  svg_parts = []
  png_parts = []
  file_svg = None
  file_png = None
  for file in files:
    if 'svg' in savefig_fmts:
      file_svg = os.path.join(server_dir, "svg", f"{file}.svg")
      with open(file_svg, "rb") as f:
        svg_parts.append(f.read().decode('utf-8'))
      file = os.path.basename(file)
    if 'png' in savefig_fmts:
      file_png = os.path.join(server_dir, "png", f"{file}.png")
      with open(file_png, "rb") as f:
        png_base64 = base64.b64encode(f.read()).decode('utf-8')
      png_parts.append(f'<img width="100%" src="data:image/png;base64,{png_base64}" alt="{file}">\n')

  # file_svg/file_png are None if no file was read for that format.
  if 'svg' in savefig_fmts and file_svg is not None:
    fname = os.path.join(os.path.dirname(file_svg), f'{server}.html')
    write(fname, fill(search, "".join(svg_parts)))

  if 'png' in savefig_fmts and file_png is not None:
    # Search instructions are omitted; text in a PNG is not searchable.
    fname = os.path.join(os.path.dirname(file_png), f'{server}.html')
    write(fname, fill("", "".join(png_parts)))


def process_server(server, catalog_all):
  """Extract dataset time ranges for one server, then write CSV, plots, and HTML.

  Parameters
  ----------
  server : str
    Server id; used in log messages, file names, and output directory name.
  catalog_all : dict
    Metadata for the server. Read here: 'catalog/catalog' (list of dataset
    objects with 'id' and 'info'), catalog_all['about']['x_url'], and the
    optional catalog_all['catalog']['x_LastUpdate'].

  Returns
  -------
  pandas.DataFrame or None
    Columns "server", "dataset", "start", "stop" with one row per dataset
    that has a parsable startDate and stopDate. None if the catalog has no
    dataset list.

  Datasets with missing or invalid metadata are reported with server_error()
  and skipped.
  """
  import traceback

  def extract_time(dataset_id, info, key):
    # Returns (original string, naive datetime), or (None, None) after
    # reporting the problem if the value is missing or cannot be parsed.
    if key not in info:
      server_error(server, dataset_id, f"key '{key}' is not in info.", log)
      return None, None

    hapitime = info[key]
    if hapitime is None:
      server_error(server, dataset_id, f"info[{key}] not found.", log)
      return None, None

    if not isinstance(hapitime, str):
      # Report instead of failing on .strip() and aborting the whole run.
      server_error(server, dataset_id, f"info[{key}] is not a string.", log)
      return None, None

    if hapitime.strip() == "":
      server_error(server, dataset_id, f"info[{key}].strip() == ''", log)
      return None, None

    try:
      dt = hapitime2datetime(hapitime, allow_missing_Z=True)
      # Drop timezone info; plot() compares with naive datetimes.
      dt = dt[0].replace(tzinfo=None)
    except Exception:
      trace = traceback.format_exc()
      msg = f"hapitime2datetime({hapitime}) returned:\n{trace}"
      server_error(server, dataset_id, msg, log)
      return None, None

    return hapitime, dt

  lines = []
  ids = []
  starts = []
  stops = []

  datasets = utilrsw.get_path(catalog_all, 'catalog/catalog', sep='/')
  if datasets is None:
    log.info(f"{server}: No datasets found in catalog")
    return None

  log.info(f"{server}: {len(datasets)} datasets")
  for dataset in datasets:

    if 'id' not in dataset:
      server_error(server, "_", "No 'id' key in dataset object", log)
      continue

    dataset_id = dataset['id']
    log.info(f"  Processing dataset: {dataset_id}")

    # A null 'info' is treated the same as a missing one.
    if 'info' not in dataset or dataset['info'] is None:
      server_error(server, dataset_id, "Missing /info response data.", log)
      continue

    info = dataset['info']

    # Both are always extracted so that problems with either are reported.
    startDate, startDate_datetime = extract_time(dataset_id, info, 'startDate')
    stopDate, stopDate_datetime = extract_time(dataset_id, info, 'stopDate')

    if startDate_datetime is not None and stopDate_datetime is not None:
      log.info(f"    {server}, {dataset_id}, {startDate}, {stopDate}")
      lines.append([server, dataset_id, startDate_datetime, stopDate_datetime])
      stops.append(stopDate_datetime)
      starts.append(startDate_datetime)
      ids.append(dataset_id)

  df = pandas.DataFrame(lines, columns=["server", "dataset", "start", "stop"])

  # The CSV is written even if no dataset had a valid time range.
  server_dir = os.path.join(base_dir, server)
  fname = os.path.join(server_dir, f'{server}.csv')
  write(fname, df)

  if len(ids) == 0:
    log.info(f"{server}: No datasets with valid startDate and stopDate found in catalog")
    return df

  log.info("Plotting availabilities")

  server_url = catalog_all['about']['x_url']
  x_LastUpdate = catalog_all['catalog'].get('x_LastUpdate', '')
  title = f"{server} | {server_url} | {len(ids)} datasets | {x_LastUpdate}"

  if n_datasets is not None and len(ids) > n_datasets:
    # For debugging, only process the first n_datasets datasets
    ids = ids[:n_datasets]
    starts = starts[:n_datasets]
    stops = stops[:n_datasets]

  files = plot(server, server_url, server_dir, title, ids, starts, stops,
               lines_per_plot=lines_per_plot,
               fig_width=fig_width, fig_height=fig_height)

  # List of plot file name stems, one copy next to each format's images.
  # write() logs the file name.
  for savefig_fmt in savefig_fmts:
    fname = os.path.join(server_dir, savefig_fmt, f"{server}.json")
    write(fname, files)

  html(files, server_dir, server)

  return df

# Read metadata for all servers; keys are server ids.
catalogs_all = utilrsw.read(catalogs_all_file)

# Optional list of server ids from the command line.
servers_only = cli()
if servers_only:
  log.info(f"Generating availability for {servers_only}")
else:
  log.info(f"Generating availability for all servers in {catalogs_all_file}")

# An empty or missing selection means all servers, consistent with the
# message logged above.
servers = []
for server in catalogs_all.keys():
  if servers_only and server not in servers_only:
    continue
  servers.append(server)

if len(servers) == 0:
  log.error(f"No servers to process. Possible servers: {catalogs_all.keys()}")
  # SystemExit is raised directly; exit() is a helper added by the site
  # module for interactive use and is not always available.
  raise SystemExit(1)

dfs = []
for server in servers:
  df = process_server(server, catalogs_all[server])
  server_error_write(server, log)
  # process_server() returns None if the server has no dataset list.
  if df is not None:
    dfs.append(df)

if len(dfs) == 0:
  # pandas.concat() raises ValueError if given an empty list.
  log.error("No availability data for any server; combined files not written.")
  raise SystemExit(1)

# Combined table for all processed servers.
dfs = pandas.concat(dfs, ignore_index=True)
write(f"{base_dir}/availabilities.pkl", dfs)
write(f"{base_dir}/availabilities.csv", dfs)