Skip to content
This repository was archived by the owner on Apr 13, 2026. It is now read-only.

Commit e59241d

Browse files
authored
merging docker health monitor into main (#22)
* monitor and track docker health tracks two containers with various variables for each and stores them in InfluxDB 3 * fixed naming convention * modified README, easier to understand * exposed an HTTP endpoint for DAQ website fetch request * updated allowed origins * Update .gitignore * Edit .gitignore * Review fixes for docker-health-v2: remove dual-write, add scan duration metric - Drop _best_effort_write_health_to_influx from /api/health-status — health-monitor sidecar is the authoritative writer; frontend calls were creating duplicate points - Add depends_on: data-downloader-api to health-monitor in docker-compose so it doesn't log API errors on cold start - Replace meaningless runs_per_minute (run count / poll interval) with last_scan_duration_seconds, computed from started_at/finished_at already tracked in scanner_status.json — measures how long the slicks InfluxDB scan actually took - Expose last_scan_duration_seconds in /api/health-status response - Add health-monitor env vars to .env.example * Fix missing BackgroundTasks import removed during health-status cleanup * Updates to Data Downloader, allow past season scans * Bump slicks to 0.2.1 and fix ghost run accumulation in storage Old pre-slicks entries with non-round-hour timestamps would persist forever because merge_scanned_runs kept all vanished runs, not just ones with user notes. Now only preserves vanished entries that have a note, so noise artifacts are cleaned out on each fresh scan. * Fix CI slicks version typo: 2.0.1 -> 0.2.1
1 parent c7033f6 commit e59241d

17 files changed

Lines changed: 1210 additions & 29 deletions

File tree

.github/workflows/sandbox-integration.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ jobs:
2222
python-version: "3.11"
2323

2424
- name: Install slicks
25-
run: pip install "slicks>=2.0.1"
25+
run: pip install "slicks>=0.2.1"
2626

2727
- name: Discover sensors for September 2025
2828
env:

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,3 +219,7 @@ installer/slackbot/*.jpeg
219219

220220
# Generated CSV data files
221221
generated-days/
222+
223+
# Node dependencies
224+
node_modules/
225+
.claude/settings.local.json

installer/data-downloader/.env.example

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,12 @@ SENSOR_WINDOW_DAYS=7
1212
SENSOR_LOOKBACK_DAYS=30
1313
SCAN_INTERVAL_SECONDS=3600
1414
VITE_API_BASE_URL=http://localhost:8000
15-
ALLOWED_ORIGINS=http://localhost:3000,http://localhost:5173
15+
ALLOWED_ORIGINS=http://localhost:3000,http://localhost:5173,https://daq.westernformularacing.org
16+
17+
# Health monitor (optional — defaults work for standard docker-compose stack)
18+
INFLUXDB_HEALTH_DATABASE=monitoring
19+
HEALTH_MONITOR_INTERVAL_SECONDS=60
20+
HEALTH_MONITOR_INFLUXDB_CONTAINER=influxdb3
21+
HEALTH_MONITOR_SCANNER_CONTAINER=data-downloader-scanner
22+
HEALTH_MONITOR_SCANNER_API_URL=http://data-downloader-api:8000
23+
HEALTH_MONITOR_INFLUXDB_VOLUME_SUFFIX=influxdb3-data

installer/data-downloader/backend/app.py

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
from __future__ import annotations
22

3-
from datetime import datetime
3+
from datetime import datetime, timezone
4+
import logging
5+
from typing import List
6+
7+
import docker
48

59
from fastapi import BackgroundTasks, FastAPI, HTTPException
610
from fastapi.middleware.cors import CORSMiddleware
@@ -25,6 +29,7 @@ class DataQueryPayload(BaseModel):
2529

2630
settings = get_settings()
2731
service = DataDownloaderService(settings)
32+
logger = logging.getLogger(__name__)
2833

2934
app = FastAPI(title="DAQ Data Downloader API")
3035
app.add_middleware(
@@ -41,6 +46,36 @@ def healthcheck() -> dict:
4146
return {"status": "ok"}
4247

4348

49+
def _docker_container_running(container_name: str) -> bool:
50+
"""Return True if Docker container is in Running state."""
51+
try:
52+
docker_client = docker.from_env()
53+
container = docker_client.containers.get(container_name)
54+
return bool(container.attrs.get("State", {}).get("Running", False))
55+
except docker.errors.NotFound:
56+
return False
57+
except Exception as e:
58+
raise RuntimeError(f"Docker inspection failed for {container_name}: {e}") from e
59+
60+
61+
@app.get("/api/health-status")
62+
def health_status() -> dict:
63+
"""Container health derived from live Docker inspection."""
64+
try:
65+
scanner_status = service.get_scanner_status()
66+
now = datetime.now(timezone.utc).isoformat()
67+
return {
68+
"influxdb3": _docker_container_running("influxdb3"),
69+
"scanner": _docker_container_running("data-downloader-scanner"),
70+
"last_updated": now,
71+
"last_scan_duration_seconds": scanner_status.get("last_scan_duration_seconds"),
72+
}
73+
except HTTPException:
74+
raise
75+
except Exception as e:
76+
raise HTTPException(status_code=503, detail=str(e))
77+
78+
4479
@app.get("/api/seasons")
4580
def list_seasons() -> List[dict]:
4681
return service.get_seasons()
@@ -70,8 +105,9 @@ def save_note(key: str, payload: NotePayload, season: str | None = None) -> dict
70105

71106

72107
@app.post("/api/scan")
73-
def trigger_scan(background_tasks: BackgroundTasks) -> dict:
74-
background_tasks.add_task(service.run_full_scan, "manual")
108+
def trigger_scan(background_tasks: BackgroundTasks, season: str | None = None) -> dict:
109+
season_names = [season] if season else None
110+
background_tasks.add_task(service.run_full_scan, "manual", season_names)
75111
return {"status": "scheduled"}
76112

77113

installer/data-downloader/backend/config.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,18 @@ def _parse_origins(raw: str | None) -> List[str]:
1313

1414

1515
class SeasonConfig(BaseModel):
16-
name: str # e.g. "WFR25"
17-
year: int # e.g. 2025
16+
name: str # e.g. "WFR25"
17+
year: int # e.g. 2025
1818
database: str # e.g. "WFR25"
19-
color: str | None = None # e.g. "222 76 153"
19+
table: str # e.g. "WFR25" — InfluxDB table name inside the database
20+
color: str | None = None
2021

2122

2223
def _parse_seasons(raw: str | None) -> List[SeasonConfig]:
2324
"""Parse SEASONS env var: "WFR25:2025:222 76 153,WFR26:2026:..."."""
2425
if not raw:
2526
# Default fallback if not set
26-
return [SeasonConfig(name="WFR25", year=2025, database="WFR25", color="#DE4C99")]
27+
return [SeasonConfig(name="WFR25", year=2025, database="WFR25", table="WFR25", color="#DE4C99")]
2728

2829
seasons = []
2930
for part in raw.split(","):
@@ -45,13 +46,13 @@ def _parse_seasons(raw: str | None) -> List[SeasonConfig]:
4546

4647
color = parts[2] if len(parts) > 2 else None
4748

48-
# Assume DB name matches Season Name
49-
seasons.append(SeasonConfig(name=name, year=year, database=name, color=color))
49+
# DB and table name both match season name by convention (WFR25→WFR25, WFR26→WFR26)
50+
seasons.append(SeasonConfig(name=name, year=year, database=name, table=name, color=color))
5051
except ValueError:
5152
continue
5253

5354
if not seasons:
54-
return [SeasonConfig(name="WFR25", year=2025, database="WFR25")]
55+
return [SeasonConfig(name="WFR25", year=2025, database="WFR25", table="WFR25")]
5556

5657
# Sort by year descending (newest first)
5758
seasons.sort(key=lambda s: s.year, reverse=True)

installer/data-downloader/backend/periodic_worker.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,9 @@ async def run_worker():
2424

2525
while True:
2626
try:
27-
logging.info("Running scheduled scan...")
28-
service.run_full_scan(source="periodic")
27+
active_season = settings.seasons[0] # sorted descending by year; first = active
28+
logging.info(f"Running scheduled scan for active season: {active_season.name}")
29+
service.run_full_scan(source="periodic", season_names=[active_season.name])
2930
logging.info("Finished scheduled scan.")
3031

3132
if daily_time:

installer/data-downloader/backend/requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@ fastapi==0.115.4
22
uvicorn[standard]==0.23.2
33
influxdb3-python==0.16.0
44
pydantic==2.9.2
5-
slicks>=0.2.0
5+
slicks>=0.2.1
6+
docker>=7.0.0

installer/data-downloader/backend/services.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -80,14 +80,15 @@ def get_seasons(self) -> List[dict]:
8080
for s in self.settings.seasons
8181
]
8282

83-
def run_full_scan(self, source: str = "manual") -> Dict[str, dict]:
83+
def run_full_scan(self, source: str = "manual", season_names: list[str] | None = None) -> Dict[str, dict]:
8484
self.status_repo.mark_start(source)
8585
results = {}
8686
errors = []
87-
87+
8888
try:
89-
# Sort seasons by year descending to ensure most recent is scanned first
9089
sorted_seasons = sorted(self.settings.seasons, key=lambda s: s.year, reverse=True)
90+
if season_names is not None:
91+
sorted_seasons = [s for s in sorted_seasons if s.name in season_names]
9192
for season in sorted_seasons:
9293
try:
9394
logger.info(f"Scanning season {season.name} (DB: {season.database})...")
@@ -97,7 +98,7 @@ def run_full_scan(self, source: str = "manual") -> Dict[str, dict]:
9798
host=self.settings.influx_host,
9899
token=self.settings.influx_token,
99100
database=season.database,
100-
table=f"{self.settings.influx_schema}.{self.settings.influx_table}",
101+
table=f"{self.settings.influx_schema}.{season.table}",
101102
year=season.year,
102103
bin_size=self.settings.scanner_bin,
103104
include_counts=self.settings.scanner_include_counts,
@@ -116,7 +117,7 @@ def run_full_scan(self, source: str = "manual") -> Dict[str, dict]:
116117
token=self.settings.influx_token,
117118
database=season.database,
118119
schema=self.settings.influx_schema,
119-
table=self.settings.influx_table,
120+
table=season.table,
120121
window_days=self.settings.sensor_window_days,
121122
lookback_days=self.settings.sensor_lookback_days,
122123
fallback_start=fallback_start,
@@ -136,13 +137,18 @@ def run_full_scan(self, source: str = "manual") -> Dict[str, dict]:
136137
errors.append(f"{season.name}: {str(e)}")
137138
# Continue scanning other seasons even if one fails
138139

140+
total_runs = sum(v["runs"] for v in results.values())
141+
total_sensors = sum(v["sensors"] for v in results.values())
139142
if errors:
140143
self.status_repo.mark_finish(success=False, error="; ".join(errors))
141144
else:
142-
self.status_repo.mark_finish(success=True)
145+
self.status_repo.mark_finish(
146+
success=True,
147+
runs_count=total_runs,
148+
sensors_count=total_sensors,
149+
)
143150

144151
return results
145-
146152
except Exception as exc:
147153
self.status_repo.mark_finish(success=False, error=str(exc))
148154
raise

installer/data-downloader/backend/storage.py

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ def merge_scanned_runs(self, scanned: List[dict]) -> dict:
6969

7070
# Keep runs that vanished but still have notes to preserve manual metadata
7171
for key, run in existing.items():
72-
if key not in merged:
72+
if key not in merged and run.get("note"):
7373
merged[key] = run
7474

7575
runs_list = sorted(
@@ -158,6 +158,11 @@ def __init__(self, data_dir: Path):
158158
"source": None,
159159
"last_result": None,
160160
"error": None,
161+
"last_successful_job_timestamp": None,
162+
"error_count": 0,
163+
"last_scan_runs_count": None,
164+
"last_scan_sensors_count": None,
165+
"last_scan_duration_seconds": None,
161166
}
162167
self.store = JSONStore(data_dir / "scanner_status.json", default)
163168

@@ -178,19 +183,41 @@ def mark_start(self, source: str) -> dict:
178183
self.store.write(payload)
179184
return payload
180185

181-
def mark_finish(self, success: bool, error: str | None = None) -> dict:
186+
def mark_finish(
187+
self,
188+
success: bool,
189+
error: str | None = None,
190+
runs_count: int | None = None,
191+
sensors_count: int | None = None,
192+
) -> dict:
182193
payload = self.store.read()
194+
now = now_iso()
183195
payload.update(
184196
{
185197
"scanning": False,
186-
"finished_at": now_iso(),
198+
"finished_at": now,
187199
"last_result": "success" if success else "error",
188200
}
189201
)
190202
if success:
191203
payload.pop("error", None)
204+
payload["last_successful_job_timestamp"] = now
205+
if runs_count is not None:
206+
payload["last_scan_runs_count"] = runs_count
207+
if sensors_count is not None:
208+
payload["last_scan_sensors_count"] = sensors_count
209+
started_at = payload.get("started_at")
210+
if started_at:
211+
try:
212+
duration = (
213+
datetime.fromisoformat(now) - datetime.fromisoformat(started_at)
214+
).total_seconds()
215+
payload["last_scan_duration_seconds"] = round(duration, 2)
216+
except ValueError:
217+
pass
192218
else:
193219
payload["error"] = error or "scan failed"
194-
payload["updated_at"] = now_iso()
220+
payload["error_count"] = payload.get("error_count", 0) + 1
221+
payload["updated_at"] = now
195222
self.store.write(payload)
196223
return payload

installer/data-downloader/frontend/src/App.tsx

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ export default function App() {
2424
const [noteDrafts, setNoteDrafts] = useState<Record<string, string>>({});
2525
const [savingKey, setSavingKey] = useState<string | null>(null);
2626
const [scanState, setScanState] = useState<ScanState>("idle");
27+
const [scanSeason, setScanSeason] = useState<string>("");
2728
const [downloaderSelection, setDownloaderSelection] = useState<DownloaderSelection | null>(null);
2829
const [scannerStatus, setScannerStatus] = useState<ScannerStatus | null>(null);
2930
const sensorsSectionRef = useRef<HTMLElement | null>(null);
@@ -43,6 +44,7 @@ export default function App() {
4344
if (seasonsList.length > 0 && !currentSeason) {
4445
currentSeason = seasonsList[0].name;
4546
setSelectedSeason(currentSeason);
47+
setScanSeason(currentSeason);
4648
}
4749
}
4850

@@ -114,7 +116,7 @@ export default function App() {
114116
updated_at: new Date().toISOString()
115117
}));
116118
try {
117-
await triggerScan();
119+
await triggerScan(scanSeason || undefined);
118120
setScanState("success");
119121
if (typeof window !== "undefined") {
120122
window.setTimeout(() => {
@@ -259,6 +261,18 @@ export default function App() {
259261
)}
260262

261263
<div className="actions">
264+
{seasons.length > 1 && (
265+
<select
266+
value={scanSeason}
267+
onChange={(e) => setScanSeason(e.target.value)}
268+
disabled={scanButtonDisabled}
269+
style={{ padding: "0.5rem", borderRadius: "4px", border: "1px solid #ccc", fontSize: "0.9rem" }}
270+
>
271+
{seasons.map(s => (
272+
<option key={s.name} value={s.name}>{s.name}</option>
273+
))}
274+
</select>
275+
)}
262276
<button className="button" onClick={handleScan} disabled={scanButtonDisabled}>
263277
{scanButtonLabel}
264278
</button>

0 commit comments

Comments (0)