From d1b3566607680353185bd2ebf90b4cca78744fd6 Mon Sep 17 00:00:00 2001
From: Aaron Gibson
Date: Wed, 11 Mar 2026 13:35:33 +0100
Subject: [PATCH 1/9] add end-to-end tests with docker-compose.yml

---
Review notes (text below the three-dash line is not part of the commit
message):
 * The wait step now finishes with `exit "$(podman wait oonidata-pipeline)"`.
   `podman wait` prints the exit code of the stopped container, so a failed
   downloader/pipeline run fails the job instead of always passing. The line
   replaces the leftover "Replace `your_container_name`" template comment,
   which keeps every hunk line count unchanged.
 * The curl URL is quoted; unquoted, each `&` in the query string is a shell
   control operator and the request is split and sent to the background.
 * `index` blob hashes are those of the original series.

 .github/workflows/test_end_to_end.yml | 58 +++++++++++++++++++++++++++
 Dockerfile                            | 15 +++++++
 tests/integration/data/.gitignore     |  1 +
 tests/integration/docker-compose.yml  | 46 +++++++++++++++++++++
 4 files changed, 120 insertions(+)
 create mode 100644 .github/workflows/test_end_to_end.yml
 create mode 100644 Dockerfile
 create mode 100644 tests/integration/data/.gitignore
 create mode 100644 tests/integration/docker-compose.yml

diff --git a/.github/workflows/test_end_to_end.yml b/.github/workflows/test_end_to_end.yml
new file mode 100644
index 00000000..e905c2b0
--- /dev/null
+++ b/.github/workflows/test_end_to_end.yml
@@ -0,0 +1,58 @@
+name: End to End Data Pipeline Test
+
+on:
+  push:
+    branches:
+      - main
+
+jobs:
+  build-and-run:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v6
+
+      - name: cache inputs and outputs
+        uses: actions/cache@v5
+        with:
+          key: data-cache-${{ github.run_id }}
+          restore-keys: |
+            data-cache-
+          path: |
+            tests/integration/data
+
+      - name: Install Podman
+        run: |
+          sudo apt update
+          sudo apt install -y podman podman-compose podman-docker
+
+      - name: Run Docker Compose with Podman
+        run: |
+          cd tests/integration && podman-compose up -d
+
+      - name: Wait for Downlaoder Container to Exit
+        run: |
+          while [ $(podman ps -q -f name=oonidata-pipeline | wc -l) -gt 0 ]; do
+            echo "Container is still running..."
+            sleep 5  # Wait for 5 seconds before checking again
+          done
+          echo "Container has exited."
+          exit "$(podman wait oonidata-pipeline)"  # fail the job when the container exited non-zero
+
+      - name: Query and verify data from API
+        run: |
+          response=$(curl -s -o response.json -w "%{http_code}" "http://localhost:8000/api/v1/measurements?since=2026-01-01T00%3A00%3A00&until=2026-01-02T00%3A00%3A00&order_by=measurement_start_time&order=DESC&offset=0&limit=100")
+
+          if [ "$response" -eq 200 ]; then
+            echo "HTTP 200 OK: Data retrieved successfully."
+            cat response.json  # Optional: Process or display the retrieved data
+          else
+            echo "Failed to retrieve data. HTTP Status Code: $response"
+            exit 1
+          fi
+
+      - name: Shutdown
+        run: |
+          cd tests/integration && podman-compose down
+
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 00000000..6f0f1cae
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,15 @@
+# Use a specific Python image version (if compatible)
+FROM python:3.11
+
+# Set the working directory
+WORKDIR /app
+
+# Copy oonidata and oonipipeline source files into the container
+COPY oonidata ./oonidata
+COPY oonipipeline ./oonipipeline
+
+# Install dependencies for both projects
+RUN pip install ./oonidata && pip install ./oonipipeline
+
+# Set the default command for the container
+CMD ["/bin/bash"]
diff --git a/tests/integration/data/.gitignore b/tests/integration/data/.gitignore
new file mode 100644
index 00000000..f935021a
--- /dev/null
+++ b/tests/integration/data/.gitignore
@@ -0,0 +1 @@
+!.gitignore
diff --git a/tests/integration/docker-compose.yml b/tests/integration/docker-compose.yml
new file mode 100644
index 00000000..e00902b6
--- /dev/null
+++ b/tests/integration/docker-compose.yml
@@ -0,0 +1,46 @@
+version: '3.8'
+
+services:
+  downloader:
+    build:
+      context: ../../
+      dockerfile: Dockerfile
+    image: oonidata_image
+    container_name: oonidata-pipeline
+    volumes:
+      - ./data:/data:Z
+    working_dir: /data
+    command: >
+      bash -c "oonidata sync --output-dir . --probe-cc IT --start-day 2026-01-01 --end-day 2026-01-02 --test-name webconnectivity &&
+      oonipipeline run --create-tables --probe-cc IT --test-name signal --workflow-name observations --start-at 2026-01-01 --end-at 2026-01-02
+    depends_on:
+      - clickhouse
+    environment:
+      CLICKHOUSE_URL: "http://testuser:testuser@clickhouse:9000/ooni"
+
+  api:
+    container_name: oonidata-api
+    image: oonidata_image
+    command: "uvicorn --log-level debug --port 8000 --host 0.0.0.0 oonipipeline.api.main:app"
+    depends_on:
+      - clickhouse
+    ports:
+      - "8000:8000"  # HTTP interface
+    environment:
+      CLICKHOUSE_URL: "http://testuser:testuser@clickhouse:9000/ooni"
+
+  clickhouse:
+    image: clickhouse/clickhouse-server:latest
+    container_name: clickhouse_server
+    ports:
+      - "8123:8123"  # HTTP interface
+      - "9000:9000"  # Native interface
+    #volumes:
+    #  - clickhouse_data:/var/lib/clickhouse
+    environment:
+      CLICKHOUSE_USER: "testuser"
+      CLICKHOUSE_PASSWORD: "testuser"
+      CLICKHOUSE_DB: "ooni"
+
+volumes:
+  clickhouse_data:

From 19870b3884fde4b3b76237dda142a97db4a0adf6 Mon Sep 17 00:00:00 2001
From: Aaron Gibson
Date: Wed, 11 Mar 2026 13:38:31 +0100
Subject: [PATCH 2/9] run workflow on this branch

---
 .github/workflows/test_end_to_end.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/test_end_to_end.yml b/.github/workflows/test_end_to_end.yml
index e905c2b0..7d5506b4 100644
--- a/.github/workflows/test_end_to_end.yml
+++ b/.github/workflows/test_end_to_end.yml
@@ -4,6 +4,7 @@ on:
   push:
     branches:
       - main
+      - add_end_to_end_tests
 
 jobs:
   build-and-run:

From 66366ecdc7cdc48101644da323b2ac14f039fb80 Mon Sep 17 00:00:00 2001
From: Aaron Gibson
Date: Wed, 11 Mar 2026 13:38:51 +0100
Subject: [PATCH 3/9] build oonipipeline as command

---
 oonipipeline/pyproject.toml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/oonipipeline/pyproject.toml b/oonipipeline/pyproject.toml
index 1c8f92aa..a619df00 100644
--- a/oonipipeline/pyproject.toml
+++ b/oonipipeline/pyproject.toml
@@ -69,6 +69,9 @@ path = ".venv/"
 [tool.hatch.version]
 path = "src/oonipipeline/__about__.py"
 
+[project.scripts]
+oonipipeline = "oonipipeline.main:cli"
+
 [tool.hatch.envs.default.scripts]
 oonipipeline = "python -m oonipipeline.main {args}"
 dataviz = "uvicorn oonipipeline.dataviz.main:app {args}"

From 303b276e72abb93b9a4b4d6cc8ead05b9bf30ef0 Mon Sep 17 00:00:00 2001
From: Aaron Gibson
Date: Wed, 18 Mar 2026 13:02:07 +0100
Subject: [PATCH 4/9] fix typo

---
Review note: trailing context lines follow the revised wait step of patch 1
(the template comment is gone, so the loop starts one line earlier).

 .github/workflows/test_end_to_end.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test_end_to_end.yml b/.github/workflows/test_end_to_end.yml
index 7d5506b4..ffef3911 100644
--- a/.github/workflows/test_end_to_end.yml
+++ b/.github/workflows/test_end_to_end.yml
@@ -32,7 +32,7 @@ jobs:
         run: |
           cd tests/integration && podman-compose up -d
 
-      - name: Wait for Downlaoder Container to Exit
+      - name: Wait for Downloader Container to Exit
         run: |
           while [ $(podman ps -q -f name=oonidata-pipeline | wc -l) -gt 0 ]; do
             echo "Container is still running..."

From 97629dc074ad464f79767c0855ed601b3a25c3f5 Mon Sep 17 00:00:00 2001
From: Aaron Gibson
Date: Wed, 18 Mar 2026 13:02:38 +0100
Subject: [PATCH 5/9] add missing quote

---
 tests/integration/docker-compose.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/docker-compose.yml b/tests/integration/docker-compose.yml
index e00902b6..a793116b 100644
--- a/tests/integration/docker-compose.yml
+++ b/tests/integration/docker-compose.yml
@@ -12,7 +12,7 @@ services:
     working_dir: /data
     command: >
       bash -c "oonidata sync --output-dir . --probe-cc IT --start-day 2026-01-01 --end-day 2026-01-02 --test-name webconnectivity &&
-      oonipipeline run --create-tables --probe-cc IT --test-name signal --workflow-name observations --start-at 2026-01-01 --end-at 2026-01-02
+      oonipipeline run --create-tables --probe-cc IT --test-name signal --workflow-name observations --start-at 2026-01-01 --end-at 2026-01-02"
     depends_on:
       - clickhouse
     environment:

From 543856dec58174f027fd685946e347a676fdf7b3 Mon Sep 17 00:00:00 2001
From: Aaron Gibson
Date: Wed, 18 Mar 2026 13:02:55 +0100
Subject: [PATCH 6/9] remove deprecated API endpoint

---
Review note: the leading context and the removed curl/cat lines mirror the
revised workflow text of patch 1 (exit-code line, quoted URL).

 .github/workflows/test_end_to_end.yml | 13 -------------
 tests/integration/docker-compose.yml  | 11 -----------
 2 files changed, 24 deletions(-)

diff --git a/.github/workflows/test_end_to_end.yml b/.github/workflows/test_end_to_end.yml
index ffef3911..1efca92f 100644
--- a/.github/workflows/test_end_to_end.yml
+++ b/.github/workflows/test_end_to_end.yml
@@ -41,19 +41,6 @@ jobs:
           echo "Container has exited."
           exit "$(podman wait oonidata-pipeline)"  # fail the job when the container exited non-zero
 
-      - name: Query and verify data from API
-        run: |
-          response=$(curl -s -o response.json -w "%{http_code}" "http://localhost:8000/api/v1/measurements?since=2026-01-01T00%3A00%3A00&until=2026-01-02T00%3A00%3A00&order_by=measurement_start_time&order=DESC&offset=0&limit=100")
-
-          if [ "$response" -eq 200 ]; then
-            echo "HTTP 200 OK: Data retrieved successfully."
-            cat response.json  # Optional: Process or display the retrieved data
-          else
-            echo "Failed to retrieve data. HTTP Status Code: $response"
-            exit 1
-          fi
-
       - name: Shutdown
         run: |
           cd tests/integration && podman-compose down
-
diff --git a/tests/integration/docker-compose.yml b/tests/integration/docker-compose.yml
index a793116b..cb8cd532 100644
--- a/tests/integration/docker-compose.yml
+++ b/tests/integration/docker-compose.yml
@@ -18,17 +18,6 @@ services:
     environment:
       CLICKHOUSE_URL: "http://testuser:testuser@clickhouse:9000/ooni"
 
-  api:
-    container_name: oonidata-api
-    image: oonidata_image
-    command: "uvicorn --log-level debug --port 8000 --host 0.0.0.0 oonipipeline.api.main:app"
-    depends_on:
-      - clickhouse
-    ports:
-      - "8000:8000"  # HTTP interface
-    environment:
-      CLICKHOUSE_URL: "http://testuser:testuser@clickhouse:9000/ooni"
-
   clickhouse:
     image: clickhouse/clickhouse-server:latest
     container_name: clickhouse_server

From 7d8120accbdc1344e6e89089eca11e01bf229984 Mon Sep 17 00:00:00 2001
From: Aaron Gibson
Date: Wed, 18 Mar 2026 13:04:00 +0100
Subject: [PATCH 7/9] add clickhouse_init.sql

---
 tests/integration/clickhouse_init.sql | 33 +++++++++++++++++++++++++++
 tests/integration/docker-compose.yml  |  4 ++--
 2 files changed, 35 insertions(+), 2 deletions(-)
 create mode 100644 tests/integration/clickhouse_init.sql

diff --git a/tests/integration/clickhouse_init.sql b/tests/integration/clickhouse_init.sql
new file mode 100644
index 00000000..ea8fc8c6
--- /dev/null
+++ b/tests/integration/clickhouse_init.sql
@@ -0,0 +1,33 @@
+CREATE TABLE ooni.fingerprints_dns
+(
+    `name` String,
+    `scope` Enum8('nat' = 1, 'isp' = 2, 'prod' = 3, 'inst' = 4, 'vbw' = 5, 'fp' = 6),
+    `other_names` String,
+    `location_found` String,
+    `pattern_type` Enum8('full' = 1, 'prefix' = 2, 'contains' = 3, 'regexp' = 4),
+    `pattern` String,
+    `confidence_no_fp` UInt8,
+    `expected_countries` String,
+    `source` String,
+    `exp_url` String,
+    `notes` String
+)
+ENGINE = EmbeddedRocksDB
+PRIMARY KEY name;
+
+CREATE TABLE ooni.fingerprints_http
+(
+    `name` String,
+    `scope` Enum8('nat' = 1, 'isp' = 2, 'prod' = 3, 'inst' = 4, 'vbw' = 5, 'fp' = 6, 'injb' = 7, 'prov' = 8),
+    `other_names` String,
+    `location_found` String,
+    `pattern_type` Enum8('full' = 1, 'prefix' = 2, 'contains' = 3, 'regexp' = 4),
+    `pattern` String,
+    `confidence_no_fp` UInt8,
+    `expected_countries` String,
+    `source` String,
+    `exp_url` String,
+    `notes` String
+)
+ENGINE = EmbeddedRocksDB
+PRIMARY KEY name;
diff --git a/tests/integration/docker-compose.yml b/tests/integration/docker-compose.yml
index cb8cd532..0d076387 100644
--- a/tests/integration/docker-compose.yml
+++ b/tests/integration/docker-compose.yml
@@ -24,8 +24,8 @@ services:
     ports:
       - "8123:8123"  # HTTP interface
       - "9000:9000"  # Native interface
-    #volumes:
-    #  - clickhouse_data:/var/lib/clickhouse
+    volumes:
+      - ./clickhouse_init.sql:/docker-entrypoint-initdb.d/init.sql:Z
     environment:
       CLICKHOUSE_USER: "testuser"
       CLICKHOUSE_PASSWORD: "testuser"

From 3e3664362d0febe36bbd66aa0db3ff3e7b0b304c Mon Sep 17 00:00:00 2001
From: Aaron Gibson
Date: Wed, 18 Mar 2026 13:04:23 +0100
Subject: [PATCH 8/9] run fastpath_feeder

---
Review note: the ClickHouse URL inside `bash -c "..."` is now single-quoted.
With nested double quotes the outer string was closed and reopened around the
URL, which only worked because the URL contains no whitespace.

 tests/integration/docker-compose.yml | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tests/integration/docker-compose.yml b/tests/integration/docker-compose.yml
index 0d076387..e4cf250b 100644
--- a/tests/integration/docker-compose.yml
+++ b/tests/integration/docker-compose.yml
@@ -31,5 +31,17 @@ services:
       CLICKHOUSE_PASSWORD: "testuser"
       CLICKHOUSE_DB: "ooni"
 
+  fastpath:
+    image: ooni/fastpath:latest
+    container_name: fastpath_feeder
+    volumes:
+      - ./data:/data
+      - ./fastpath:/etc/ooni:Z
+    command: >
+      bash -c "sleep 10 && ./run_fastpath --debug --clickhouse-url 'clickhouse://testuser:testuser@clickhouse:9000/ooni' --stdout"
+    depends_on:
+      - clickhouse
+      - downloader
+
 volumes:
   clickhouse_data:

From 4cf1ac79ae7e0269e384c65a340857549a4adf56 Mon Sep 17 00:00:00 2001
From: Aaron Gibson
Date: Wed, 18 Mar 2026 13:28:05 +0100
Subject: [PATCH 9/9] update apache-airflow to 2.11.1

---
 oonipipeline/pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/oonipipeline/pyproject.toml b/oonipipeline/pyproject.toml
index a619df00..5f6d4450 100644
--- a/oonipipeline/pyproject.toml
+++ b/oonipipeline/pyproject.toml
@@ -32,7 +32,7 @@ dependencies = [
   "opentelemetry-exporter-otlp-proto-grpc ~= 1.29.0",
   "uvicorn ~= 0.25.0",
   "pydantic-settings ~= 2.4.0",
-  "apache-airflow == 2.10.4"
+  "apache-airflow == 2.11.1"
 ]
 
 [project.optional-dependencies]