diff --git a/.github/workflows/test_end_to_end.yml b/.github/workflows/test_end_to_end.yml
new file mode 100644
index 00000000..1efca92f
--- /dev/null
+++ b/.github/workflows/test_end_to_end.yml
@@ -0,0 +1,52 @@
+name: End to End Data Pipeline Test
+
+on:
+  push:
+    branches:
+      - main
+      - add_end_to_end_tests
+
+jobs:
+  build-and-run:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v6
+
+      # The run id makes the key unique per run, so a new entry is always
+      # saved, while restore-keys falls back to the newest data-cache- entry.
+      - name: cache inputs and outputs
+        uses: actions/cache@v5
+        with:
+          key: data-cache-${{ github.run_id }}
+          restore-keys: |
+            data-cache-
+          path: |
+            tests/integration/data
+
+      - name: Install Podman
+        run: |
+          sudo apt update
+          sudo apt install -y podman podman-compose podman-docker
+
+      - name: Run Docker Compose with Podman
+        run: |
+          cd tests/integration && podman-compose up -d
+
+      - name: Wait for Downloader Container to Exit
+        run: |
+          # `podman wait` blocks until the container stops and prints its
+          # exit code; propagate it so a failed pipeline fails the job.
+          exit_code="$(podman wait oonidata-pipeline)"
+          echo "Container has exited with code ${exit_code}."
+          if [ "${exit_code}" -ne 0 ]; then
+            podman logs oonidata-pipeline
+            exit "${exit_code}"
+          fi
+
+      - name: Shutdown
+        # Tear the stack down even when an earlier step failed.
+        if: always()
+        run: |
+          cd tests/integration && podman-compose down
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 00000000..6f0f1cae
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,15 @@
+# Use a specific Python image version (if compatible)
+FROM python:3.11
+
+# Set the working directory
+WORKDIR /app
+
+# Copy oonidata and oonipipeline source files into the container
+COPY oonidata ./oonidata
+COPY oonipipeline ./oonipipeline
+
+# Install dependencies for both projects
+RUN pip install ./oonidata && pip install ./oonipipeline
+
+# Set the default command for the container
+CMD ["/bin/bash"]
diff --git a/oonipipeline/pyproject.toml b/oonipipeline/pyproject.toml
index 1c8f92aa..5f6d4450 100644
--- a/oonipipeline/pyproject.toml
+++ b/oonipipeline/pyproject.toml
@@ -32,7 +32,7 @@ dependencies = [
   "opentelemetry-exporter-otlp-proto-grpc ~= 1.29.0",
   "uvicorn ~= 0.25.0",
   "pydantic-settings ~= 2.4.0",
-  "apache-airflow == 2.10.4"
+  "apache-airflow == 2.11.1"
 ]
 
 [project.optional-dependencies]
@@ -69,6 +69,9 @@ path = ".venv/"
 [tool.hatch.version]
 path = "src/oonipipeline/__about__.py"
 
+[project.scripts]
+oonipipeline = "oonipipeline.main:cli"
+
 [tool.hatch.envs.default.scripts]
 oonipipeline = "python -m oonipipeline.main {args}"
 dataviz = "uvicorn oonipipeline.dataviz.main:app {args}"
diff --git a/tests/integration/clickhouse_init.sql b/tests/integration/clickhouse_init.sql
new file mode 100644
index 00000000..ea8fc8c6
--- /dev/null
+++ b/tests/integration/clickhouse_init.sql
@@ -0,0 +1,33 @@
+CREATE TABLE ooni.fingerprints_dns
+(
+    `name` String,
+    `scope` Enum8('nat' = 1, 'isp' = 2, 'prod' = 3, 'inst' = 4, 'vbw' = 5, 'fp' = 6),
+    `other_names` String,
+    `location_found` String,
+    `pattern_type` Enum8('full' = 1, 'prefix' = 2, 'contains' = 3, 'regexp' = 4),
+    `pattern` String,
+    `confidence_no_fp` UInt8,
+    `expected_countries` String,
+    `source` String,
+    `exp_url` String,
+    `notes` String
+)
+ENGINE = EmbeddedRocksDB
+PRIMARY KEY name;
+
+CREATE TABLE ooni.fingerprints_http
+(
+    `name` String,
+    `scope` Enum8('nat' = 1, 'isp' = 2, 'prod' = 3, 'inst' = 4, 'vbw' = 5, 'fp' = 6, 'injb' = 7, 'prov' = 8),
+    `other_names` String,
+    `location_found` String,
+    `pattern_type` Enum8('full' = 1, 'prefix' = 2, 'contains' = 3, 'regexp' = 4),
+    `pattern` String,
+    `confidence_no_fp` UInt8,
+    `expected_countries` String,
+    `source` String,
+    `exp_url` String,
+    `notes` String
+)
+ENGINE = EmbeddedRocksDB
+PRIMARY KEY name;
diff --git a/tests/integration/data/.gitignore b/tests/integration/data/.gitignore
new file mode 100644
index 00000000..f935021a
--- /dev/null
+++ b/tests/integration/data/.gitignore
@@ -0,0 +1,3 @@
+# Keep this directory in git, but ignore everything else placed in it.
+*
+!.gitignore
diff --git a/tests/integration/docker-compose.yml b/tests/integration/docker-compose.yml
new file mode 100644
index 00000000..e4cf250b
--- /dev/null
+++ b/tests/integration/docker-compose.yml
@@ -0,0 +1,54 @@
+version: '3.8'
+
+services:
+  # Syncs raw measurements into ./data, then runs the observations workflow.
+  downloader:
+    build:
+      context: ../../
+      dockerfile: Dockerfile
+    image: oonidata_image
+    # The end-to-end CI workflow waits on this exact container name.
+    container_name: oonidata-pipeline
+    volumes:
+      - ./data:/data:Z
+    working_dir: /data
+    # NOTE(review): the sync fetches `webconnectivity` while the pipeline
+    # processes `signal` -- confirm the differing test names are intended.
+    command: >
+      bash -c "oonidata sync --output-dir . --probe-cc IT --start-day 2026-01-01 --end-day 2026-01-02 --test-name webconnectivity &&
+      oonipipeline run --create-tables --probe-cc IT --test-name signal --workflow-name observations --start-at 2026-01-01 --end-at 2026-01-02"
+    depends_on:
+      - clickhouse
+    environment:
+      # Port 9000 is the native interface, hence clickhouse:// (not http://).
+      CLICKHOUSE_URL: "clickhouse://testuser:testuser@clickhouse:9000/ooni"
+
+  clickhouse:
+    image: clickhouse/clickhouse-server:latest
+    container_name: clickhouse_server
+    ports:
+      - "8123:8123"  # HTTP interface
+      - "9000:9000"  # Native interface
+    volumes:
+      - ./clickhouse_init.sql:/docker-entrypoint-initdb.d/init.sql:Z
+    environment:
+      CLICKHOUSE_USER: "testuser"
+      CLICKHOUSE_PASSWORD: "testuser"
+      CLICKHOUSE_DB: "ooni"
+
+  fastpath:
+    image: ooni/fastpath:latest
+    container_name: fastpath_feeder
+    volumes:
+      - ./data:/data
+      - ./fastpath:/etc/ooni:Z
+    # Presumably the sleep gives ClickHouse time to start -- TODO confirm.
+    # The URL is single-quoted so it nests inside the double-quoted script.
+    command: >
+      bash -c "sleep 10 && ./run_fastpath --debug --clickhouse-url 'clickhouse://testuser:testuser@clickhouse:9000/ooni' --stdout"
+    depends_on:
+      - clickhouse
+      - downloader
+
+# NOTE(review): declared but not mounted by any service above.
+volumes:
+  clickhouse_data: