Skip to content

Commit c68a836

Browse files
Update tests to reduce amount of times py_api fixture is used (#293)
Working towards new guidelines for writing tests that provide more structure around when to use the `py_api` fixture versus calling the methods directly. --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent fde90bf commit c68a836

7 files changed

Lines changed: 631 additions & 590 deletions

File tree

.github/workflows/tests.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ on:
66
pull_request:
77
paths:
88
- 'src/**'
9+
- 'tests/**'
910
- 'docker/**'
1011
- 'docker-compose.yaml'
1112
- 'pyproject.toml'

tests/routers/openml/dataset_tag_test.py

Lines changed: 53 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,12 @@
44
import pytest
55
from sqlalchemy.ext.asyncio import AsyncConnection
66

7-
from core.errors import AuthenticationFailedError, TagAlreadyExistsError
7+
from core.errors import TagAlreadyExistsError
88
from database.datasets import get_tags_for
9+
from database.users import User
10+
from routers.openml.datasets import tag_dataset
911
from tests import constants
10-
from tests.users import ApiKey
12+
from tests.users import ADMIN_USER, OWNER_USER, SOME_USER, ApiKey
1113

1214

1315
@pytest.mark.parametrize(
@@ -22,73 +24,71 @@ async def test_dataset_tag_rejects_unauthorized(key: ApiKey, py_api: httpx.Async
2224
json={"data_id": next(iter(constants.PRIVATE_DATASET_ID)), "tag": "test"},
2325
)
2426
assert response.status_code == HTTPStatus.UNAUTHORIZED
25-
assert response.headers["content-type"] == "application/problem+json"
26-
error = response.json()
27-
assert error["type"] == AuthenticationFailedError.uri
28-
assert error["code"] == "103"
27+
28+
29+
@pytest.mark.parametrize(
30+
"tag",
31+
["", "h@", " a", "a" * 65],
32+
ids=["too short", "@", "space", "too long"],
33+
)
34+
async def test_dataset_tag_invalid_tag_is_rejected(
35+
# Constraints for the tag are handled by FastAPI
36+
tag: str,
37+
py_api: httpx.AsyncClient,
38+
) -> None:
39+
new = await py_api.post(
40+
f"/datasets/tag?api_key={ApiKey.ADMIN}",
41+
json={"data_id": 1, "tag": tag},
42+
)
43+
44+
assert new.status_code == HTTPStatus.UNPROCESSABLE_ENTITY
45+
assert new.json()["detail"][0]["loc"] == ["body", "tag"]
46+
47+
48+
# ── Direct call tests: tag_dataset ──
2949

3050

3151
@pytest.mark.mut
3252
@pytest.mark.parametrize(
33-
"key",
34-
[ApiKey.ADMIN, ApiKey.SOME_USER, ApiKey.OWNER_USER],
53+
"user",
54+
[ADMIN_USER, SOME_USER, OWNER_USER],
3555
ids=["administrator", "non-owner", "owner"],
3656
)
37-
async def test_dataset_tag(
38-
key: ApiKey, expdb_test: AsyncConnection, py_api: httpx.AsyncClient
39-
) -> None:
57+
async def test_dataset_tag(user: User, expdb_test: AsyncConnection) -> None:
4058
dataset_id, tag = next(iter(constants.PRIVATE_DATASET_ID)), "test"
41-
response = await py_api.post(
42-
f"/datasets/tag?api_key={key}",
43-
json={"data_id": dataset_id, "tag": tag},
59+
result = await tag_dataset(
60+
data_id=dataset_id,
61+
tag=tag,
62+
user=user,
63+
expdb_db=expdb_test,
4464
)
45-
assert response.status_code == HTTPStatus.OK
46-
assert response.json() == {"data_tag": {"id": str(dataset_id), "tag": [tag]}}
65+
assert result == {"data_tag": {"id": str(dataset_id), "tag": [tag]}}
4766

4867
tags = await get_tags_for(id_=dataset_id, connection=expdb_test)
4968
assert tag in tags
5069

5170

5271
@pytest.mark.mut
53-
async def test_dataset_tag_returns_existing_tags(py_api: httpx.AsyncClient) -> None:
54-
dataset_id, tag = 1, "test"
55-
response = await py_api.post(
56-
f"/datasets/tag?api_key={ApiKey.ADMIN}",
57-
json={"data_id": dataset_id, "tag": tag},
72+
async def test_dataset_tag_returns_existing_tags(expdb_test: AsyncConnection) -> None:
73+
dataset_id, tag = 1, "test" # Dataset 1 already is tagged with 'study_14'
74+
result = await tag_dataset(
75+
data_id=dataset_id,
76+
tag=tag,
77+
user=ADMIN_USER,
78+
expdb_db=expdb_test,
5879
)
59-
assert response.status_code == HTTPStatus.OK
60-
assert response.json() == {"data_tag": {"id": str(dataset_id), "tag": ["study_14", tag]}}
80+
assert result == {"data_tag": {"id": str(dataset_id), "tag": ["study_14", tag]}}
6181

6282

6383
@pytest.mark.mut
64-
async def test_dataset_tag_fails_if_tag_exists(py_api: httpx.AsyncClient) -> None:
84+
async def test_dataset_tag_fails_if_tag_exists(expdb_test: AsyncConnection) -> None:
6585
dataset_id, tag = 1, "study_14" # Dataset 1 already is tagged with 'study_14'
66-
response = await py_api.post(
67-
f"/datasets/tag?api_key={ApiKey.ADMIN}",
68-
json={"data_id": dataset_id, "tag": tag},
69-
)
70-
assert response.status_code == HTTPStatus.CONFLICT
71-
assert response.headers["content-type"] == "application/problem+json"
72-
error = response.json()
73-
assert error["type"] == TagAlreadyExistsError.uri
74-
assert error["code"] == "473"
75-
assert str(dataset_id) in error["detail"]
76-
assert tag in error["detail"]
77-
78-
79-
@pytest.mark.parametrize(
80-
"tag",
81-
["", "h@", " a", "a" * 65],
82-
ids=["too short", "@", "space", "too long"],
83-
)
84-
async def test_dataset_tag_invalid_tag_is_rejected(
85-
tag: str,
86-
py_api: httpx.AsyncClient,
87-
) -> None:
88-
new = await py_api.post(
89-
f"/datasets/tag?api_key={ApiKey.ADMIN}",
90-
json={"data_id": 1, "tag": tag},
91-
)
92-
93-
assert new.status_code == HTTPStatus.UNPROCESSABLE_ENTITY
94-
assert new.json()["detail"][0]["loc"] == ["body", "tag"]
86+
with pytest.raises(TagAlreadyExistsError) as e:
87+
await tag_dataset(
88+
data_id=dataset_id,
89+
tag=tag,
90+
user=ADMIN_USER,
91+
expdb_db=expdb_test,
92+
)
93+
assert str(dataset_id) in e.value.detail
94+
assert tag in e.value.detail
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
"""Tests for the GET /datasets/features/{dataset_id} endpoint."""
2+
3+
from http import HTTPStatus
4+
5+
import httpx
6+
import pytest
7+
from sqlalchemy.ext.asyncio import AsyncConnection
8+
9+
from core.errors import DatasetNoAccessError, DatasetNotFoundError, DatasetProcessingError
10+
from database.users import User
11+
from routers.openml.datasets import get_dataset_features
12+
from tests.users import ADMIN_USER, DATASET_130_OWNER
13+
14+
15+
async def test_get_features_via_api(py_api: httpx.AsyncClient) -> None:
16+
response = await py_api.get("/datasets/features/4")
17+
assert response.status_code == HTTPStatus.OK
18+
assert response.json() == [
19+
{
20+
"index": 0,
21+
"name": "left-weight",
22+
"data_type": "numeric",
23+
"is_target": False,
24+
"is_ignore": False,
25+
"is_row_identifier": False,
26+
"number_of_missing_values": 0,
27+
},
28+
{
29+
"index": 1,
30+
"name": "left-distance",
31+
"data_type": "numeric",
32+
"is_target": False,
33+
"is_ignore": False,
34+
"is_row_identifier": False,
35+
"number_of_missing_values": 0,
36+
},
37+
{
38+
"index": 2,
39+
"name": "right-weight",
40+
"data_type": "numeric",
41+
"is_target": False,
42+
"is_ignore": False,
43+
"is_row_identifier": False,
44+
"number_of_missing_values": 0,
45+
},
46+
{
47+
"index": 3,
48+
"name": "right-distance",
49+
"data_type": "numeric",
50+
"is_target": False,
51+
"is_ignore": False,
52+
"is_row_identifier": False,
53+
"number_of_missing_values": 0,
54+
},
55+
{
56+
"index": 4,
57+
"name": "class",
58+
"data_type": "nominal",
59+
"nominal_values": ["B", "L", "R"],
60+
"is_target": True,
61+
"is_ignore": False,
62+
"is_row_identifier": False,
63+
"number_of_missing_values": 0,
64+
},
65+
]
66+
67+
68+
async def test_dataset_features_with_ontology(expdb_test: AsyncConnection) -> None:
69+
features = await get_dataset_features(dataset_id=11, user=None, expdb=expdb_test)
70+
by_index = {f.index: f for f in features}
71+
assert by_index[1].ontology == ["https://en.wikipedia.org/wiki/Service_(motor_vehicle)"]
72+
assert by_index[2].ontology == [
73+
"https://en.wikipedia.org/wiki/Car_door",
74+
"https://en.wikipedia.org/wiki/Door",
75+
]
76+
assert by_index[3].ontology == [
77+
"https://en.wikipedia.org/wiki/Passenger_vehicles_in_the_United_States"
78+
]
79+
assert by_index[0].ontology is None
80+
assert by_index[4].ontology is None
81+
82+
83+
async def test_dataset_features_no_access(expdb_test: AsyncConnection) -> None:
84+
with pytest.raises(DatasetNoAccessError):
85+
await get_dataset_features(dataset_id=130, user=None, expdb=expdb_test)
86+
87+
88+
@pytest.mark.parametrize("user", [ADMIN_USER, DATASET_130_OWNER])
89+
async def test_dataset_features_access_to_private(user: User, expdb_test: AsyncConnection) -> None:
90+
features = await get_dataset_features(dataset_id=130, user=user, expdb=expdb_test)
91+
assert isinstance(features, list)
92+
93+
94+
async def test_dataset_features_with_processing_error(expdb_test: AsyncConnection) -> None:
95+
dataset_id = 55
96+
with pytest.raises(DatasetProcessingError) as e:
97+
await get_dataset_features(dataset_id=dataset_id, user=None, expdb=expdb_test)
98+
assert "No features found" in e.value.detail
99+
assert str(dataset_id) in e.value.detail
100+
101+
102+
async def test_dataset_features_dataset_does_not_exist(expdb_test: AsyncConnection) -> None:
103+
with pytest.raises(DatasetNotFoundError):
104+
await get_dataset_features(dataset_id=1000, user=None, expdb=expdb_test)
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
"""Tests for the GET /datasets/{dataset_id} endpoint."""
2+
3+
import re
4+
from http import HTTPStatus
5+
6+
import httpx
7+
import pytest
8+
from sqlalchemy import text
9+
from sqlalchemy.ext.asyncio import AsyncConnection
10+
11+
from core.errors import DatasetNoAccessError, DatasetNotFoundError
12+
from database.users import User
13+
from routers.openml.datasets import get_dataset
14+
from schemas.datasets.openml import DatasetMetadata
15+
from tests.users import ADMIN_USER, DATASET_130_OWNER, NO_USER, SOME_USER
16+
17+
18+
async def test_get_dataset_via_api(py_api: httpx.AsyncClient) -> None:
19+
response = await py_api.get("/datasets/1")
20+
assert response.status_code == HTTPStatus.OK
21+
description = response.json()
22+
assert description.pop("description").startswith("**Author**:")
23+
assert description == {
24+
"id": 1,
25+
"name": "anneal",
26+
"version": 1,
27+
"format": "arff",
28+
"description_version": 1,
29+
"upload_date": "2014-04-06T23:19:24",
30+
"licence": "Public",
31+
"url": "http://php-api/data/v1/download/1/anneal.arff",
32+
"parquet_url": "http://minio:9000/datasets/0000/0001/dataset_1.pq",
33+
"file_id": 1,
34+
"default_target_attribute": ["class"],
35+
"version_label": "1",
36+
"tag": ["study_14"],
37+
"visibility": "public",
38+
"status": "active",
39+
"processing_date": "2024-01-04T10:13:59",
40+
"md5_checksum": "4eaed8b6ec9d8211024b6c089b064761",
41+
"row_id_attribute": [],
42+
"ignore_attribute": [],
43+
"language": "",
44+
"error": None,
45+
"warning": None,
46+
"citation": "",
47+
"collection_date": None,
48+
"contributor": [],
49+
"creator": [],
50+
"paper_url": None,
51+
"original_data_url": [],
52+
}
53+
54+
55+
async def test_rfc9457_error_format(py_api: httpx.AsyncClient) -> None:
56+
"""Single test for the generic RFC 9457 exception handler — covers all error types."""
57+
response = await py_api.get("/datasets/100000")
58+
assert response.status_code == HTTPStatus.NOT_FOUND
59+
assert response.headers["content-type"] == "application/problem+json"
60+
error = response.json()
61+
assert error["type"] == DatasetNotFoundError.uri
62+
assert error["title"] == "Dataset Not Found"
63+
assert error["status"] == HTTPStatus.NOT_FOUND
64+
assert re.match(r"No dataset with id \d+ found.", error["detail"])
65+
assert error["code"] == "111"
66+
67+
68+
@pytest.mark.mut
69+
async def test_dataset_no_500_with_multiple_processing_entries(
70+
py_api: httpx.AsyncClient,
71+
expdb_test: AsyncConnection,
72+
) -> None:
73+
"""Regression test for issue #145: multiple processing entries caused 500."""
74+
await expdb_test.execute(
75+
text("INSERT INTO evaluation_engine(id, name, description) VALUES (99, 'test_engine', '')"),
76+
)
77+
await expdb_test.execute(
78+
text(
79+
"INSERT INTO data_processed(did, evaluation_engine_id, user_id, processing_date) "
80+
"VALUES (1, 99, 2, '2020-01-01 00:00:00')",
81+
),
82+
)
83+
response = await py_api.get("/datasets/1")
84+
assert response.status_code == HTTPStatus.OK
85+
86+
87+
@pytest.mark.parametrize(
88+
"dataset_id",
89+
[-1, 138, 100_000],
90+
)
91+
async def test_get_dataset_not_found(
92+
dataset_id: int,
93+
expdb_test: AsyncConnection,
94+
user_test: AsyncConnection,
95+
) -> None:
96+
with pytest.raises(DatasetNotFoundError):
97+
await get_dataset(
98+
dataset_id=dataset_id,
99+
user=None,
100+
user_db=user_test,
101+
expdb_db=expdb_test,
102+
)
103+
104+
105+
@pytest.mark.parametrize(
106+
"user",
107+
[
108+
NO_USER,
109+
SOME_USER,
110+
],
111+
)
112+
async def test_private_dataset_no_access(
113+
user: User | None,
114+
expdb_test: AsyncConnection,
115+
user_test: AsyncConnection,
116+
) -> None:
117+
with pytest.raises(DatasetNoAccessError) as e:
118+
await get_dataset(
119+
dataset_id=130,
120+
user=user,
121+
user_db=user_test,
122+
expdb_db=expdb_test,
123+
)
124+
assert e.value.status_code == HTTPStatus.FORBIDDEN
125+
assert e.value.uri == DatasetNoAccessError.uri
126+
no_access = 112
127+
assert e.value.code == no_access
128+
129+
130+
@pytest.mark.parametrize(
131+
"user", [DATASET_130_OWNER, ADMIN_USER, pytest.param(SOME_USER, marks=pytest.mark.xfail)]
132+
)
133+
async def test_private_dataset_access(
134+
user: User, expdb_test: AsyncConnection, user_test: AsyncConnection
135+
) -> None:
136+
dataset = await get_dataset(
137+
dataset_id=130,
138+
user=user,
139+
user_db=user_test,
140+
expdb_db=expdb_test,
141+
)
142+
assert isinstance(dataset, DatasetMetadata)

0 commit comments

Comments
 (0)