Skip to content

Commit ba8be42

Browse files
Further optimize e2e: 4 workers, lower long-query threshold, split lz4
- Use -n 4 instead of -n auto in coverage workflow. The e2e tests are network-bound (waiting on the warehouse), not CPU-bound, so 4 workers on a 2-CPU runner is fine and doubles parallelism.
- Lower test_long_running_query min_duration from 3 min to 1 min. The test validates long-running query completion — 1 minute is sufficient and saves ~4 min per variant.
- Split the lz4 on/off loop in test_query_with_large_wide_result_set into separate parametrized test cases so xdist can run them on different workers instead of sequentially in one test.

Co-authored-by: Isaac
Signed-off-by: Vikrant Puppala <vikrant.puppala@databricks.com>
1 parent 1625263 commit ba8be42

File tree

2 files changed

+16
-16
lines changed

2 files changed

+16
-16
lines changed

.github/workflows/code-coverage.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ jobs:
4141
continue-on-error: false
4242
run: |
4343
poetry run pytest tests/unit tests/e2e \
44-
-n auto \
44+
-n 4 \
4545
--dist=loadgroup \
4646
--cov=src \
4747
--cov-report=xml \

tests/e2e/common/large_queries_mixin.py

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,8 @@ class LargeWideResultSetMixin(LargeQueriesFetchMixin):
5353
{"use_sea": True},
5454
],
5555
)
56-
def test_query_with_large_wide_result_set(self, extra_params):
56+
@pytest.mark.parametrize("lz4_compression", [False, True])
57+
def test_query_with_large_wide_result_set(self, extra_params, lz4_compression):
5758
resultSize = 100 * 1000 * 1000 # 100 MB
5859
width = 8192 # B
5960
rows = resultSize // width
@@ -64,20 +65,19 @@ def test_query_with_large_wide_result_set(self, extra_params):
6465
# This is used by PyHive tests to determine the buffer size
6566
self.arraysize = 1000
6667
with self.cursor(extra_params) as cursor:
67-
for lz4_compression in [False, True]:
68-
cursor.connection.lz4_compression = lz4_compression
69-
uuids = ", ".join(["uuid() uuid{}".format(i) for i in range(cols)])
70-
cursor.execute(
71-
"SELECT id, {uuids} FROM RANGE({rows})".format(
72-
uuids=uuids, rows=rows
73-
)
68+
cursor.connection.lz4_compression = lz4_compression
69+
uuids = ", ".join(["uuid() uuid{}".format(i) for i in range(cols)])
70+
cursor.execute(
71+
"SELECT id, {uuids} FROM RANGE({rows})".format(
72+
uuids=uuids, rows=rows
7473
)
75-
assert lz4_compression == cursor.active_result_set.lz4_compressed
76-
for row_id, row in enumerate(
77-
self.fetch_rows(cursor, rows, fetchmany_size)
78-
):
79-
assert row[0] == row_id # Verify no rows are dropped in the middle.
80-
assert len(row[1]) == 36
74+
)
75+
assert lz4_compression == cursor.active_result_set.lz4_compressed
76+
for row_id, row in enumerate(
77+
self.fetch_rows(cursor, rows, fetchmany_size)
78+
):
79+
assert row[0] == row_id # Verify no rows are dropped in the middle.
80+
assert len(row[1]) == 36
8181

8282

8383
class LargeNarrowResultSetMixin(LargeQueriesFetchMixin):
@@ -120,7 +120,7 @@ def test_long_running_query(self, extra_params):
120120
and asserts that the query completes successfully.
121121
"""
122122
minutes = 60
123-
min_duration = 3 * minutes
123+
min_duration = 1 * minutes
124124

125125
duration = -1
126126
scale0 = 10000

0 commit comments

Comments (0)