Skip to content

Commit ba8be42

Browse files
Further optimize e2e: 4 workers, lower long-query threshold, split lz4
- Use -n 4 instead of -n auto in coverage workflow. The e2e tests are network-bound (waiting on the warehouse), not CPU-bound, so 4 workers on a 2-CPU runner is fine and doubles parallelism.
- Lower test_long_running_query min_duration from 3 min to 1 min. The test validates long-running query completion — 1 minute is sufficient and saves ~4 min per variant.
- Split the lz4 on/off loop in test_query_with_large_wide_result_set into separate parametrized test cases so xdist can run them on different workers instead of sequentially in one test.

Co-authored-by: Isaac
Signed-off-by: Vikrant Puppala <vikrant.puppala@databricks.com>
1 parent 1625263 commit ba8be42

File tree

2 files changed

+16
-16
lines changed

2 files changed

+16
-16
lines changed

.github/workflows/code-coverage.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ jobs:
4141
continue-on-error: false
4242
run: |
4343
poetry run pytest tests/unit tests/e2e \
44-
-n auto \
44+
-n 4 \
4545
--dist=loadgroup \
4646
--cov=src \
4747
--cov-report=xml \

tests/e2e/common/large_queries_mixin.py

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,8 @@ class LargeWideResultSetMixin(LargeQueriesFetchMixin):
5353
{"use_sea": True},
5454
],
5555
)
56-
def test_query_with_large_wide_result_set(self, extra_params):
56+
@pytest.mark.parametrize("lz4_compression", [False, True])
57+
def test_query_with_large_wide_result_set(self, extra_params, lz4_compression):
5758
resultSize = 100 * 1000 * 1000 # 100 MB
5859
width = 8192 # B
5960
rows = resultSize // width
@@ -64,20 +65,19 @@ def test_query_with_large_wide_result_set(self, extra_params):
6465
# This is used by PyHive tests to determine the buffer size
6566
self.arraysize = 1000
6667
with self.cursor(extra_params) as cursor:
67-
for lz4_compression in [False, True]:
68-
cursor.connection.lz4_compression = lz4_compression
69-
uuids = ", ".join(["uuid() uuid{}".format(i) for i in range(cols)])
70-
cursor.execute(
71-
"SELECT id, {uuids} FROM RANGE({rows})".format(
72-
uuids=uuids, rows=rows
73-
)
68+
cursor.connection.lz4_compression = lz4_compression
69+
uuids = ", ".join(["uuid() uuid{}".format(i) for i in range(cols)])
70+
cursor.execute(
71+
"SELECT id, {uuids} FROM RANGE({rows})".format(
72+
uuids=uuids, rows=rows
7473
)
75-
assert lz4_compression == cursor.active_result_set.lz4_compressed
76-
for row_id, row in enumerate(
77-
self.fetch_rows(cursor, rows, fetchmany_size)
78-
):
79-
assert row[0] == row_id # Verify no rows are dropped in the middle.
80-
assert len(row[1]) == 36
74+
)
75+
assert lz4_compression == cursor.active_result_set.lz4_compressed
76+
for row_id, row in enumerate(
77+
self.fetch_rows(cursor, rows, fetchmany_size)
78+
):
79+
assert row[0] == row_id # Verify no rows are dropped in the middle.
80+
assert len(row[1]) == 36
8181

8282

8383
class LargeNarrowResultSetMixin(LargeQueriesFetchMixin):
@@ -120,7 +120,7 @@ def test_long_running_query(self, extra_params):
120120
and asserts that the query completes successfully.
121121
"""
122122
minutes = 60
123-
min_duration = 3 * minutes
123+
min_duration = 1 * minutes
124124

125125
duration = -1
126126
scale0 = 10000

0 commit comments

Comments (0)