Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
3cc6c70
Add GitHub Actions workflow for type checking (mypy, pyright, ty). Ad…
rok Dec 21, 2025
ffcc113
Update ci/scripts/python_test_type_annotations.sh
rok Dec 23, 2025
d756753
Apply suggestion from @raulcd
rok Jan 14, 2026
8d24d4c
review feedback
rok Jan 14, 2026
48463b6
include dev/update_stub_docstrings.py
rok Jan 14, 2026
8554fdc
Use PYARROW_TEST_ANNOTATIONS in windows build, disable wheel docstrin…
rok Jan 14, 2026
eb15ab1
work on dev/update_stub_docstrings.py
rok Jan 14, 2026
051f66f
further work on dev/update_stub_docstrings.py
rok Jan 14, 2026
50a8da0
lint
rok Jan 14, 2026
4de0ea5
add click for docstring population
rok Jan 14, 2026
9ade339
Remove dependencies on click and griffe
rok Jan 14, 2026
a75c01e
fix import paths
rok Jan 14, 2026
8e4d016
add PYARROW_TEST_ANNOTATIONS to AMD64 Windows 2022 Python 3.13
rok Jan 14, 2026
7de8c87
move check to pre-commit
rok Jan 23, 2026
a03c3ef
change pre-commit, add note
rok Jan 23, 2026
6025c79
change pre-commit script
rok Jan 23, 2026
10d2b3a
Revert from pre-commit to ci/scripts/python_test_type_annotations.sh
rok Jan 23, 2026
e642257
Apply suggestions from code review
rok Jan 25, 2026
927967f
apply review suggestion
rok Jan 25, 2026
4cd7abd
fix shellcheck
rok Jan 25, 2026
110002a
Try single build
rok Jan 26, 2026
25a6d09
lint
rok Jan 26, 2026
0ca6fd1
review feedback
rok Jan 31, 2026
398756b
reintroduce pythonVersion = 3.10
rok Jan 31, 2026
6a3fae6
revert change
rok Jan 31, 2026
daf7f00
post rebase fix
rok Jan 31, 2026
0951e78
--no-build-isolation bypasses pyproject.toml's build requirements
rok Jan 31, 2026
973ae28
libcs added
rok Jan 31, 2026
5baf06b
Added DYLD_LIBRARY_PATH export to python_build.sh
rok Jan 31, 2026
1ea6448
Remove some sys.path hackery
rok Feb 9, 2026
c4ecb79
trigger CI
rok Feb 9, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,7 @@ repos:
?^ci/scripts/python_sdist_build\.sh$|
?^ci/scripts/python_sdist_test\.sh$|
?^ci/scripts/python_wheel_unix_test\.sh$|
?^ci/scripts/python_test_type_annotations\.sh$|
?^ci/scripts/r_build\.sh$|
?^ci/scripts/r_revdepcheck\.sh$|
?^ci/scripts/release_test\.sh$|
Expand Down Expand Up @@ -377,6 +378,7 @@ repos:
# TODO: Remove this when we fix all lint failures
files: >-
(
?^ci/scripts/python_test_type_annotations\.sh$|
?^dev/release/05-binary-upload\.sh$|
?^dev/release/binary-recover\.sh$|
?^dev/release/post-03-binary\.sh$|
Expand Down
1 change: 1 addition & 0 deletions ci/conda_env_python.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ cython>=3.1
cloudpickle
fsspec
hypothesis
libcst>=1.8.6
numpy>=1.16.6
pytest
pytest-faulthandler
Expand Down
1 change: 1 addition & 0 deletions ci/scripts/python_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ export PYARROW_PARALLEL=${n_jobs}
: "${CMAKE_PREFIX_PATH:=${ARROW_HOME}}"
export CMAKE_PREFIX_PATH
export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH}
export DYLD_LIBRARY_PATH=${ARROW_HOME}/lib${DYLD_LIBRARY_PATH:+:${DYLD_LIBRARY_PATH}}

# https://github.com/apache/arrow/issues/41429
# TODO: We want to out-of-source build. This is a workaround. We copy
Expand Down
38 changes: 38 additions & 0 deletions ci/scripts/python_test_type_annotations.sh
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you enable lint for this file?

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index c4c4f04188..fb46b2eda0 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -337,6 +337,7 @@ repos:
           ?^ci/scripts/python_sdist_build\.sh$|
           ?^ci/scripts/python_sdist_test\.sh$|
           ?^ci/scripts/python_wheel_unix_test\.sh$|
+          ?^ci/scripts/python_test_type_annotations\.sh$|
           ?^ci/scripts/r_build\.sh$|
           ?^ci/scripts/r_revdepcheck\.sh$|
           ?^ci/scripts/release_test\.sh$|
@@ -379,6 +380,7 @@ repos:
         # TODO: Remove this when we fix all lint failures
         files: >-
           (
+          ?^ci/scripts/python_test_type_annotations\.sh$|
           ?^dev/release/05-binary-upload\.sh$|
           ?^dev/release/binary-recover\.sh$|
           ?^dev/release/post-03-binary\.sh$|

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll give it a try! I'm afraid runtime annotations might not be seen at this point and in such case we can disable this in a later PR.

Copy link
Member Author

@rok rok Jan 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.
Do any of these scripts actually build pyarrow that we could test annotations against?

Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

set -ex

# Usage: python_test_type_annotations.sh <pyarrow_dir>
#
# Installs third-party type stubs and the type checkers with pip, then runs
# mypy, pyright and ty from inside <pyarrow_dir>.
#
# Fail early with a usage message instead of running `cd` on an empty path
# and type checking whatever directory the caller happened to be in.
if [ "$#" -lt 1 ] || [ -z "${1}" ]; then
  echo "Usage: $0 <pyarrow_dir>" >&2
  exit 1
fi
pyarrow_dir=${1}

# Activate the virtual environment when ARROW_PYTHON_VENV is set; otherwise
# use whatever Python environment is already active.
if [ -n "${ARROW_PYTHON_VENV:-}" ]; then
  # shellcheck source=/dev/null
  . "${ARROW_PYTHON_VENV}/bin/activate"
fi

# Install library stubs. Some libraries (e.g. fsspec) ship their own type
# hints, so the library itself has to be installed rather than a stub package.
pip install fsspec pandas-stubs scipy-stubs types-cffi types-psutil types-requests types-python-dateutil

# Install type checkers
pip install mypy pyright ty

# Run type checkers without arguments from the target directory; with
# `set -e` the first failing checker aborts the script.
cd "${pyarrow_dir}"
mypy
pyright
ty check
2 changes: 1 addition & 1 deletion ci/scripts/python_wheel_validate_contents.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def validate_wheel(path):
for info in f.filelist), \
f"{filename} is missing from the wheel."
print(f"The wheel: {wheels[0]} seems valid.")

# TODO(GH-32609): Validate some docstrings were generated and added.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the type checking is split into several PRs, perhaps create sub-issues for clarity as well?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will do.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I created two issues (for this PR and one following it) and linked them into the original issue #32609


def main():
parser = argparse.ArgumentParser()
Expand Down
3 changes: 2 additions & 1 deletion compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1539,7 +1539,8 @@ services:
/arrow/ci/scripts/python_build.sh /arrow /build &&
pip install -e /arrow/dev/archery[numpydoc] &&
archery numpydoc --allow-rule GL10,PR01,PR03,PR04,PR05,PR10,RT03,YD01 &&
/arrow/ci/scripts/python_test.sh /arrow"]
/arrow/ci/scripts/python_test.sh /arrow &&
/arrow/ci/scripts/python_test_type_annotations.sh /arrow/python"]

conda-python-dask:
# Possible $DASK parameters:
Expand Down
68 changes: 68 additions & 0 deletions docs/source/developers/python/development.rst
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,74 @@ The test groups currently include:
* ``s3``: Tests for Amazon S3
* ``tensorflow``: Tests that involve TensorFlow

Type Checking
=============

PyArrow provides type stubs (``*.pyi`` files) for static type checking. These
stubs are located in the ``pyarrow-stubs/`` directory and are automatically
included in the distributed wheel packages.

Running Type Checkers
---------------------

We support multiple type checkers. Their configurations are in
``pyproject.toml``.

**mypy**

To run mypy on the PyArrow codebase:

.. code-block::

$ cd arrow/python
$ mypy

The mypy configuration is in the ``[tool.mypy]`` section of ``pyproject.toml``.

**pyright**

To run pyright:

.. code-block::

$ cd arrow/python
$ pyright

The pyright configuration is in the ``[tool.pyright]`` section of ``pyproject.toml``.

**ty**

To run ty (note: currently only partially configured):

.. code-block::

$ cd arrow/python
$ ty check

Maintaining Type Stubs
-----------------------

Type stubs for PyArrow are maintained in the ``pyarrow-stubs/``
directory. These stubs mirror the structure of the main ``pyarrow/`` package.

When adding or modifying public APIs:

1. **Update the corresponding ``.pyi`` stub file** in ``pyarrow-stubs/``
to reflect the new or changed function/class signatures.

2. **Include type annotations** where possible. For Cython modules or
dynamically generated APIs such as compute kernels, add the corresponding
stub in ``pyarrow-stubs/``.

3. **Run type checkers** to ensure the stubs are correct and complete.

The stub files are automatically copied into the built wheel during the build
process and will be included when users install PyArrow, enabling type checking
in downstream projects and for users' IDEs.

Note: The ``py.typed`` marker file in the ``pyarrow/`` directory indicates to type
checkers that PyArrow supports type checking according to :pep:`561`.

Doctest
=======

Expand Down
1 change: 1 addition & 0 deletions python/MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ include ../NOTICE.txt

global-include CMakeLists.txt
graft pyarrow
graft pyarrow-stubs
graft cmake_modules

global-exclude *.so
Expand Down
29 changes: 29 additions & 0 deletions python/pyarrow-stubs/pyarrow/__init__.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""Type stubs for PyArrow.

This is a placeholder stub file.
Complete type annotations will be added in subsequent PRs.
"""

from typing import Any

# Module-level catch-all for the placeholder stub: every attribute that is not
# declared in this file is typed as Any, so type checkers accept any
# `pyarrow.<name>` access, including names that do not exist at runtime.
# TODO(GH-48970): remove __getattr__ before release as this
# will annotate non-existing attributes as Any.
# https://github.com/apache/arrow/issues/48970
def __getattr__(name: str) -> Any: ...
Comment on lines 18 to 29

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIUC this isn't intended to be permanent.
But I thought you might want to consider adding a note/opening an issue to remove it before release.

Here are some examples of how it can mask runtime errors:

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, interesting, I didn't consider this one! Yes, this is meant as a temporary measure and we would want it out before the release. I'll make a note and open an issue, thanks for pointing it out!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

16 changes: 16 additions & 0 deletions python/pyarrow/py.typed
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
40 changes: 39 additions & 1 deletion python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
[build-system]
requires = [
"cython >= 3.1",
# Needed for build-time stub docstring extraction
"libcst>=1.8.6",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At some point we'll want to deduplicate pyproject.toml and the various requirements files? @raulcd

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is probably best done in another PR?

"numpy>=1.25",
# configuring setuptools_scm in pyproject.toml requires
# versions released after 2022
Expand Down Expand Up @@ -88,11 +90,47 @@ include = ["pyarrow"]
namespaces = false

[tool.setuptools.package-data]
pyarrow = ["*.pxd", "*.pyx", "includes/*.pxd"]
pyarrow = ["*.pxd", "*.pyi", "*.pyx", "includes/*.pxd", "py.typed"]

[tool.setuptools_scm]
root = '..'
version_file = 'pyarrow/_generated_version.py'
version_scheme = 'guess-next-dev'
git_describe_command = 'git describe --dirty --tags --long --match "apache-arrow-[0-9]*.*"'
fallback_version = '24.0.0a0'

# TODO: Enable type checking once stubs are merged
[tool.mypy]
files = ["pyarrow-stubs"]
mypy_path = "$MYPY_CONFIG_FILE_DIR/pyarrow-stubs"
exclude = [
"^pyarrow/",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This exclusion means we're not checking anything currently, right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, if we don't, we get type check failures (IIRC).

"^benchmarks/",
"^examples/",
"^scripts/",
]

# pyright configuration. For now only the placeholder stubs in
# `pyarrow-stubs` are checked; the package sources and auxiliary
# directories are excluded.
# TODO: Enable type checking once stubs are merged
[tool.pyright]
# Do not assume a single target operating system.
pythonPlatform = "All"
# NOTE(review): presumably the oldest Python version PyArrow supports - confirm.
pythonVersion = "3.10"
# Only the stub tree is analysed.
include = ["pyarrow-stubs"]
# Paths are relative to this file's directory.
exclude = [
"pyarrow",
"benchmarks",
"examples",
"scripts",
"build",
]
# Directory pyright searches for stub files.
stubPath = "pyarrow-stubs"
typeCheckingMode = "basic"

# ty source selection. As with the other checkers configured in this file,
# only the placeholder stubs in `pyarrow-stubs` are checked for now.
# TODO: Enable type checking once stubs are merged
[tool.ty.src]
# Only the stub tree is analysed.
include = ["pyarrow-stubs"]
# Package sources and auxiliary directories are left out.
exclude = [
"pyarrow",
"benchmarks",
"examples",
"scripts",
]
1 change: 1 addition & 0 deletions python/requirements-build.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
cython>=3.1
libcst>=1.8.6
numpy>=1.25
setuptools_scm>=8
setuptools>=77
2 changes: 2 additions & 0 deletions python/requirements-wheel-build.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
build
cython>=3.1
# Needed for build-time stub docstring extraction
libcst>=1.8.6
numpy>=2.0.0
setuptools_scm
setuptools>=77
Expand Down
Loading
Loading