diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml deleted file mode 100644 index d6384966..00000000 --- a/.github/workflows/ci.yml +++ /dev/null @@ -1,125 +0,0 @@ -name: CI - -on: - pull_request: - push: - branches: - - main - -jobs: - # pre-commit: - # name: Lint - # runs-on: ubuntu-latest - # steps: - # - uses: actions/checkout@v4 - # - uses: actions/setup-python@v5 - # with: - # python-version: '3.11' - # - uses: pre-commit/action@v3.0.1 - unit: - name: Unit tests - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install '.[dev,debug,inference]' - - - name: Run tests - run: | - python -m pytest --cov=sc2ts tests/ - # - name: Upload coverage to Coveralls - # uses: coverallsapp/github-action@v2.3.0 - # with: - # github-token: ${{ secrets.GITHUB_TOKEN }} - # # The first coveralls upload will succeed and others seem to fail now. - # # This is a quick workaround for doing a proper "parallel" setup: - # # https://github.com/coverallsapp/github-action - # fail-on-error: false - - end_to_end: - name: End to end tests - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install '.[inference]' - - - name: Create basedir - run: | - mkdir -p testrun - gunzip -k tests/data/alignments.fasta.gz - - - name: Import alignments - run: | - sc2ts import-alignments -i testrun/dataset.zarr tests/data/alignments.fasta - - - name: Import metadata - run: | - sc2ts import-metadata testrun/dataset.zarr tests/data/metadata.tsv - - - name: Info dataset - run: | - sc2ts info-dataset testrun/dataset.zarr - - - name: Run inference - run: | - # doing ~10 days here as this is taking a while - sc2ts infer tests/data/testrun-conf.toml --stop 2020-02-03 - - - name: Validate - run: | - sc2ts validate -v --date-field=date testrun/dataset.zarr testrun/results/test/test_2020-02-02.ts - - - name: MatchDB - run: | - sc2ts info-matches testrun/test.matches.db - - bare_api: - name: Bare API tests - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install '.' - python -m pip install pytest - - name: Run tests - run: | - # We are careful to pull in the minimum dependencies here - # and only operate on files stored in the repo - python -m pytest -v -c /dev/null -p no:sc2ts_fixtures tests/test_api.py - - packaging: - name: Packaging tests - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install build twine validate-pyproject[all] - - name: Check and install package - run: | - validate-pyproject pyproject.toml - python -m build - python -m twine check --strict dist/* - python -m pip install dist/*.whl diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 928edb3a..a88e0b88 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -1,13 +1,11 @@ -name: Docs +name: Build docs on: pull_request: push: branches: [main] - tags: - - '*' merge_group: jobs: - Docs: - uses: tskit-dev/.github/.github/workflows/docs-build-template.yml@v1 + docs: + uses: tskit-dev/.github/.github/workflows/docs.yml@v14 diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml new file mode 100644 index 00000000..2c06cb88 --- /dev/null +++ b/.github/workflows/integration.yml @@ -0,0 +1,59 @@ +name: Integration tests + +on: + pull_request: + push: + branches: [main, test] + merge_group: + +jobs: + end-to-end: + name: End to end tests + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v4.2.2 + - uses: astral-sh/setup-uv@v6 + with: + python-version: "3.11" + version: "0.8.15" + - name: Install dependencies + run: uv sync --locked --group test + + - name: Create basedir + run: | + mkdir -p testrun + gunzip -k tests/data/alignments.fasta.gz + + - name: Import alignments + run: uv run sc2ts import-alignments -i testrun/dataset.zarr tests/data/alignments.fasta + + - name: Import metadata + run: uv run sc2ts import-metadata testrun/dataset.zarr tests/data/metadata.tsv + + - name: Info dataset + run: uv run sc2ts info-dataset testrun/dataset.zarr + + - name: Run inference + run: | + # doing ~10 days here as this is taking a while + uv run sc2ts infer tests/data/testrun-conf.toml --stop 2020-02-03 + + - name: Validate + run: uv run sc2ts validate -v --date-field=date testrun/dataset.zarr testrun/results/test/test_2020-02-02.ts + + - name: MatchDB + run: uv run sc2ts info-matches testrun/test.matches.db + + bare-api: + name: Bare API tests + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v4.2.2 + - uses: astral-sh/setup-uv@v6 + with: + python-version: "3.11" + version: "0.8.15" + - name: Run tests + run: | + # Minimal deps only; avoid picking up conftest and fixtures + uv run pytest -v -c /dev/null -p no:sc2ts_fixtures tests/test_api.py diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 00000000..78fa03ca --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,9 @@ +name: Lint + +on: + pull_request: + merge_group: + +jobs: + lint: + uses: tskit-dev/.github/.github/workflows/lint.yml@v14 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 00000000..dcc19989 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,25 @@ +name: Tests + +on: + pull_request: + push: + branches: [main, test] + merge_group: + +jobs: + packaging: + name: Python packaging + uses: tskit-dev/.github/.github/workflows/python-packaging.yml@v14 + + test: + name: Python + uses: tskit-dev/.github/.github/workflows/python-tests.yml@v14 + with: + os: ${{ matrix.os }} + python-version: ${{ matrix.python }} + coverage-directory: sc2ts + secrets: inherit + strategy: + matrix: + python: [3.11, 3.13] + os: [macos-latest, ubuntu-24.04] diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml new file mode 100644 index 00000000..1ca87e43 --- /dev/null +++ b/.github/workflows/wheels.yml @@ -0,0 +1,47 @@ +name: Publish Python release + +on: + push: + branches: [test-publish] + release: + types: [published] + +jobs: + build: + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v4.2.2 + with: + fetch-depth: 0 + - uses: astral-sh/setup-uv@v6 + with: + python-version: "3.12" + version: "0.8.15" + - run: uv build + - uses: actions/upload-artifact@v4.6.1 + with: + name: dist + path: dist/ + + publish: + runs-on: ubuntu-24.04 + environment: release + needs: [build] + permissions: + id-token: write + steps: + - uses: actions/download-artifact@v4.2.0 + with: + name: dist + path: dist + + - name: Publish to Test PyPI + if: github.event_name == 'push' && github.ref_name == 'test-publish' + uses: pypa/gh-action-pypi-publish@v1.13.0 + with: + repository-url: https://test.pypi.org/legacy/ + verbose: true + + - name: Publish to PyPI + if: github.event_name == 'release' + uses: pypa/gh-action-pypi-publish@v1.13.0 diff --git a/docs/Makefile b/docs/Makefile index fe6c42d1..5fabd25d 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -1,16 +1,6 @@ -# Need to set PYTHONPATH so that we pick up the local sc2ts -PYPATH=${PWD}/.. -SC2TS_VERSION:=$(shell PYTHONPATH=${PYPATH} \ - python3 -c 'import sc2ts; print(sc2ts.__version__.split("+")[0])') -dev: - PYTHONPATH=${PYPATH} ./build.sh - -dist: - @echo Building distribution for sc2ts version ${SC2TS_VERSION} - sed -i s/__SC2TS_VERSION__/${SC2TS_VERSION}/g _config.yml - PYTHONPATH=${PYPATH} ./build.sh +all: + ./build.sh clean: rm -fR _build - diff --git a/docs/_config.yml b/docs/_config.yml index 553428e6..8466510f 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -43,7 +43,7 @@ sphinx: navigation_with_keys: false pygments_dark_style: monokai logo: - text: "Version __SC2TS_VERSION__" + text: "Version __PKG_VERSION__" myst_enable_extensions: - colon_fence diff --git a/docs/build.sh b/docs/build.sh index 50be22cd..a3a3dbf1 100755 --- a/docs/build.sh +++ b/docs/build.sh @@ -6,7 +6,7 @@ REPORTDIR=_build/html/reports -jupyter-book build -W . +uv run --project=../python --group docs jupyter-book build . -vnW --keep-going RETVAL=$? if [ $RETVAL -ne 0 ]; then if [ -e $REPORTDIR ]; then @@ -18,4 +18,3 @@ else rm -f $REPORTDIR/* fi exit $RETVAL - diff --git a/docs/make_sc2ts_arg_subset.py b/docs/make_sc2ts_arg_subset.py index a2b01c88..8ebe3cc6 100644 --- a/docs/make_sc2ts_arg_subset.py +++ b/docs/make_sc2ts_arg_subset.py @@ -1,5 +1,5 @@ -import tszip import numpy as np +import tszip ts = tszip.load("sc2ts_viridian_v1.2.trees.tsz") diff --git a/docs/make_viridian_subset.py b/docs/make_viridian_subset.py index 011cb521..953ad8ce 100644 --- a/docs/make_viridian_subset.py +++ b/docs/make_viridian_subset.py @@ -9,4 +9,3 @@ path = f"viridian_mafft_subset_{k}_v1.vcz" ds.copy(path, sample_id=samples) sc2ts.Dataset.create_zip(path, path + ".zip") - diff --git a/prek.toml b/prek.toml new file mode 100644 index 00000000..16363167 --- /dev/null +++ b/prek.toml @@ -0,0 +1,40 @@ +# The prek configuration defining linting requirements. This +# setup is optimised for long-term stability and determinism, +# and therefore only uses either "builtin" rules or "local" +# rules implementing lint workflows. We do not use any remote +# workflow repos. + +[[repos]] +repo = "builtin" +hooks = [ + { id = "check-added-large-files" }, + { id = "check-merge-conflict" }, + { id = "mixed-line-ending" }, + { id = "check-case-conflict" }, + { id = "check-yaml" }, + { id = "check-toml" }, +] + +[[repos]] +repo = "local" +hooks = [ + { + id = "ruff-check", + name = "ruff check", + language = "system", + entry = "uv run --only-group lint ruff check --fix --force-exclude", + types = ["python"], + }, +] + +[[repos]] +repo = "local" +hooks = [ + { + id = "ruff-format", + name = "ruff format", + language = "system", + entry = "uv run --only-group lint ruff format --force-exclude", + types = ["python"], + }, +] diff --git a/pyproject.toml b/pyproject.toml index 9c0f6740..156077f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,6 @@ dependencies = [ "tszip", "pandas", "tqdm", - "zarr<3", "click>=8.2", # Not strictly needed for base functionality, but easier to include # as imported by the CLI @@ -27,46 +26,71 @@ Documentation = "https://tskit.dev/sc2ts/docs/" "Bug Tracker" = "https://github.com/tskit-dev/sc2ts/issues" GitHub = "https://github.com/tskit-dev/sc2ts/" - [project.scripts] sc2ts = "sc2ts.cli:cli" +# NOTE: project.optional-dependencies and the dependency-groups below +# cannot be deduplicated as they serve different purposes. [project.optional-dependencies] -dev = [ - "msprime", - "sgkit", - "pytest", - "pytest-coverage", - "tomli-w", -] inference = [ "scipy", "biotite", - "tsinfer>=0.5", + "tsinfer>=0.5,<1", "pyfaidx", "numba", + "zarr<3", ] debug = [ "matplotlib", "IPython", ] + +[dependency-groups] +test = [ + "msprime", + "pytest", + "pytest-cov", + "pytest-xdist", + "xarray", + "tomli-w", + # inference and debug deps needed by the test suite + "scipy", + "biotite", + "tsinfer>=0.5,<1", + "pyfaidx", + "numba", + "matplotlib", + "IPython", +] docs = [ - "jupyter-book==1.0.4.post1", + "jupyter-book<2", "sphinx-book-theme", "sphinx-copybutton", "sphinx-click", - "setuptools_scm", - "sphinx-argparse==0.5.2", - "sphinx-issues==5.0.1", + "sphinx-argparse", + "sphinx-issues", "IPython", - # docs requires running the CLI, which means we need to full inference - # requirements also + # docs require running the CLI, which means the full inference deps are needed "scipy", "biotite", "tsinfer>=0.5", "pyfaidx", "numba", ] +lint = [ + "ruff==0.15.1", + "prek==0.3.3", +] +packaging = [ + "twine", + "validate-pyproject[all]", +] +dev = [ + {include-group = "docs"}, + {include-group = "lint"}, + {include-group = "test"}, + {include-group = "packaging"}, +] [build-system] requires = [ @@ -84,4 +108,28 @@ write_to = "sc2ts/_version.py" [tool.pytest.ini_options] testpaths = "tests" -addopts = "--cov=sc2ts --cov-report term-missing" + +[tool.ruff] +target-version = "py311" +line-length = 89 + +[tool.ruff.lint] +select = ["E", "F", "B", "W", "I", "N", "UP", "A", "PT"] +fixable = ["ALL"] + + +ignore = [ + "A001", "A002", "RUF", + "B905", #Don't add strict=False to zips (B905) + "E741", # using 'l' as a variable name + "N806", "N802", "N803", # Various nags about uppercase vars + "F821", # Undefined name `List` + "B006", # Do not use mutable data structures for argument defaults + "E721", # Use `is` and `is not` for type comparisons, or `isinstance()` + "F841", # Local variable `shape` is assigned to but never + "PT018", # Assertion should be broken down into multiple parts + "B007", # Loop control variable `j` not used within + "PT006", # Wrong type passed to first argument of `pytest.mar + "PT007", # Wrong values type in `pytest.mark.parametrize` + "PT009", # Use a regular `assert` instead of unittest-style +] diff --git a/sc2ts/__init__.py b/sc2ts/__init__.py index 69a09969..ca55d3a0 100644 --- a/sc2ts/__init__.py +++ b/sc2ts/__init__.py @@ -1,6 +1,11 @@ -from .core import __version__ - # star imports are fine here as it's just a bunch of constants -from .core import * -from .dataset import mask_ambiguous, mask_flanking_deletions, decode_alleles, Dataset, Variant -from .stats import node_data, mutation_data +from .core import * # noqa +from .core import __version__ # noqa +from .dataset import ( + Dataset, # noqa + Variant, # noqa + decode_alleles, # noqa + mask_ambiguous, # noqa + mask_flanking_deletions, # noqa +) +from .stats import mutation_data, node_data # noqa diff --git a/sc2ts/__main__.py b/sc2ts/__main__.py index 0b6a7bb7..91e12cb2 100644 --- a/sc2ts/__main__.py +++ b/sc2ts/__main__.py @@ -1,5 +1,4 @@ from . import cli - if __name__ == "__main__": cli.cli() diff --git a/sc2ts/cli.py b/sc2ts/cli.py index 8df17e68..abbb45ed 100644 --- a/sc2ts/cli.py +++ b/sc2ts/cli.py @@ -1,31 +1,22 @@ -import json import collections import concurrent.futures as cf +import datetime +import json import logging -import itertools import pathlib import sys -import contextlib -import dataclasses -import datetime -import time -from typing import List -import tomli -import numpy as np -import tqdm -import tskit -import tszip import click import humanize +import numpy as np import pandas as pd +import tomli +import tqdm +import tszip import sc2ts -from . import core -from . import data_import -from . import tree_ops -from . import jit -from . import validation + +from . import core, data_import, jit, tree_ops, validation from . import inference as si # sc2ts inference logger = logging.getLogger(__name__) @@ -86,7 +77,11 @@ def summarise_usage(ts): max_mem = d["max_memory"] if max_mem > 0: maxmem_str = "; max_memory=" + humanize.naturalsize(max_mem, binary=True) - return f"elapsed={wall_time:.2f}m; user={user_time:.2f}m; sys={sys_time:.2f}m{maxmem_str}" + return ( + f"elapsed={wall_time:.2f}m; " + f"user={user_time:.2f}m; " + f"sys={sys_time:.2f}m{maxmem_str}" + ) def setup_logging(verbosity, log_file=None, date=None): @@ -129,8 +124,7 @@ def setup_logging(verbosity, log_file=None, date=None): is_flag=True, flag_value=True, help=( - "If true, initialise a new dataset. WARNING! This will erase an existing " - "store" + "If true, initialise a new dataset. WARNING! This will erase an existing store" ), ) @progress @@ -280,9 +274,7 @@ def _run_extend(out_path, verbose, log_file, **params): @click.command() @click.argument("config_file", type=click.File(mode="rb")) -@click.option( - "--start", default=None, help="Start inference at this date (inclusive). " -) +@click.option("--start", default=None, help="Start inference at this date (inclusive). ") @click.option( "--stop", default="3000", @@ -373,9 +365,7 @@ def infer(config_file, start, stop, force): base_ts = ts_file_pattern.format(date=date) with cf.ProcessPoolExecutor(1) as executor: - future = executor.submit( - _run_extend, base_ts, log_level, log_file, **params - ) + future = executor.submit(_run_extend, base_ts, log_level, log_file, **params) # Block and wait, raising exception if it occured future.result() @@ -428,9 +418,7 @@ def validate( setup_logging(verbose) ts = tszip.load(ts_file) - ds = sc2ts.Dataset( - dataset, date_field=date_field, chunk_cache_size=chunk_cache_size - ) + ds = sc2ts.Dataset(dataset, date_field=date_field, chunk_cache_size=chunk_cache_size) if genotypes: validation.validate_genotypes(ts, ds, deletions_as_missing, show_progress=True) if metadata: @@ -749,9 +737,7 @@ def find_previous_date_path(date, path_pattern): if path.exists(): break else: - raise ValueError( - f"No path exists for pattern {path_pattern} starting at {date}" - ) + raise ValueError(f"No path exists for pattern {path_pattern} starting at {date}") return path diff --git a/sc2ts/data_import.py b/sc2ts/data_import.py index fbb1939b..dc14e961 100644 --- a/sc2ts/data_import.py +++ b/sc2ts/data_import.py @@ -1,6 +1,6 @@ import collections.abc -import pathlib import csv +import pathlib import numpy as np import pyfaidx @@ -140,4 +140,3 @@ def massage_viridian_metadata(df): a[~missing] = np.array(data[~missing], dtype=int) df[name] = a return df - diff --git a/sc2ts/dataset.py b/sc2ts/dataset.py index 61c89704..d6f1c02c 100644 --- a/sc2ts/dataset.py +++ b/sc2ts/dataset.py @@ -2,20 +2,19 @@ Methods for managing a sc2ts Zarr based dataset. """ -import dataclasses -import os.path -import zipfile import collections +import concurrent.futures as cf +import dataclasses import logging +import os.path import pathlib -import concurrent.futures as cf +import zipfile -import tskit -import tqdm -import pandas as pd -import zarr import numcodecs import numpy as np +import pandas as pd +import tqdm +import zarr from sc2ts import core @@ -236,6 +235,7 @@ class Variant: Represents a single variant, including the genomic position and the integer encoded genotypes. """ + position: int genotypes: np.ndarray alleles: list @@ -252,6 +252,7 @@ class Dataset(collections.abc.Mapping): :param int chunk_cache_size: Maximum number of chunks to cache for alignments and metadata. Defaults to 1. """ + def __init__(self, path, chunk_cache_size=1, date_field=None): logger.info(f"Loading dataset @{path} using {date_field} as date field") self.date_field = date_field diff --git a/sc2ts/debug.py b/sc2ts/debug.py index 80a76d4f..68545225 100644 --- a/sc2ts/debug.py +++ b/sc2ts/debug.py @@ -4,28 +4,24 @@ included if the "debug"] option is included with the sc2ts install. """ + import collections -import logging -import json -import warnings import dataclasses import datetime +import json +import logging import re -from typing import List +import warnings -import tskit -import numpy as np -from tqdm.auto import tqdm -import pandas as pd import humanize import matplotlib.pyplot as plt -from matplotlib import colors -from IPython.display import Markdown, HTML +import numpy as np +import pandas as pd +import tskit +from IPython.display import HTML, Markdown +from tqdm.auto import tqdm -from . import jit -from . import core -from . import inference -from . import data_import +from . import core, data_import, inference, jit logger = logging.getLogger(__name__) @@ -89,7 +85,6 @@ def max_descendant_samples(ts, show_progress=True): class CopyingTable: - default_colours = ( # Chosen to be light enough that black text on top is readable "#FC0", # Gold for de-novo mutations "#8D8", # Copy from first parent: light green @@ -117,7 +112,7 @@ def line_cell(pos, prev_pos, next_pos): dist_to_left = 0 if dist_to_right > 2: dist_to_right = 0 - return f'' + return f'' # noqa E501 def node_mutations(self): muts = {} @@ -206,10 +201,11 @@ def html( matches the first parent, ``colours[2]`` for the second parent, etc. Default: None, treated as ``["#FC0", "#8D8", "#6AD", "#B9D", "#A88"]``. :param exclude_stylesheet bool: - If True, exclude the default stylesheet from the HTML output. This is useful - simply to save space if you want to include the copying table in a larger HTML - document (e.g. a Jupyter notebook) that already has one copying table shown with - the standard stylesheet. If False or None (default), include the default stylesheet. + If True, exclude the default stylesheet from the HTML output. This + is useful simply to save space if you want to include the copying + table in a larger HTML document (e.g. a Jupyter notebook) that + already has one copying table shown with the standard stylesheet. + If False or None (default), include the default stylesheet. :param font_family str: The font family to use for the table. Default: None. :param css_class str: @@ -284,7 +280,7 @@ def label(allele, default=""): except IndexError as e: raise ValueError( "Displaying the copying path only deals with a max of " - f"{len(parent_colours)-1} parents" + f"{len(parent_colours) - 1} parents" ) from e elif parent_allele == var.site.ancestral_state: col = "#DDD" @@ -310,19 +306,20 @@ def label(allele, default=""): runlength_cols = ("white", "red", "orange") bg_im_src = ( "background-image:linear-gradient(to right, {0} 50%, {1} 50%);" - "background-image:-webkit-linear-gradient(left, {0} 50%, {1} 50%);" # for imgkit/wkhtmltopdf + # for imgkit/wkhtmltopdf + "background-image:-webkit-linear-gradient(left, {0} 50%, {1} 50%);" ) html += "