diff --git a/README.rst b/README.rst index 7862443..ebd1ed6 100644 --- a/README.rst +++ b/README.rst @@ -27,21 +27,21 @@ HGVS position crossmapper This library provides an interface to convert (cross map) between different HGVS numbering_ systems. -Converting between the transcript oriented c. or n. and the genomic oriented g. +Converting between the transcript oriented ``c.`` or ``n.`` and the genomic oriented ``g.`` numbering systems can be difficult, especially when the transcript in question -resides on the complement strand. +resides on the complement strand. This library provides functions to convert from any HGVS +numbering system to standard (0-based) coordinates and vice versa. **Features:** -- Support for genomic positions to standard coordinates and vice versa. -- Support for noncoding positions to standard coordinates and vice versa. -- Support for coding positions to standard coordinates and vice versa. -- Support for protein positions to standard coordinates and vice versa. -- Basic classes for loci that can be used for genomic loci other than genes. +- Support for genomic (``g.``, ``m.``, ``o.``) positions to standard coordinates and vice versa. +- Support for noncoding (``n.``, ``r.``) positions to standard coordinates and vice versa. +- Support for coding (``c.``, ``r.``) positions to standard coordinates and vice versa. +- Support for protein (``p.``) positions to standard coordinates and vice versa. +- Basic classes that can be used for loci other than genes or transcripts. Please see ReadTheDocs_ for the latest documentation. - Quick start ----------- @@ -53,8 +53,8 @@ positions and coordinates. >>> from mutalyzer_crossmapper import Genomic >>> crossmap = Genomic() >>> crossmap.coordinate_to_genomic(0) - 1 - >>> crossmap.genomic_to_coordinate(1) + {'position': 1} + >>> crossmap.genomic_to_coordinate({'position': 1}) 0 On top of the functionality provided by the ``Genomic`` class, the @@ -67,8 +67,8 @@ positions and coordinates. 
>>> exons = [(5, 8), (14, 20), (30, 35), (40, 44), (50, 52), (70, 72)] >>> crossmap = NonCoding(exons) >>> crossmap.coordinate_to_noncoding(35) - (14, 1, 0) - >>> crossmap.noncoding_to_coordinate((14, 1)) + {'position': 14, 'offset': 1, 'region': ''} + >>> crossmap.noncoding_to_coordinate({'position': 14, 'offset': 1, 'region': ''}) 35 Add the flag ``inverted=True`` to the constructor when the transcript resides @@ -84,8 +84,8 @@ coordinates as well as conversions between protein positions and coordinates. >>> cds = (32, 43) >>> crossmap = Coding(exons, cds) >>> crossmap.coordinate_to_coding(31) - (-1, 0, -1, 0) - >>> crossmap.coding_to_coordinate((-1, 0, -1)) + {'position': 1, 'offset': 0, 'region': '-'} + >>> crossmap.coding_to_coordinate({'position':1, 'offset':0, 'region':'-'}) 31 Again, the flag ``inverted=True`` can be used for transcripts that reside on @@ -96,10 +96,11 @@ Conversions between protein positions and coordinates are done as follows. .. code:: python >>> crossmap.coordinate_to_protein(41) - (2, 2, 0, 0, 0) - >>> crossmap.protein_to_coordinate((2, 2, 0, 0)) + {'position': 2, 'position_in_codon': 2, 'offset': 0, 'region': ''} + >>> crossmap.protein_to_coordinate({'position': 2, 'position_in_codon': 2, 'offset': 0, 'region': ''}) 41 + .. _numbering: http://varnomen.hgvs.org/bg-material/numbering/ .. 
_ReadTheDocs: https://mutalyzer-crossmapper.readthedocs.io diff --git a/docs/conf.py b/docs/conf.py index d87866b..3dbdd7d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -7,5 +7,8 @@ release = _get_metadata('Version') autoclass_content = 'both' -extensions = ['sphinx.ext.autodoc'] +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.intersphinx' + ] master_doc = 'index' diff --git a/docs/library.rst b/docs/library.rst index 6ef095a..64aa788 100644 --- a/docs/library.rst +++ b/docs/library.rst @@ -8,21 +8,38 @@ The ``Genomic`` class --------------------- The ``Genomic`` class provides an interface to conversions between genomic -positions and coordinates. +(``g.``, ``m.``, ``o.``) positions and coordinates. + +Genomic Position Model +~~~~~~~~~~~~~~~~~~~~~~~ + +Genomic positions follow the HGVS genomic coordinate system. +They are represented as 1-key dictionaries. Below is an example of ``g.1`` in HGVS. + +.. code-block:: python + + {'position': 1} + +Where: + +- **position**: an integer representing a nucleotide position (> 0) + +Genomic Position Conversion +~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: python >>> from mutalyzer_crossmapper import Genomic >>> crossmap = Genomic() -The functions ``coordinate_to_genomic()`` and ``genomic_to_coordinate`` can be +The functions ``coordinate_to_genomic()`` and ``genomic_to_coordinate()`` can be used to convert to and from genomic positions. .. code:: python >>> crossmap.coordinate_to_genomic(0) - 1 - >>> crossmap.genomic_to_coordinate(1) + {'position': 1} + >>> crossmap.genomic_to_coordinate({'position': 1}) 0 See section :doc:`api/crossmap` for a detailed description. @@ -32,8 +49,33 @@ The ``NonCoding`` class On top of the functionality provided by the ``Genomic`` class, the ``NonCoding`` class provides an interface to conversions between noncoding -positions and coordinates. Conversions between positioning systems should be -done via a coordinate. +(``n.``, ``r.``) positions and coordinates. 
Conversions between positioning +systems should be done via a coordinate. + +NonCoding Position Model +~~~~~~~~~~~~~~~~~~~~~~~~ + +Noncoding positions follow the HGVS ``n.`` coordinate system. They are represented +as 3-key dictionaries. Below is an example of ``n.14+1`` in HGVS. + +.. code-block:: python + + { + 'position': 14, + 'offset': 1, + 'region': '' + } + +Where: + +- **position**: an integer representing a nucleotide position (> 0) +- **offset**: an integer indicating the offset relative to the position (negative for upstream, + positive for downstream) +- **region**: a string describing the region type (empty for positions within a non-coding + transcript, ``u`` for upstream, ``d`` for downstream) + +NonCoding Position Conversion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: python @@ -41,69 +83,141 @@ done via a coordinate. >>> exons = [(5, 8), (14, 20), (30, 35), (40, 44), (50, 52), (70, 72)] >>> crossmap = NonCoding(exons) -Now the functions ``coordinate_to_noncoding()`` and -``noncoding_to_coordinate()`` can be used. These functions use a 3-tuple to -represent a noncoding position. +Now the functions ``coordinate_to_noncoding()`` and ``noncoding_to_coordinate()`` +can be used. -.. _table_noncoding: -.. list-table:: Noncoding positions. - :header-rows: 1 - - * - index - - description - * - 0 - - Transcript position. - * - 1 - - Offset. - * - 2 - - Upstream or downstream offset. - -In our example, the HGVS position "g.36" (coordinate ``35``) is equivalent to -position "n.14+1". We can convert between these two as follows. +In our example, the HGVS position ``g.36`` (coordinate *35*) is equivalent to +position ``n.14+1``. We can convert between these two as follows. .. 
code:: python >>> crossmap.coordinate_to_noncoding(35) - (14, 1, 0) + {'position': 14, 'offset': 1, 'region': ''} + >>> crossmap.noncoding_to_coordinate({'position': 14, 'offset': 1, 'region': ''}) + 35 -When the coordinate is upstream or downstream of the transcript, the last -element of the tuple denotes the offset with respect to the transcript. This -makes it possible to distinguish between intronic positions and those outside -of the transcript. +When the coordinate is upstream or downstream of the transcript, we use ``u`` to +denote upstream and ``d`` to denote downstream. .. code:: python >>> crossmap.coordinate_to_noncoding(2) - (1, -3, -3) + {'position': 3, 'offset': 0, 'region': 'u'} + >>> crossmap.noncoding_to_coordinate({'position': 3, 'offset': 0, 'region': 'u'}) + 2 >>> crossmap.coordinate_to_noncoding(73) - (22, 2, 2) + {'position': 2, 'offset': 0, 'region': 'd'} + >>> crossmap.noncoding_to_coordinate({'position': 2, 'offset': 0, 'region': 'd'}) + 73 + +The ``coordinate_to_noncoding()`` function accepts an optional ``degenerate`` +argument. When set to ``True``, positions outside of the transcript are no +longer described using the ``u`` or ``d`` notation, ``-`` and ``*`` are used +instead. The values in ``position`` and ``offset`` will change accordingly. -Note that this last element is optional (and ignored) when a conversion to a -coordinate is requested. +.. code:: python - >>> crossmap.noncoding_to_coordinate((14, 1)) - 35 + >>> crossmap.coordinate_to_noncoding(2) + {'position': 3, 'offset': 0, 'region': 'u'} + >>> crossmap.coordinate_to_noncoding(2, True) + {'position': 1, 'offset': -3, 'region': '-'} For transcripts that reside on the reverse complement strand, the ``inverted`` -parameter should be set to ``True``. In our example, HGVS position "g.36" -(coordinate ``35``) is now equivalent to position "n.9-1". +parameter should be set to ``True``. 
In our example, HGVS position ``g.36`` +(coordinate *35*) is now equivalent to position ``n.9-1``. .. code:: python >>> crossmap = NonCoding(exons, inverted=True) >>> crossmap.coordinate_to_noncoding(35) - (9, -1, 0) - >>> crossmap.noncoding_to_coordinate((9, -1)) + {'position': 9, 'offset': -1, 'region': ''} + >>> crossmap.noncoding_to_coordinate({'position': 9, 'offset': -1, 'region': ''}) 35 +In the following table, we show a number of annotated examples. + +.. _table_noncoding: +.. list-table:: Coordinates to Noncoding Positions mapping. + :header-rows: 1 + + * - coordinate + - position + - offset + - region + - HGVS + * - 0 + - 5 + - 0 + - ``u`` + - ``n.u5`` + * - 4 + - 1 + - 0 + - ``u`` + - ``n.u1`` + * - 5 + - 1 + - 0 + - + - ``n.1`` + * - 24 + - 9 + - 5 + - + - ``n.9+5`` + * - 25 + - 10 + - -5 + - + - ``n.10-5`` + * - 71 + - 22 + - 0 + - + - ``n.22`` + * - 72 + - 1 + - 0 + - ``d`` + - ``n.d1`` + * - 79 + - 8 + - 0 + - ``d`` + - ``n.d8`` + See section :doc:`api/crossmap` for a detailed description. The ``Coding`` class -------------------- The ``Coding`` class provides an interface to all conversions between -positioning systems and coordinates. Conversions between positioning systems -should be done via a coordinate. +coding (``c.``, ``r.``) positions and coordinates. Conversions between +positioning systems should be done via a coordinate. + +Coding Position Model +~~~~~~~~~~~~~~~~~~~~~ +Coding positions follow the HGVS ``c.`` coordinate system. They are +represented as 3-key dictionaries. Here is an example of ``c.*1+3``. + +.. 
code-block:: python + + { + 'position': 1, + 'offset': 3, + 'region': '*' + } + +Where: + +- **position**: an integer representing a transcript position (> 0) +- **offset**: an integer indicating the offset relative to the position (negative for upstream, + positive for downstream) +- **region**: a string describing the region type (empty for positions within coding DNA sequence, + ``-`` for 5' UTR, ``*`` for 3' UTR, ``u`` for upstream and ``d`` for downstream) + +Coding Position Conversion +~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: python @@ -114,131 +228,236 @@ should be done via a coordinate. On top of the functionality provided by the ``NonCoding`` class, the functions ``coordinate_to_coding()`` and ``coding_to_coordinate()`` can be used. These -functions use a 4-tuple to represent a coding position. +functions use a 3-key dictionary to represent a coding position. -.. list-table:: Coding positions. - :header-rows: 1 - - * - index - - description - * - 0 - - Transcript position. - * - 1 - - Offset. - * - 2 - - Region. - * - 3 - - Upstream or downstream offset. - -The region denotes the location of the position with respect to the CDS. This -is needed in order to work with the HGVS "-" and "*" positions. - -.. list-table:: Coding position regions. - :header-rows: 1 - - * - value - - description - - HGVS example - * - ``-1`` - - Upstream of the CDS. - - "c.-10" - * - ``0`` - - In the CDS. - - "c.1" - * - ``1`` - - Downstream of the CDS. - - "c.*10" - -In our example, the HGVS position "g.32" (coordinate ``31``) is equivalent to -position "c.-1". We can convert between these two as follows. +In our example, the HGVS position ``g.32`` (coordinate *31*) is equivalent to +position ``c.-1``. We can convert between these two as follows. .. 
code:: python >>> crossmap.coordinate_to_coding(31) - (-1, 0, -1, 0) - >>> crossmap.coding_to_coordinate((-1, 0, -1)) + {'position': 1, 'offset': 0, 'region': '-'} + >>> crossmap.coding_to_coordinate({'position': 1, 'offset': 0, 'region': '-'}) 31 The ``coordinate_to_coding()`` function accepts an optional ``degenerate`` argument. When set to ``True``, positions outside of the transcript are no -longer described using the offset notation. +longer described using the ``u`` or ``d`` notation, ``-`` and ``*`` are used +instead. Note that the value of ``position`` is adjusted accordingly. .. code:: python >>> crossmap.coordinate_to_coding(4) - (-11, -1, -1, -1) + {'position': 1, 'offset': 0, 'region': 'u'} >>> crossmap.coordinate_to_coding(4, True) - (-12, 0, -1, -1) + {'position': 12, 'offset': 0, 'region': '-'} -Additionally, the functions ``coordinate_to_protein()`` and -``protein_to_coordinate()`` can be used. These functions use a 5-tuple to -represent a protein position. +In the following table, we show a number of annotated examples. -.. list-table:: Protein positions. +.. _table_coding: +.. list-table:: Coordinates to Coding Positions mapping :header-rows: 1 - * - index - - description + * - coordinate + - position + - offset + - region + - HGVS * - 0 - - Protein position. - * - 1 - - Codon position. - * - 2 - - Offset. - * - 3 - - Region. + - 5 + - 0 + - ``u`` + - ``c.u5`` * - 4 - - Upstream or downstream offset. 
+ - 1 + - 0 + - ``u`` + - ``c.u1`` + * - 5 + - 11 + - 0 + - ``-`` + - ``c.-11`` + * - 24 + - 3 + - 5 + - ``-`` + - ``c.-3+5`` + * - 31 + - 1 + - 0 + - ``-`` + - ``c.-1`` + * - 32 + - 1 + - 0 + - + - ``c.1`` + * - 37 + - 3 + - 3 + - + - ``c.3+3`` + * - 38 + - 4 + - -2 + - + - ``c.4-2`` + * - 43 + - 1 + - 0 + - ``*`` + - ``c.*1`` + * - 61 + - 4 + - -9 + - ``*`` + - ``c.*4-9`` + * - 71 + - 5 + - 0 + - ``*`` + - ``c.*5`` + * - 72 + - 1 + - 0 + - ``d`` + - ``c.d1`` + * - 79 + - 8 + - 0 + - ``d`` + - ``c.d8`` + + +Protein +------- + +Additionally, the functions ``coordinate_to_protein()`` and +``protein_to_coordinate()`` can be used. These functions use a 4-key dictionary +to represent a protein position. Here is one example of three possibilities +for ``p.1`` in HGVS. + +.. code-block:: python + + { + 'position': 1, + 'position_in_codon': 3, + 'offset': 0, + 'region': '' + } -In our example the HGVS position "g.42" (coordinate ``41``) corresponds with -position "p.2". We can convert between these to as follows. +Where: + +- **position**: an integer representing an amino acid position (> 0) +- **position_in_codon**: an integer indexing the position in a codon (1, 2, or 3) +- **offset**: an integer indicating offset relative to the nucleotide specified by ``position_in_codon`` in the codon +- **region**: a string describing the region type (empty for valid amino acid positions) + +In our example, the HGVS position ``g.42`` (coordinate *41*) corresponds with +position ``p.2``. We can convert between these two as follows. .. code:: python >>> crossmap.coordinate_to_protein(41) - (2, 2, 0, 0, 0) - >>> crossmap.protein_to_coordinate((2, 2, 0, 0)) + {'position': 2, 'position_in_codon': 2, 'offset': 0, 'region': ''} + >>> crossmap.protein_to_coordinate({'position': 2, 'position_in_codon': 2, 'offset': 0, 'region': ''}) 41 -Note that the protein position only corresponds with the HGVS "p." notation -when the offset equals ``0`` and the region equals ``1``. 
In the following +**Note:** a protein position only corresponds with the HGVS "p." notation +when the offset equals ``0`` and the region is empty. In the following table, we show a number of annotated examples. -.. list-table:: Protein positions examples. +.. _table_protein: +.. list-table:: Coordinates to Protein Positions mapping :header-rows: 1 * - coordinate - - protein position - - description - - HGVS position - * - ``4`` - - ``(-4, 2, -1, -1, -1)`` - - Upstream position. - - invalid - * - ``31`` - - ``(-1, 3, 0, -1, 0)`` - - 5' UTR position. - - invalid - * - ``36`` - - ``(1, 3, 2, 0, 0)`` - - Intronic position. - - invalid - * - ``40`` - - ``(2, 1, 0, 0, 0)`` - - Second amino acid, first nucleotide. - - "p.2" - * - ``41`` - - ``(2, 2, 0, 0, 0)`` - - Second amino acid, second nucleotide. - - "p.2" - * - ``43`` - - ``(1, 1, 0, 1, 0)`` - - 3' UTR position. - - invalid - * - ``43`` - - ``(2, 2, 2, 1, 2)`` - - Downstream position. - - invalid + - position + - position_in_codon + - offset + - region + - HGVS + * - 0 + - 2 + - 2 + - 0 + - ``u`` + - + * - 4 + - 1 + - 3 + - 0 + - ``u`` + - + * - 5 + - 4 + - 2 + - 0 + - ``-`` + - + * - 31 + - 1 + - 3 + - 0 + - ``-`` + - + * - 32 + - 1 + - 1 + - 0 + - + - ``p.1`` + * - 33 + - 1 + - 2 + - 0 + - + - ``p.1`` + * - 34 + - 1 + - 3 + - 0 + - + - ``p.1`` + * - 35 + - 1 + - 3 + - 1 + - + - + * - 42 + - 2 + - 3 + - 0 + - + - ``p.2`` + * - 43 + - 1 + - 1 + - 0 + - ``*`` + - + * - 44 + - 1 + - 1 + - 1 + - ``*`` + - + * - 72 + - 1 + - 1 + - 0 + - ``d`` + - + + * - 79 + - 2 + - 2 + - 0 + - ``d`` + - + See section :doc:`api/crossmap` for a detailed description. @@ -276,10 +495,10 @@ The ``Coding`` class makes use of a number of basic classes described in this section. The ``Locus`` class -^^^^^^^^^^^^^^^^^^^ +~~~~~~~~~~~~~~~~~~~ The ``Locus`` class is used to deal with offsets with respect to a single -locus. +locus. .. code:: python @@ -288,13 +507,19 @@ locus. 
This class provides the functions ``to_position()`` and ``to_coordinate()`` for converting from a locus position to a coordinate and vice versa. These -functions work with a 2-tuple, see the section about `The NonCoding class`_ +functions work with a 2-key dictionary, see the section about `The NonCoding class`_ for the semantics. +**Note:** the ``position`` values in the position dictionaries are **0-based**, +so the first base of the locus corresponds to ``{'position': 0, 'offset': 0}``. +This differs from HGVS numbering, which is **1-based**. + .. code:: python >>> locus.to_position(9) - (1, -1) + {'position': 0, 'offset': -1} + >>> locus.to_coordinate({'position': 0, 'offset': -1}) + 9 For loci that reside on the reverse complement strand, the optional ``inverted`` constructor parameter should be set to ``True``. @@ -312,13 +537,20 @@ loci. >>> from mutalyzer_crossmapper import MultiLocus >>> multilocus = MultiLocus([(10, 20), (40, 50)]) -The interface to this class is similar to that of the ``Locus`` class. +The interface to this class is similar to that of the ``Locus`` class. Functions +``to_position()`` and ``to_coordinate()`` work with a 3-key dictionary. + +**Note:** again, the ``position`` values in the position dictionaries are **0-based**. .. code:: python >>> multilocus.to_position(22) - (10, 3) + {'position': 9, 'offset': 3, 'region': ''} + >>> multilocus.to_coordinate({'position': 9, 'offset': 3, 'region': ''}) + 22 >>> multilocus.to_position(38) - (11, -2) + {'position': 10, 'offset': -2, 'region': ''} + >>> multilocus.to_coordinate({'position': 10, 'offset': -2, 'region': ''}) + 38 See section :doc:`api/multi_locus` for a detailed description. diff --git a/mutalyzer_crossmapper/__init__.py b/mutalyzer_crossmapper/__init__.py index 284f6c8..7d1abc0 100644 --- a/mutalyzer_crossmapper/__init__.py +++ b/mutalyzer_crossmapper/__init__.py @@ -1,18 +1,4 @@ -"""Crossmapper position conversion library. 
- -Definitions: - -- Coordinates are zero based, non-negative integers. -- Locations are zero based right-open non-negative integer intervals, - consistent with Python's range() and sequence slicing functions. -- Loci and exons are locations. -- An exon list is a list of locations that, when flattened, is an increasing - sequence. -- A position is a 2-tuple of which the first element is a one based non-zero - integer relative to an element in a location and the second element is an - integer offset relative to the first element. -""" -from pkg_resources import get_distribution +from importlib.metadata import metadata from .crossmapper import Coding, Genomic, NonCoding from .location import nearest_location @@ -20,14 +6,13 @@ from .multi_locus import MultiLocus -def _get_metadata(name): - pkg = get_distribution('mutalyzer_crossmapper') - - for line in pkg.get_metadata_lines(pkg.PKG_INFO): - if line.startswith('{}: '.format(name)): - return line.split(': ')[1] - - return '' +def _get_metadata(name: str) -> str: + """Get metadata from the package using importlib.metadata""" + try: + meta = metadata('mutalyzer_crossmapper') + return meta.get(name, '') + except Exception: + return '' _copyright_notice = 'Copyright (c) {} <{}>'.format( diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index 537efd3..88cba2e 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -3,28 +3,28 @@ class Genomic(object): """Genomic crossmap object.""" - def coordinate_to_genomic(self, coordinate): + def coordinate_to_genomic(self, coordinate: int) -> dict[str, int]: """Convert a coordinate to a genomic position (g./m./o.). :arg int coordinate: Coordinate. - :returns int: Genomic position. + :returns dict: Genomic position model. 
""" - return coordinate + 1 + return {'position': coordinate + 1} - def genomic_to_coordinate(self, position): + def genomic_to_coordinate(self, pos_m: dict[str, int]) -> int: """Convert a genomic position (g./m./o.) to a coordinate. - :arg int position: Genomic position. + :arg dict pos_m: Genomic position model. :returns int: Coordinate. """ - return position - 1 + return pos_m['position'] - 1 class NonCoding(Genomic): """NonCoding crossmap object.""" - def __init__(self, locations, inverted=False): + def __init__(self, locations: list[tuple[int, int]], inverted: bool = False) -> None: """ :arg list locations: List of locus locations. :arg bool inverted: Orientation. @@ -32,34 +32,63 @@ def __init__(self, locations, inverted=False): self._inverted = inverted self._noncoding = MultiLocus(locations, inverted) + if self._inverted: + self._exons_len = self._noncoding.to_position(locations[0][0])['position'] + 1 + else: + self._exons_len = self._noncoding.to_position(locations[-1][1] - 1)['position'] + 1 - def coordinate_to_noncoding(self, coordinate): + def coordinate_to_noncoding( + self, + coordinate: int, + degenerate: bool = False + ) -> dict[str, int | str]: """Convert a coordinate to a noncoding position (n./r.). :arg int coordinate: Coordinate. - :returns tuple: Noncoding position. + :returns dict: Noncoding position model. 
""" - pos = self._noncoding.to_position(coordinate) - - return pos[0] + 1, pos[1], pos[2] - - def noncoding_to_coordinate(self, position): + multilocus_pos_m = self._noncoding.to_position(coordinate) + pos_m = {**multilocus_pos_m, 'position': multilocus_pos_m['position'] + 1} + region = pos_m['region'] + if degenerate: + if region == 'u': + pos_m['region'] = '-' + pos_m['offset'] = -pos_m['position'] + pos_m['position'] = 1 + elif region == 'd': + pos_m['region'] = '*' + pos_m['offset'] = pos_m['position'] + pos_m['position'] = self._exons_len + return pos_m + + def noncoding_to_coordinate(self, pos_m: dict[str, int | str]) -> int: """Convert a noncoding position (n./r.) to a coordinate. - :arg tuple position: Noncoding position. + :arg dict pos_m: Noncoding position model. :returns int: Coordinate. """ - if position[0] > 0: - return self._noncoding.to_coordinate( - (position[0] - 1, position[1])) - return self._noncoding.to_coordinate(position) + multilocus_pos_m = {**pos_m, 'position': pos_m['position'] - 1} + if pos_m['region'] == '-': # degenerate results + multilocus_pos_m['region'] = 'u' + multilocus_pos_m['position'] = abs(pos_m['offset']) - 1 + multilocus_pos_m['offset'] = 0 + if pos_m['region'] == '*': # degenerate results + multilocus_pos_m['region'] = 'd' + multilocus_pos_m['position'] = abs(pos_m['offset']) - 1 + multilocus_pos_m['offset'] = 0 + return self._noncoding.to_coordinate(multilocus_pos_m) class Coding(NonCoding): """Coding crossmap object.""" - def __init__(self, locations, cds, inverted=False): + def __init__( + self, + locations: list[tuple[int, int]], + cds: tuple[int, int], + inverted: bool = False + ) -> None: """ :arg list locations: List of locus locations. :arg tuple cds: Locus location. 
@@ -67,90 +96,142 @@ def __init__(self, locations, cds, inverted=False): """ NonCoding.__init__(self, locations, inverted) - b0 = self._noncoding.to_position(cds[0]) - b1 = self._noncoding.to_position(cds[1]) + cds_start = self._noncoding.to_position(cds[0]) + cds_end = self._noncoding.to_position(cds[1] - 1) + exons_start = self._noncoding.to_position(locations[0][0]) + exons_end = self._noncoding.to_position(locations[-1][1] - 1) if self._inverted: - self._coding = (b1[0] + b1[1] + 1, b0[0] + b0[1] + 1) - self._cds_len = (b0[0] + b0[1]) - (b1[0] + b1[1]) + self._coding = ( + cds_end['position'] + cds_end['offset'], + cds_start['position'] + cds_start['offset'] + 1 + ) + # Used in degenerate option + self._exons_len = exons_start['position'] + 1 else: - self._coding = (b0[0] + b0[1], b1[0] + b1[1]) - self._cds_len = (b1[0] + b1[1]) - (b0[0] + b0[1]) - - def _coordinate_to_coding(self, coordinate): + self._coding = ( + cds_start['position'] + cds_start['offset'], + cds_end['position'] + cds_end['offset'] + 1 + ) + # Used in degenerate option + self._exons_len = exons_end['position'] + 1 + + def _coordinate_to_coding(self, coordinate: int) -> dict[str, int | str]: """Convert a coordinate to a coding position (c./r.). :arg int coordinate: Coordinate. - :returns tuple: Coding position (c./r.). + :returns dict: Coding position model (c./r.). 
""" - pos = self._noncoding.to_position(coordinate) - - if pos[0] < self._coding[0]: - return pos[0] - self._coding[0], pos[1], -1, pos[2] - elif pos[0] >= self._coding[1]: - return pos[0] - self._coding[1] + 1, pos[1], 1, pos[2] - return pos[0] - self._coding[0] + 1, pos[1], 0, pos[2] - - def coordinate_to_coding(self, coordinate, degenerate=False): + multilocus_pos_m = self._noncoding.to_position(coordinate) + + if multilocus_pos_m['region'] in ('u', 'd'): + return {**multilocus_pos_m, 'position': multilocus_pos_m['position'] + 1} + + location = multilocus_pos_m['position'] + offset = multilocus_pos_m['offset'] + if location < self._coding[0]: + return { + 'position': self._coding[0] - location, + 'offset': offset, + 'region': '-' + } + if location >= self._coding[1]: + return { + 'position': location - self._coding[1] + 1, + 'offset': offset, + 'region': '*' + } + return { + 'position': location - self._coding[0] + 1, + 'offset': offset, + 'region': '' + } + + def coordinate_to_coding(self, coordinate: int, degenerate: bool = False) -> dict: """Convert a coordinate to a coding position (c./r.). :arg int coordinate: Coordinate. :arg bool degenerate: Return a degenerate position. - :returns tuple: Coding position (c./r.). + :returns dict: Coding position model (c./r.). 
""" - pos = self._coordinate_to_coding(coordinate) - - if degenerate and pos[3]: - if pos[2] == 0: - if pos[0] == 1 and pos[1] < 0: - return pos[1], 0, -1, pos[3] - if pos[0] == self._cds_len and pos[1] > 0: - return pos[0] + pos[1] - self._cds_len, 0, 1, pos[3] - return pos[0] + pos[1], 0, pos[2], pos[3] - - return pos - - def coding_to_coordinate(self, position): + pos_m = self._coordinate_to_coding(coordinate) + + region = pos_m['region'] + if not degenerate or region == '': + return pos_m + + degenerate_pos_m = {**pos_m, 'offset': pos_m['offset']} + location = pos_m['position'] + if region == 'u': + degenerate_pos_m['position'] = location + self._coding[0] + degenerate_pos_m['region'] = '-' + if region == 'd': + degenerate_pos_m['position'] = location + self._exons_len - self._coding[1] + degenerate_pos_m['region'] = '*' + return degenerate_pos_m + + def coding_to_coordinate(self, pos_m: dict[str, int | str]) -> int: """Convert a coding position (c./r.) to a coordinate. - :arg tuple position: Coding position (c./r.). + :arg dict pos_m: Coding position model (c./r.). :returns int: Coordinate. 
""" - if position[2] == -1: - return self._noncoding.to_coordinate( - (position[0] + self._coding[0], position[1])) - elif position[2] == 1: - return self._noncoding.to_coordinate( - (position[0] + self._coding[1] - 1, position[1])) - return self._noncoding.to_coordinate( - (position[0] + self._coding[0] - 1, position[1])) - - def coordinate_to_protein(self, coordinate): + location = pos_m['position'] + region = pos_m['region'] + multilocus_pos_m = {**pos_m} + + if region in ('u', 'd'): + multilocus_pos_m['position'] = location - 1 + return self._noncoding.to_coordinate(multilocus_pos_m) + + multilocus_pos_m['region'] = '' + if region == '': + multilocus_pos_m['position'] = location + self._coding[0] - 1 + elif region == '-': + multilocus_pos_m['position'] = self._coding[0] - location + else: + multilocus_pos_m['position'] = self._coding[1] + location - 1 + return self._noncoding.to_coordinate(multilocus_pos_m) + + def coordinate_to_protein(self, coordinate: int) -> dict[str, int | str]: """Convert a coordinate to a protein position (p.). :arg int coordinate: Coordinate. - :returns tuple: Protein position (p.). + :returns dict: Protein position model(p.). """ - pos = self.coordinate_to_coding(coordinate) - - if pos[2] == -1: - return (pos[0] // 3, pos[0] % 3 + 1, *pos[1:]) - return ((pos[0] + 2) // 3, (pos[0] + 2) % 3 + 1, *pos[1:]) - - def protein_to_coordinate(self, position): + pos_m = self.coordinate_to_coding(coordinate) + + location = pos_m['position'] + if pos_m['region'] in ('-', 'u'): + return { + 'position': abs(-location // 3), + 'position_in_codon': -location % 3 + 1, + 'region': pos_m['region'], + 'offset': pos_m['offset']} + return { + 'position': (location + 2) // 3, + 'position_in_codon': (location + 2) % 3 + 1, + 'region': pos_m['region'], + 'offset': pos_m['offset']} + + def protein_to_coordinate(self, pos_m: dict[str, int | str]) -> int: """Convert a protein position (p.) to a coordinate. - :arg tuple position: Protein position (p.). 
+ :arg dict pos_m: Protein position model(p.). :returns int: Coordinate. """ - if position[3] == -1: + if pos_m['region'] in ('-', 'u'): return self.coding_to_coordinate( - (3 * position[0] + position[1] - 1, *position[2:])) + {'position': 3 * pos_m['position'] - pos_m['position_in_codon'] + 1, + 'offset': pos_m['offset'], + 'region': pos_m['region']}) return self.coding_to_coordinate( - (3 * position[0] + position[1] - 3, *position[2:])) + {'position': 3 * pos_m['position'] + pos_m['position_in_codon'] - 3, + 'offset': pos_m['offset'], + 'region': pos_m['region']}) diff --git a/mutalyzer_crossmapper/location.py b/mutalyzer_crossmapper/location.py index e580672..9b534e4 100644 --- a/mutalyzer_crossmapper/location.py +++ b/mutalyzer_crossmapper/location.py @@ -1,4 +1,4 @@ -def _nearest_boundary(lb, rb, c, p): +def _nearest_boundary(lb: int, rb: int, c: int, p: int) -> int: """Find the boundary nearest to `c`. In case of a draw, the parameter `p` decides which one is chosen. @@ -19,7 +19,7 @@ def _nearest_boundary(lb, rb, c, p): return p -def nearest_location(ls, c, p=0): +def nearest_location(ls: list[tuple[int, int]], c: int, p: int = 0) -> int: """Find the location nearest to `c`. In case of a draw, the parameter `p` decides which index is chosen. diff --git a/mutalyzer_crossmapper/locus.py b/mutalyzer_crossmapper/locus.py index 14a9d20..7fbb7b3 100644 --- a/mutalyzer_crossmapper/locus.py +++ b/mutalyzer_crossmapper/locus.py @@ -1,6 +1,6 @@ class Locus(object): """Locus object.""" - def __init__(self, location, inverted=False): + def __init__(self, location: tuple[int, int], inverted: bool = False) -> None: """ :arg tuple location: Locus location. :arg bool inverted: Orientation. @@ -10,33 +10,33 @@ def __init__(self, location, inverted=False): self.boundary = location[0], location[1] - 1 self._end = self.boundary[1] - self.boundary[0] - def to_position(self, coordinate): - """Convert a coordinate to a proper position. 
+ def to_position(self, coordinate: int) -> dict[str, int]: + """Convert a coordinate to a proper position model. :arg int coordinate: Coordinate. - :returns tuple: Position. + :returns dict: Position model with 'position' and 'offset' keys. """ if self._inverted: if coordinate > self.boundary[1]: - return 0, self.boundary[1] - coordinate + return {'position': 0, 'offset': self.boundary[1] - coordinate} if coordinate < self.boundary[0]: - return self._end, self.boundary[0] - coordinate - return self.boundary[1] - coordinate, 0 + return {'position': self._end, 'offset': self.boundary[0] - coordinate} + return {'position': self.boundary[1] - coordinate, 'offset': 0} if coordinate < self.boundary[0]: - return 0, coordinate - self.boundary[0] + return {'position': 0, 'offset': coordinate - self.boundary[0]} if coordinate > self.boundary[1]: - return self._end, coordinate - self.boundary[1] - return coordinate - self.boundary[0], 0 + return {'position': self._end, 'offset': coordinate - self.boundary[1]} + return {'position': coordinate - self.boundary[0], 'offset': 0} - def to_coordinate(self, position): - """Convert a position to a coordinate. + def to_coordinate(self, pos_m: dict[str, int]) -> int: + """Convert a position model to a coordinate. - :arg int position: Position. + :arg dict pos_m: Position model with 'position' and 'offset' keys. :returns int: Coordinate. 
""" if self._inverted: - return self.boundary[1] - position[0] - position[1] - return self.boundary[0] + position[0] + position[1] + return self.boundary[1] - pos_m['position'] - pos_m['offset'] + return self.boundary[0] + pos_m['position'] + pos_m['offset'] diff --git a/mutalyzer_crossmapper/multi_locus.py b/mutalyzer_crossmapper/multi_locus.py index 9836549..d2c5662 100644 --- a/mutalyzer_crossmapper/multi_locus.py +++ b/mutalyzer_crossmapper/multi_locus.py @@ -5,7 +5,7 @@ from .locus import Locus -def _offsets(locations, orientation): +def _offsets(locations: list[tuple[int, int]], orientation: int) -> list[int]: """For each location, calculate the length of the preceding locations. :arg list locations: List of locations. @@ -19,7 +19,7 @@ def _offsets(locations, orientation): class MultiLocus(object): """MultiLocus object.""" - def __init__(self, locations, inverted=False): + def __init__(self, locations: list[tuple[int, int]], inverted: bool = False) -> None: """ :arg list locations: List of locus locations. :arg bool inverted: Orientation. @@ -31,12 +31,12 @@ def __init__(self, locations, inverted=False): self._orientation = -1 if inverted else 1 self._offsets = _offsets(locations, self._orientation) - def _direction(self, index): + def _direction(self, index: int) -> int: if self._inverted: return len(self._offsets) - index - 1 return index - def outside(self, coordinate): + def outside(self, coordinate: int) -> int: """Calculate the offset relative to this MultiLocus. :arg int coordinate: Coordinate. @@ -49,32 +49,51 @@ def outside(self, coordinate): return coordinate - self._loci[-1].boundary[1] return 0 - def to_position(self, coordinate): + def to_position(self, coordinate: int) -> dict[str, int | str]: """Convert a coordinate to a position. :arg int coordinate: Coordinate. - :returns tuple: Position. + :returns dict: Position model 'position', 'offset' and 'region' keys. 
""" index = nearest_location(self._locations, coordinate, self._inverted) outside = self._orientation * self.outside(coordinate) - location = self._loci[index].to_position(coordinate) - - return ( - location[0] + self._offsets[self._direction(index)], - location[1], - outside) - - def to_coordinate(self, position): - """Convert a position to a coordinate. - - :arg int position: Position. + region = 'u' if outside < 0 else 'd' if outside > 0 else '' + locus_pos_m = self._loci[index].to_position(coordinate) + + if outside: + return { + 'position': abs(locus_pos_m['offset']) - 1, + 'offset': 0, + 'region': region + } + return { + 'position': locus_pos_m['position'] + self._offsets[self._direction(index)], + 'offset': locus_pos_m['offset'], + 'region': region + } + + def to_coordinate(self, pos_m: dict[str, int | str]) -> int: + """Convert a position model to a coordinate. + + :arg dict pos_m: Position model with 'position','offset' and 'region' keys. :returns int: Coordinate. """ + region = pos_m['region'] + + if region == 'u': + if self._inverted: + return self._locations[-1][1] + abs(pos_m['position']) - pos_m['offset'] + return self._locations[0][0] - abs(pos_m['position']) + pos_m['offset'] - 1 + if region == 'd': + if self._inverted: + return self._locations[0][0] - abs(pos_m['position']) - pos_m['offset'] - 1 + return self._locations[-1][1] + abs(pos_m['position']) + pos_m['offset'] + index = min( len(self._offsets), - max(0, bisect_right(self._offsets, position[0]) - 1)) - - return self._loci[self._direction(index)].to_coordinate( - (position[0] - self._offsets[index], position[1])) + max(0, bisect_right(self._offsets, pos_m['position']) - 1) + ) + locus_pos_m = {**pos_m, 'position': pos_m['position'] - self._offsets[index]} + return self._loci[self._direction(index)].to_coordinate(locus_pos_m) diff --git a/setup.cfg b/setup.cfg index ca7be54..c58c160 100644 --- a/setup.cfg +++ b/setup.cfg @@ -17,6 +17,7 @@ classifiers = [options] packages = find: 
+python_requires = >=3.10 [options.extras_require] tests = diff --git a/tests/helper.py b/tests/helper.py index a17b119..b3ca042 100644 --- a/tests/helper.py +++ b/tests/helper.py @@ -6,4 +6,4 @@ def invariant(f, x, f_i, y): def degenerate_equal(f, coordinate, locations): assert f(locations[0]) == coordinate assert len( - set(map(f, locations))) == 1 + set(map(f, locations))) == 1 \ No newline at end of file diff --git a/tests/test_crossmapper.py b/tests/test_crossmapper.py index dbe6b47..dcdb718 100644 --- a/tests/test_crossmapper.py +++ b/tests/test_crossmapper.py @@ -11,9 +11,17 @@ def test_Genomic(): crossmap = Genomic() invariant( - crossmap.coordinate_to_genomic, 0, crossmap.genomic_to_coordinate, 1) + crossmap.coordinate_to_genomic, + 0, + crossmap.genomic_to_coordinate, + {'position': 1}, + ) invariant( - crossmap.coordinate_to_genomic, 98, crossmap.genomic_to_coordinate, 99) + crossmap.coordinate_to_genomic, + 98, + crossmap.genomic_to_coordinate, + {'position': 99}, + ) def test_NonCoding(): @@ -22,19 +30,37 @@ def test_NonCoding(): # Boundary between upstream and transcript. invariant( - crossmap.coordinate_to_noncoding, 4, - crossmap.noncoding_to_coordinate, (1, -1, -1)) + crossmap.coordinate_to_noncoding, + 3, + crossmap.noncoding_to_coordinate, + {'position': 2, 'offset': 0, 'region': 'u'}, + ) invariant( - crossmap.coordinate_to_noncoding, 5, - crossmap.noncoding_to_coordinate, (1, 0, 0)) + crossmap.coordinate_to_noncoding, + 4, + crossmap.noncoding_to_coordinate, + {'position': 1, 'offset': 0, 'region': 'u'}, + ) + invariant( + crossmap.coordinate_to_noncoding, + 5, + crossmap.noncoding_to_coordinate, + {'position': 1, 'offset': 0, 'region': ''}, + ) # Boundary between downstream and transcript. 
invariant( - crossmap.coordinate_to_noncoding, 71, - crossmap.noncoding_to_coordinate, (22, 0, 0)) + crossmap.coordinate_to_noncoding, + 71, + crossmap.noncoding_to_coordinate, + {'position': 22, 'offset': 0, 'region': ''}, + ) invariant( - crossmap.coordinate_to_noncoding, 72, - crossmap.noncoding_to_coordinate, (22, 1, 1)) + crossmap.coordinate_to_noncoding, + 72, + crossmap.noncoding_to_coordinate, + {'position': 1, 'offset': 0, 'region': 'd'}, + ) def test_NonCoding_inverted(): @@ -43,19 +69,31 @@ def test_NonCoding_inverted(): # Boundary between upstream and transcript. invariant( - crossmap.coordinate_to_noncoding, 72, - crossmap.noncoding_to_coordinate, (1, -1, -1)) + crossmap.coordinate_to_noncoding, + 72, + crossmap.noncoding_to_coordinate, + {'position': 1, 'offset': 0, 'region': 'u'}, + ) invariant( - crossmap.coordinate_to_noncoding, 71, - crossmap.noncoding_to_coordinate, (1, 0, 0)) + crossmap.coordinate_to_noncoding, + 71, + crossmap.noncoding_to_coordinate, + {'position': 1, 'offset': 0, 'region': ''}, + ) # Boundary between downstream and transcript. invariant( - crossmap.coordinate_to_noncoding, 5, - crossmap.noncoding_to_coordinate, (22, 0, 0)) + crossmap.coordinate_to_noncoding, + 5, + crossmap.noncoding_to_coordinate, + {'position': 22, 'offset': 0, 'region': ''}, + ) invariant( - crossmap.coordinate_to_noncoding, 4, - crossmap.noncoding_to_coordinate, (22, 1, 1)) + crossmap.coordinate_to_noncoding, + 4, + crossmap.noncoding_to_coordinate, + {'position': 1, 'offset': 0, 'region': 'd'}, + ) def test_NonCoding_degenerate(): @@ -64,13 +102,27 @@ def test_NonCoding_degenerate(): # Boundary between upstream and transcript. degenerate_equal( - crossmap.noncoding_to_coordinate, 4, - [(1, -1, -1), (-1, 0, -1)]) + crossmap.noncoding_to_coordinate, + 4, + [ + {'position': 1, 'offset': -1, 'region': ''}, + {'position': 1, 'offset': 0, 'region': 'u'}, + {'position': 1, 'offset': -1, 'region': '-'}, + ], + ) # Boundary between downstream and transcript. 
degenerate_equal( - crossmap.noncoding_to_coordinate, 72, - [(22, 1, 1), (23, 0, 1)]) + crossmap.noncoding_to_coordinate, + 72, + [ + {'position': 1, 'offset': 0, 'region': 'd'}, + {'position': 22, 'offset': 1, 'region': ''}, + {'position': 23, 'offset': 0, 'region': ''}, + {'position': 24, 'offset': -1, 'region': ''}, + {'position': 22, 'offset': 1, 'region': '*'}, # standard degenerate result + ], + ) def test_NonCoding_inverted_degenerate(): @@ -79,13 +131,72 @@ def test_NonCoding_inverted_degenerate(): # Boundary between upstream and transcript. degenerate_equal( - crossmap.noncoding_to_coordinate, 72, - [(1, -1, -1), (-1, 0, -1)]) + crossmap.noncoding_to_coordinate, + 72, + [ + {'position': 1, 'offset': -1, 'region': ''}, + {'position': 1, 'offset': 0, 'region': 'u'}, + {'position': 1, 'offset': -1, 'region': '-'}, + ], + ) # Boundary between downstream and transcript. degenerate_equal( - crossmap.noncoding_to_coordinate, 4, - [(22, 1, 1), (23, 0, 1)]) + crossmap.noncoding_to_coordinate, + 4, + [ + {'position': 1, 'offset': 0, 'region': 'd'}, + {'position': 22, 'offset': 1, 'region': '*'}, + {'position': 23, 'offset': 0, 'region': ''}, + {'position': 22, 'offset': 1, 'region': ''}, + ], + ) + + +def test_NonCoding_degenerate_return(): + crossmap = NonCoding(_exons) + + assert crossmap.coordinate_to_noncoding(4, True) == { + 'position': 1, + 'offset': -1, + 'region': '-', + } + + assert crossmap.coordinate_to_noncoding(72, True) == { + 'position': 22, + 'offset': 1, + 'region': '*', + } + + +def test_NonCoding_inverted_degenerate_return(): + crossmap = NonCoding(_exons, True) + + assert crossmap.coordinate_to_noncoding(72, True) == { + 'position': 1, + 'offset': -1, + 'region': '-', + } + + assert crossmap.coordinate_to_noncoding(4, True) == { + 'position': 22, + 'offset': 1, + 'region': '*', + } + + +def test_NonCoding_degenerate_no_return(): + """Degenerate internal positions do not exist.""" + crossmap = NonCoding(_exons) + + assert 
crossmap.coordinate_to_noncoding(25) == crossmap.coordinate_to_noncoding(25, True) + + +def test_NonCoding_inverted_degenerate_no_return(): + """Degenerate internal positions do not exist.""" + crossmap = NonCoding(_exons, True) + + assert crossmap.coordinate_to_noncoding(25) == crossmap.coordinate_to_noncoding(25, True) def test_Coding(): @@ -94,19 +205,31 @@ def test_Coding(): # Boundary between 5' and CDS. invariant( - crossmap.coordinate_to_coding, 31, - crossmap.coding_to_coordinate, (-1, 0, -1, 0)) + crossmap.coordinate_to_coding, + 31, + crossmap.coding_to_coordinate, + {'position': 1, 'offset': 0, 'region': '-'}, + ) invariant( - crossmap.coordinate_to_coding, 32, - crossmap.coding_to_coordinate, (1, 0, 0, 0)) + crossmap.coordinate_to_coding, + 32, + crossmap.coding_to_coordinate, + {'position': 1, 'offset': 0, 'region': ''}, + ) # Boundary between CDS and 3'. invariant( - crossmap.coordinate_to_coding, 42, - crossmap.coding_to_coordinate, (6, 0, 0, 0)) + crossmap.coordinate_to_coding, + 42, + crossmap.coding_to_coordinate, + {'position': 6, 'offset': 0, 'region': ''}, + ) invariant( - crossmap.coordinate_to_coding, 43, - crossmap.coding_to_coordinate, (1, 0, 1, 0)) + crossmap.coordinate_to_coding, + 43, + crossmap.coding_to_coordinate, + {'position': 1, 'offset': 0, 'region': '*'}, + ) def test_Coding_inverted(): @@ -115,19 +238,31 @@ def test_Coding_inverted(): # Boundary between 5' and CDS. invariant( - crossmap.coordinate_to_coding, 43, - crossmap.coding_to_coordinate, (-1, 0, -1, 0)) + crossmap.coordinate_to_coding, + 43, + crossmap.coding_to_coordinate, + {'position': 1, 'offset': 0, 'region': '-'}, + ) invariant( - crossmap.coordinate_to_coding, 42, - crossmap.coding_to_coordinate, (1, 0, 0, 0)) + crossmap.coordinate_to_coding, + 42, + crossmap.coding_to_coordinate, + {'position': 1, 'offset': 0, 'region': ''}, + ) # Boundary between CDS and 3'. 
invariant( - crossmap.coordinate_to_coding, 32, - crossmap.coding_to_coordinate, (6, 0, 0, 0)) + crossmap.coordinate_to_coding, + 32, + crossmap.coding_to_coordinate, + {'position': 6, 'offset': 0, 'region': ''}, + ) invariant( - crossmap.coordinate_to_coding, 31, - crossmap.coding_to_coordinate, (1, 0, 1, 0)) + crossmap.coordinate_to_coding, + 31, + crossmap.coding_to_coordinate, + {'position': 1, 'offset': 0, 'region': '*'}, + ) def test_Coding_regions(): @@ -136,19 +271,31 @@ def test_Coding_regions(): # Upstream odd length intron between two regions. invariant( - crossmap.coordinate_to_coding, 25, - crossmap.coding_to_coordinate, (-1, 5, -1, 0)) + crossmap.coordinate_to_coding, + 25, + crossmap.coding_to_coordinate, + {'position': 1, 'offset': 5, 'region': '-'}, + ) invariant( - crossmap.coordinate_to_coding, 26, - crossmap.coding_to_coordinate, (1, -4, 0, 0)) + crossmap.coordinate_to_coding, + 26, + crossmap.coding_to_coordinate, + {'position': 1, 'offset': -4, 'region': ''}, + ) # Downstream odd length intron between two regions. invariant( - crossmap.coordinate_to_coding, 44, - crossmap.coding_to_coordinate, (10, 5, 0, 0)) + crossmap.coordinate_to_coding, + 44, + crossmap.coding_to_coordinate, + {'position': 10, 'offset': 5, 'region': ''}, + ) invariant( - crossmap.coordinate_to_coding, 45, - crossmap.coding_to_coordinate, (1, -4, 1, 0)) + crossmap.coordinate_to_coding, + 45, + crossmap.coding_to_coordinate, + {'position': 1, 'offset': -4, 'region': '*'}, + ) def test_Coding_regions_inverted(): @@ -157,19 +304,31 @@ def test_Coding_regions_inverted(): # Upstream odd length intron between two regions. 
invariant( - crossmap.coordinate_to_coding, 44, - crossmap.coding_to_coordinate, (-1, 5, -1, 0)) + crossmap.coordinate_to_coding, + 44, + crossmap.coding_to_coordinate, + {'position': 1, 'offset': 5, 'region': '-'}, + ) invariant( - crossmap.coordinate_to_coding, 43, - crossmap.coding_to_coordinate, (1, -4, 0, 0)) + crossmap.coordinate_to_coding, + 43, + crossmap.coding_to_coordinate, + {'position': 1, 'offset': -4, 'region': ''}, + ) # Downstream odd length intron between two regions. invariant( - crossmap.coordinate_to_coding, 25, - crossmap.coding_to_coordinate, (10, 5, 0, 0)) + crossmap.coordinate_to_coding, + 25, + crossmap.coding_to_coordinate, + {'position': 10, 'offset': 5, 'region': ''}, + ) invariant( - crossmap.coordinate_to_coding, 24, - crossmap.coding_to_coordinate, (1, -4, 1, 0)) + crossmap.coordinate_to_coding, + 24, + crossmap.coding_to_coordinate, + {'position': 1, 'offset': -4, 'region': '*'}, + ) def test_Coding_no_utr5(): @@ -178,11 +337,61 @@ def test_Coding_no_utr5(): # Direct transition from upstream to CDS. 
invariant( - crossmap.coordinate_to_coding, 9, - crossmap.coding_to_coordinate, (1, -1, 0, -1)) + crossmap.coordinate_to_coding, + 9, + crossmap.coding_to_coordinate, + {'position': 1, 'offset': 0, 'region': 'u'}, + ) + invariant( + crossmap.coordinate_to_coding, + 10, + crossmap.coding_to_coordinate, + {'position': 1, 'offset': 0, 'region': ''}, + ) + + +def test_Coding_no_intron(): + crossmap = Coding([(10, 20), (20, 30)], (15, 25)) + invariant( - crossmap.coordinate_to_coding, 10, - crossmap.coding_to_coordinate, (1, 0, 0, 0)) + crossmap.coordinate_to_coding, + 20, + crossmap.coding_to_coordinate, + {'position': 6, 'offset': 0, 'region': ''}, + ) + + +def test_Coding_no_intron_inverted(): + crossmap = Coding([(10, 20), (20, 30)], (15, 25), True) + + invariant( + crossmap.coordinate_to_coding, + 20, + crossmap.coding_to_coordinate, + {'position': 5, 'offset': 0, 'region': ''}, + ) + + +def test_Coding_one_base_intron(): + crossmap = Coding([(10, 19), (20, 30)], (15, 25)) + + invariant( + crossmap.coordinate_to_coding, + 19, + crossmap.coding_to_coordinate, + {'position': 4, 'offset': 1, 'region': ''}, + ) + + +def test_Coding_one_base_intron_inverted(): + crossmap = Coding([(10, 19), (20, 30)], (15, 25), True) + + invariant( + crossmap.coordinate_to_coding, + 19, + crossmap.coding_to_coordinate, + {'position': 5, 'offset': 1, 'region': ''}, + ) def test_Coding_no_utr5_inverted(): @@ -191,11 +400,17 @@ def test_Coding_no_utr5_inverted(): # Direct transition from upstream to CDS. 
invariant( - crossmap.coordinate_to_coding, 20, - crossmap.coding_to_coordinate, (1, -1, 0, -1)) + crossmap.coordinate_to_coding, + 20, + crossmap.coding_to_coordinate, + {'position': 1, 'offset': 0, 'region': 'u'}, + ) invariant( - crossmap.coordinate_to_coding, 19, - crossmap.coding_to_coordinate, (1, 0, 0, 0)) + crossmap.coordinate_to_coding, + 19, + crossmap.coding_to_coordinate, + {'position': 1, 'offset': 0, 'region': ''}, + ) def test_Coding_no_utr3(): @@ -204,11 +419,17 @@ def test_Coding_no_utr3(): # Direct transition from CDS to downstream. invariant( - crossmap.coordinate_to_coding, 19, - crossmap.coding_to_coordinate, (5, 0, 0, 0)) + crossmap.coordinate_to_coding, + 19, + crossmap.coding_to_coordinate, + {'position': 5, 'offset': 0, 'region': ''}, + ) invariant( - crossmap.coordinate_to_coding, 20, - crossmap.coding_to_coordinate, (5, 1, 0, 1)) + crossmap.coordinate_to_coding, + 20, + crossmap.coding_to_coordinate, + {'position': 1, 'offset': 0, 'region': 'd'}, + ) def test_Coding_no_utr3_inverted(): @@ -217,11 +438,17 @@ def test_Coding_no_utr3_inverted(): # Direct transition from CDS to downstream. invariant( - crossmap.coordinate_to_coding, 10, - crossmap.coding_to_coordinate, (5, 0, 0, 0)) + crossmap.coordinate_to_coding, + 10, + crossmap.coding_to_coordinate, + {'position': 5, 'offset': 0, 'region': ''}, + ) invariant( - crossmap.coordinate_to_coding, 9, - crossmap.coding_to_coordinate, (5, 1, 0, 1)) + crossmap.coordinate_to_coding, + 9, + crossmap.coding_to_coordinate, + {'position': 1, 'offset': 0, 'region': 'd'}, + ) def test_Coding_small_utr5(): @@ -230,14 +457,23 @@ def test_Coding_small_utr5(): # Transition from upstream to 5' UTR to CDS. 
invariant( - crossmap.coordinate_to_coding, 9, - crossmap.coding_to_coordinate, (-1, -1, -1, -1)) + crossmap.coordinate_to_coding, + 9, + crossmap.coding_to_coordinate, + {'position': 1, 'offset': 0, 'region': 'u'}, + ) invariant( - crossmap.coordinate_to_coding, 10, - crossmap.coding_to_coordinate, (-1, 0, -1, 0)) + crossmap.coordinate_to_coding, + 10, + crossmap.coding_to_coordinate, + {'position': 1, 'offset': 0, 'region': '-'}, + ) invariant( - crossmap.coordinate_to_coding, 11, - crossmap.coding_to_coordinate, (1, 0, 0, 0)) + crossmap.coordinate_to_coding, + 11, + crossmap.coding_to_coordinate, + {'position': 1, 'offset': 0, 'region': ''}, + ) def test_Coding_small_utr5_inverted(): @@ -246,14 +482,23 @@ def test_Coding_small_utr5_inverted(): # Transition from upstream to 5' UTR to CDS. invariant( - crossmap.coordinate_to_coding, 20, - crossmap.coding_to_coordinate, (-1, -1, -1, -1)) + crossmap.coordinate_to_coding, + 20, + crossmap.coding_to_coordinate, + {'position': 1, 'offset': 0, 'region': 'u'}, + ) invariant( - crossmap.coordinate_to_coding, 19, - crossmap.coding_to_coordinate, (-1, 0, -1, 0)) + crossmap.coordinate_to_coding, + 19, + crossmap.coding_to_coordinate, + {'position': 1, 'offset': 0, 'region': '-'}, + ) invariant( - crossmap.coordinate_to_coding, 18, - crossmap.coding_to_coordinate, (1, 0, 0, 0)) + crossmap.coordinate_to_coding, + 18, + crossmap.coding_to_coordinate, + {'position': 1, 'offset': 0, 'region': ''}, + ) def test_Coding_small_utr3(): @@ -262,14 +507,23 @@ def test_Coding_small_utr3(): # Transition from CDS to 3' UTR to downstream. 
invariant( - crossmap.coordinate_to_coding, 18, - crossmap.coding_to_coordinate, (4, 0, 0, 0)) + crossmap.coordinate_to_coding, + 18, + crossmap.coding_to_coordinate, + {'position': 4, 'offset': 0, 'region': ''}, + ) invariant( - crossmap.coordinate_to_coding, 19, - crossmap.coding_to_coordinate, (1, 0, 1, 0)) + crossmap.coordinate_to_coding, + 19, + crossmap.coding_to_coordinate, + {'position': 1, 'offset': 0, 'region': '*'}, + ) invariant( - crossmap.coordinate_to_coding, 20, - crossmap.coding_to_coordinate, (1, 1, 1, 1)) + crossmap.coordinate_to_coding, + 20, + crossmap.coding_to_coordinate, + {'position': 1, 'offset': 0, 'region': 'd'}, + ) def test_Coding_small_utr3_inverted(): @@ -278,26 +532,55 @@ def test_Coding_small_utr3_inverted(): # Transition from CDS to 3' UTR to downstream. invariant( - crossmap.coordinate_to_coding, 11, - crossmap.coding_to_coordinate, (4, 0, 0, 0)) + crossmap.coordinate_to_coding, + 11, + crossmap.coding_to_coordinate, + {'position': 4, 'offset': 0, 'region': ''}, + ) invariant( - crossmap.coordinate_to_coding, 10, - crossmap.coding_to_coordinate, (1, 0, 1, 0)) + crossmap.coordinate_to_coding, + 10, + crossmap.coding_to_coordinate, + {'position': 1, 'offset': 0, 'region': '*'}, + ) invariant( - crossmap.coordinate_to_coding, 9, - crossmap.coding_to_coordinate, (1, 1, 1, 1)) + crossmap.coordinate_to_coding, + 9, + crossmap.coding_to_coordinate, + {'position': 1, 'offset': 0, 'region': 'd'}, + ) def test_Coding_degenerate(): """Degenerate upstream and downstream positions are silently corrected.""" crossmap = Coding([(10, 20)], (11, 19)) + # Degenerate position in upstream. 
degenerate_equal( - crossmap.coding_to_coordinate, 9, - [(-1, -1, -1, -1), (-2, 0, -1, -1), (1, -2, 0, -1), (1, -10, 1, -1)]) + crossmap.coding_to_coordinate, + 9, + [ + {'position': 1, 'offset': 0, 'region': 'u'}, + {'position': 2, 'offset': 0, 'region': '-'}, + {'position': 1, 'offset': -2, 'region': ''}, + {'position': 1, 'offset': -10, 'region': '*'}, + {'position': 2, 'offset': -11, 'region': '*'}, + {'position': 3, 'offset': 1, 'region': '-'}, + {'position': 4, 'offset': 2, 'region': '-'}, + ], + ) degenerate_equal( - crossmap.coding_to_coordinate, 20, - [(1, 1, 1, 1), (2, 0, 1, 1), (8, 2, 0, 1), (-1, 10, -1, 1)]) + crossmap.coding_to_coordinate, + 20, + [ + {'position': 1, 'offset': 0, 'region': 'd'}, + {'position': 2, 'offset': 0, 'region': '*'}, + {'position': 8, 'offset': 2, 'region': ''}, + {'position': 1, 'offset': 10, 'region': '-'}, + {'position': 2, 'offset': 11, 'region': '-'}, + {'position': 7, 'offset': 3, 'region': ''}, + ], + ) def test_Coding_inverted_degenerate(): @@ -305,43 +588,106 @@ def test_Coding_inverted_degenerate(): crossmap = Coding([(10, 20)], (11, 19), True) degenerate_equal( - crossmap.coding_to_coordinate, 20, - [(-1, -1, -1, -1), (-2, 0, -1, -1), (1, -2, 0, -1), (1, -10, 1, -1)]) + crossmap.coding_to_coordinate, + 20, + [ + {'position': 1, 'offset': 0, 'region': 'u'}, + {'position': 2, 'offset': 0, 'region': '-'}, + {'position': 1, 'offset': -2, 'region': ''}, + {'position': 1, 'offset': -10, 'region': '*'}, + {'position': 1, 'offset': -11, 'region': 'd'}, + {'position': 2, 'offset': -3, 'region': ''}, + ], + ) degenerate_equal( - crossmap.coding_to_coordinate, 9, - [(1, 1, 1, 1), (2, 0, 1, 1), (8, 2, 0, 1), (-1, 10, -1, 1)]) + crossmap.coding_to_coordinate, + 9, + [ + {'position': 1, 'offset': 0, 'region': 'd'}, + {'position': 2, 'offset': 0, 'region': '*'}, + {'position': 8, 'offset': 2, 'region': ''}, + {'position': 1, 'offset': 10, 'region': '-'}, + {'position': 1, 'offset': 11, 'region': 'u'}, + {'position': 2, 'offset': 
12, 'region': 'u'}, + + ], + ) def test_Coding_degenerate_return(): """Degenerate upstream and downstream positions may be returned.""" crossmap = Coding([(10, 20)], (11, 19)) - assert crossmap.coordinate_to_coding(9, True) == (-2, 0, -1, -1) - assert crossmap.coordinate_to_coding(20, True) == (2, 0, 1, 1) + assert crossmap.coordinate_to_coding(9, True) == { + 'position': 2, + 'offset': 0, + 'region': '-', + } + assert crossmap.coordinate_to_coding(20, True) == { + 'position': 2, + 'offset': 0, + 'region': '*', + } def test_Coding_inverted_degenerate_return(): """Degenerate upstream and downstream positions may be returned.""" crossmap = Coding([(10, 20)], (11, 19), True) - assert crossmap.coordinate_to_coding(20, True) == (-2, 0, -1, -1) - assert crossmap.coordinate_to_coding(9, True) == (2, 0, 1, 1) - + assert crossmap.coordinate_to_coding(20, True) == { + 'position': 2, + 'offset': 0, + 'region': '-', + } + assert crossmap.coordinate_to_coding(25, True) == { + 'position': 7, + 'offset': 0, + 'region': '-', + } + assert crossmap.coordinate_to_coding(9, True) == { + 'position': 2, + 'offset': 0, + 'region': '*', + } + + +def test_Coding_two_exons_inverted_degenerate_return(): + """Degenerate upstream and downstream positions may be returned.""" + crossmap = Coding([(10, 20), (30, 40)], (18, 37), True) + + assert crossmap.coordinate_to_coding(5, True) == { + 'position': 13, + 'offset': 0, + 'region': '*', + } + assert crossmap.coordinate_to_coding(25, True) == { + 'position': 7, + 'offset': 5, + 'region': '', + } + assert crossmap.coordinate_to_coding(35, True) == { + 'position': 2, + 'offset': 0, + 'region': '', + } + assert crossmap.coordinate_to_coding(38, True) == { + 'position': 2, + 'offset': 0, + 'region': '-', + } def test_Coding_degenerate_no_return(): """Degenerate internal positions do not exist.""" crossmap = Coding([(10, 20), (30, 40)], (10, 40)) - assert (crossmap.coordinate_to_coding(25) == - crossmap.coordinate_to_coding(25, True)) + assert 
crossmap.coordinate_to_coding(25) == crossmap.coordinate_to_coding(25, True) def test_Coding_inverted_degenerate_no_return(): """Degenerate internal positions do not exist.""" crossmap = Coding([(10, 20), (30, 40)], (10, 40), True) - assert (crossmap.coordinate_to_coding(25) == - crossmap.coordinate_to_coding(25, True)) + assert crossmap.coordinate_to_coding(25) == crossmap.coordinate_to_coding(25, True) def test_Coding_no_utr_degenerate(): @@ -349,11 +695,27 @@ def test_Coding_no_utr_degenerate(): crossmap = Coding([(10, 11)], (10, 11)) degenerate_equal( - crossmap.coding_to_coordinate, 9, - [(1, -1, 0, -1), (-1, 0, -1, -1), (1, -2, 1, -1)]) + crossmap.coding_to_coordinate, + 9, + [ + {'position': 1, 'offset': 0, 'region': 'u'}, + {'position': 1, 'offset': 0, 'region': '-'}, + {'position': 1, 'offset': -2, 'region': '*'}, + {'position': 1, 'offset': -1, 'region': ''}, + {'position': 1, 'offset': -2, 'region': 'd'}, + ], + ) degenerate_equal( - crossmap.coding_to_coordinate, 11, - [(1, 1, 0, 1), (1, 0, 1, 1), (-1, 2, -1, 1)]) + crossmap.coding_to_coordinate, + 11, + [ + {'position': 1, 'offset': 0, 'region': 'd'}, + {'position': 1, 'offset': 0, 'region': '*'}, + {'position': 1, 'offset': 2, 'region': '-'}, + {'position': 1, 'offset': 1, 'region': ''}, + {'position': 1, 'offset': 2, 'region': 'u'}, + ], + ) def test_Coding_inverted_no_utr_degenerate(): @@ -361,55 +723,216 @@ def test_Coding_inverted_no_utr_degenerate(): crossmap = Coding([(10, 11)], (10, 11), True) degenerate_equal( - crossmap.coding_to_coordinate, 11, - [(1, -1, 0, -1), (-1, 0, -1, -1), (1, -2, 1, -1)]) + crossmap.coding_to_coordinate, + 11, + [ + {'position': 1, 'offset': 0, 'region': 'u'}, + {'position': 1, 'offset': 0, 'region': '-'}, + {'position': 1, 'offset': -2, 'region': '*'}, + {'position': 1, 'offset': -1, 'region': ''}, + {'position': 1, 'offset': -2, 'region': 'd'}, + ], + ) degenerate_equal( - crossmap.coding_to_coordinate, 9, - [(1, 1, 0, 1), (1, 0, 1, 1), (-1, 2, -1, 1)]) + 
crossmap.coding_to_coordinate, + 9, + [ + {'position': 1, 'offset': 0, 'region': 'd'}, + {'position': 1, 'offset': 0, 'region': '*'}, + {'position': 1, 'offset': 2, 'region': '-'}, + {'position': 1, 'offset': 1, 'region': ''}, + {'position': 1, 'offset': 2, 'region': 'u'}, + ], + ) def test_Coding_no_utr_degenerate_return(): """UTRs may be missing.""" crossmap = Coding([(10, 11)], (10, 11)) - assert crossmap.coordinate_to_coding(8, True) == (-2, 0, -1, -2) - assert crossmap.coordinate_to_coding(9, True) == (-1, 0, -1, -1) - assert crossmap.coordinate_to_coding(11, True) == (1, 0, 1, 1) - assert crossmap.coordinate_to_coding(12, True) == (2, 0, 1, 2) + assert crossmap.coordinate_to_coding(8, True) == { + 'position': 2, + 'offset': 0, + 'region': '-', + } + assert crossmap.coordinate_to_coding(9, True) == { + 'position': 1, + 'offset': 0, + 'region': '-', + } + assert crossmap.coordinate_to_coding(11, True) == { + 'position': 1, + 'offset': 0, + 'region': '*', + } + assert crossmap.coordinate_to_coding(12, True) == { + 'position': 2, + 'offset': 0, + 'region': '*', + } def test_Coding_inverted_no_utr_degenerate_return(): """UTRs may be missing.""" crossmap = Coding([(10, 11)], (10, 11), True) - assert crossmap.coordinate_to_coding(11, True) == (-1, 0, -1, -1) - assert crossmap.coordinate_to_coding(9, True) == (1, 0, 1, 1) + assert crossmap.coordinate_to_coding(11, True) == { + 'position': 1, + 'offset': 0, + 'region': '-', + } + assert crossmap.coordinate_to_coding(9, True) == { + 'position': 1, + 'offset': 0, + 'region': '*', + } def test_Coding_protein(): """Protein positions.""" crossmap = Coding(_exons, _cds) - # Boundary between 5' UTR and CDS. 
+ # Boundary between upstream and 5' UTR invariant( - crossmap.coordinate_to_protein, 31, - crossmap.protein_to_coordinate, (-1, 3, 0, -1, 0)) + crossmap.coordinate_to_protein, + 4, + crossmap.protein_to_coordinate, + {'position': 1, 'position_in_codon': 3, 'offset': 0, 'region': 'u'} + ) invariant( - crossmap.coordinate_to_protein, 32, - crossmap.protein_to_coordinate, (1, 1, 0, 0, 0)) + crossmap.coordinate_to_protein, + 5, + crossmap.protein_to_coordinate, + {'position': 4, 'position_in_codon': 2, 'offset': 0, 'region': '-'} + ) + + # Boundary between 5' UTR and CDS + invariant( + crossmap.coordinate_to_protein, + 31, + crossmap.protein_to_coordinate, + {'position': 1, 'position_in_codon': 3, 'offset': 0, 'region': '-'}, + ) + invariant( + crossmap.coordinate_to_protein, + 32, + crossmap.protein_to_coordinate, + {'position': 1, 'position_in_codon': 1, 'offset': 0, 'region': ''}, + ) # Intron boundary. invariant( - crossmap.coordinate_to_protein, 34, - crossmap.protein_to_coordinate, (1, 3, 0, 0, 0)) + crossmap.coordinate_to_protein, + 34, + crossmap.protein_to_coordinate, + {'position': 1, 'position_in_codon': 3, 'offset': 0, 'region': ''}, + ) invariant( - crossmap.coordinate_to_protein, 35, - crossmap.protein_to_coordinate, (1, 3, 1, 0, 0)) + crossmap.coordinate_to_protein, + 35, + crossmap.protein_to_coordinate, + {'position': 1, 'position_in_codon': 3, 'offset': 1, 'region': ''}, + ) # Boundary between CDS and 3' UTR. 
invariant( - crossmap.coordinate_to_protein, 42, - crossmap.protein_to_coordinate, (2, 3, 0, 0, 0)) + crossmap.coordinate_to_protein, + 42, + crossmap.protein_to_coordinate, + {'position': 2, 'position_in_codon': 3, 'offset': 0, 'region': ''}, + ) + invariant( + crossmap.coordinate_to_protein, + 43, + crossmap.protein_to_coordinate, + {'position': 1, 'position_in_codon': 1, 'offset': 0, 'region': '*'}, + ) + + # Boundary between 3' UTR and downstream + invariant( + crossmap.coordinate_to_protein, + 71, + crossmap.protein_to_coordinate, + {'position': 2, 'position_in_codon': 2, 'offset': 0, 'region': '*'} + ) + invariant( + crossmap.coordinate_to_protein, + 72, + crossmap.protein_to_coordinate, + {'position': 1, 'position_in_codon': 1, 'offset': 0, 'region': 'd'} + ) + + +def test_Coding_inverted_protein(): + """Protein positions.""" + crossmap = Coding(_exons, _cds, True) + + # Boundary between upstream and 5' UTR + invariant( + crossmap.coordinate_to_protein, + 4, + crossmap.protein_to_coordinate, + {'position': 1, 'position_in_codon': 1, 'offset': 0, 'region': 'd'} + ) + invariant( + crossmap.coordinate_to_protein, + 5, + crossmap.protein_to_coordinate, + {'position': 4, 'position_in_codon': 2, 'offset': 0, 'region': '*'} + ) + + # Boundary between 5' UTR and CDS + invariant( + crossmap.coordinate_to_protein, + 31, + crossmap.protein_to_coordinate, + {'position': 1, 'position_in_codon': 1, 'offset': 0, 'region': '*'}, + ) + invariant( + crossmap.coordinate_to_protein, + 32, + crossmap.protein_to_coordinate, + {'position': 2, 'position_in_codon': 3, 'offset': 0, 'region': ''}, + ) + + # Intron boundary. + invariant( + crossmap.coordinate_to_protein, + 34, + crossmap.protein_to_coordinate, + {'position': 2, 'position_in_codon': 1, 'offset': 0, 'region': ''}, + ) + invariant( + crossmap.coordinate_to_protein, + 35, + crossmap.protein_to_coordinate, + {'position': 2, 'position_in_codon': 1, 'offset': -1, 'region': ''}, + ) + + # Boundary between CDS and 3' UTR. 
invariant( - crossmap.coordinate_to_protein, 43, - crossmap.protein_to_coordinate, (1, 1, 0, 1, 0)) + crossmap.coordinate_to_protein, + 42, + crossmap.protein_to_coordinate, + {'position': 1, 'position_in_codon': 1, 'offset': 0, 'region': ''}, + ) + invariant( + crossmap.coordinate_to_protein, + 43, + crossmap.protein_to_coordinate, + {'position': 1, 'position_in_codon': 3, 'offset': 0, 'region': '-'}, + ) + + # Boundary between 3' UTR and downstream + invariant( + crossmap.coordinate_to_protein, + 71, + crossmap.protein_to_coordinate, + {'position': 2, 'position_in_codon': 2, 'offset': 0, 'region': '-'} + ) + invariant( + crossmap.coordinate_to_protein, + 72, + crossmap.protein_to_coordinate, + {'position': 1, 'position_in_codon': 3, 'offset': 0, 'region': 'u'} + ) diff --git a/tests/test_locus.py b/tests/test_locus.py index a873416..535f93d 100644 --- a/tests/test_locus.py +++ b/tests/test_locus.py @@ -7,37 +7,37 @@ def test_Locus(): """Forward orientent Lovus.""" locus = Locus((30, 35)) - invariant(locus.to_position, 29, locus.to_coordinate, (0, -1)) - invariant(locus.to_position, 30, locus.to_coordinate, (0, 0)) - invariant(locus.to_position, 31, locus.to_coordinate, (1, 0)) - invariant(locus.to_position, 33, locus.to_coordinate, (3, 0)) - invariant(locus.to_position, 34, locus.to_coordinate, (4, 0)) - invariant(locus.to_position, 35, locus.to_coordinate, (4, 1)) + invariant(locus.to_position, 29, locus.to_coordinate, {'position': 0, 'offset': -1}) + invariant(locus.to_position, 30, locus.to_coordinate, {'position': 0, 'offset': 0}) + invariant(locus.to_position, 31, locus.to_coordinate, {'position': 1, 'offset': 0}) + invariant(locus.to_position, 33, locus.to_coordinate, {'position': 3, 'offset': 0}) + invariant(locus.to_position, 34, locus.to_coordinate, {'position': 4, 'offset': 0}) + invariant(locus.to_position, 35, locus.to_coordinate, {'position': 4, 'offset': 1}) def test_Locus_inverted(): """Reverse orientent Lovus.""" locus = Locus((30, 35), True) - 
invariant(locus.to_position, 35, locus.to_coordinate, (0, -1)) - invariant(locus.to_position, 34, locus.to_coordinate, (0, 0)) - invariant(locus.to_position, 33, locus.to_coordinate, (1, 0)) - invariant(locus.to_position, 31, locus.to_coordinate, (3, 0)) - invariant(locus.to_position, 30, locus.to_coordinate, (4, 0)) - invariant(locus.to_position, 29, locus.to_coordinate, (4, 1)) + invariant(locus.to_position, 35, locus.to_coordinate, {'position': 0, 'offset': -1}) + invariant(locus.to_position, 34, locus.to_coordinate, {'position': 0, 'offset': 0}) + invariant(locus.to_position, 33, locus.to_coordinate, {'position': 1, 'offset': 0}) + invariant(locus.to_position, 31, locus.to_coordinate, {'position': 3, 'offset': 0}) + invariant(locus.to_position, 30, locus.to_coordinate, {'position': 4, 'offset': 0}) + invariant(locus.to_position, 29, locus.to_coordinate, {'position': 4, 'offset': 1}) def test_Locus_degenerate(): """Degenerate positions are silently corrected.""" locus = Locus((10, 20)) - degenerate_equal(locus.to_coordinate, 9, [(0, -1), (-1, 0)]) - degenerate_equal(locus.to_coordinate, 20, [(9, 1), (10, 0)]) + degenerate_equal(locus.to_coordinate, 9, [{'position': 0, 'offset': -1}, {'position': -1, 'offset': 0}]) + degenerate_equal(locus.to_coordinate, 20, [{'position': 9, 'offset': 1}, {'position': 10, 'offset': 0}]) def test_Locus_inverted_degenerate(): """Degenerate positions are silently corrected.""" locus = Locus((10, 20), True) - degenerate_equal(locus.to_coordinate, 20, [(0, -1), (-1, 0)]) - degenerate_equal(locus.to_coordinate, 9, [(9, 1), (10, 0)]) + degenerate_equal(locus.to_coordinate, 20, [{'position': 0, 'offset': -1}, {'position': -1, 'offset': 0}]) + degenerate_equal(locus.to_coordinate, 9, [{'position': 9, 'offset': 1}, {'position': 10, 'offset': 0}]) diff --git a/tests/test_multi_locus.py b/tests/test_multi_locus.py index 6ce0013..9725ad7 100644 --- a/tests/test_multi_locus.py +++ b/tests/test_multi_locus.py @@ -32,29 +32,70 @@ def 
test_MultiLocus(): # Boundary between upstream and the first locus. invariant( - multi_locus.to_position, 4, multi_locus.to_coordinate, (0, -1, -1)) + multi_locus.to_position, + 4, + multi_locus.to_coordinate, + {'position': 0, 'offset': 0, 'region': 'u'}, + ) + invariant( - multi_locus.to_position, 5, multi_locus.to_coordinate, (0, 0, 0)) + multi_locus.to_position, + 5, + multi_locus.to_coordinate, + {'position': 0, 'offset': 0, 'region': ''}, + ) # Internal locus. invariant( - multi_locus.to_position, 29, multi_locus.to_coordinate, (9, -1, 0)) - invariant( - multi_locus.to_position, 30, multi_locus.to_coordinate, (9, 0, 0)) - invariant( - multi_locus.to_position, 31, multi_locus.to_coordinate, (10, 0, 0)) - invariant( - multi_locus.to_position, 33, multi_locus.to_coordinate, (12, 0, 0)) - invariant( - multi_locus.to_position, 34, multi_locus.to_coordinate, (13, 0, 0)) - invariant( - multi_locus.to_position, 35, multi_locus.to_coordinate, (13, 1, 0)) + multi_locus.to_position, + 29, + multi_locus.to_coordinate, + {'position': 9, 'offset': -1, 'region': ''}, + ) + invariant( + multi_locus.to_position, + 30, + multi_locus.to_coordinate, + {'position': 9, 'offset': 0, 'region': ''}, + ) + invariant( + multi_locus.to_position, + 31, + multi_locus.to_coordinate, + {'position': 10, 'offset': 0, 'region': ''}, + ) + invariant( + multi_locus.to_position, + 33, + multi_locus.to_coordinate, + {'position': 12, 'offset': 0, 'region': ''}, + ) + invariant( + multi_locus.to_position, + 34, + multi_locus.to_coordinate, + {'position': 13, 'offset': 0, 'region': ''}, + ) + invariant( + multi_locus.to_position, + 35, + multi_locus.to_coordinate, + {'position': 13, 'offset': 1, 'region': ''}, + ) # Boundary between the last locus and downstream. 
invariant( - multi_locus.to_position, 71, multi_locus.to_coordinate, (21, 0, 0)) + multi_locus.to_position, + 71, + multi_locus.to_coordinate, + {'position': 21, 'offset': 0, 'region': ''}, + ) invariant( - multi_locus.to_position, 72, multi_locus.to_coordinate, (21, 1, 1)) + multi_locus.to_position, + 72, + multi_locus.to_coordinate, + {'position': 0, 'offset': 0, 'region': 'd'}, + ) def test_MultiLocus_inverted(): @@ -63,29 +104,69 @@ def test_MultiLocus_inverted(): # Boundary between upstream and the first locus. invariant( - multi_locus.to_position, 72, multi_locus.to_coordinate, (0, -1, -1)) + multi_locus.to_position, + 72, + multi_locus.to_coordinate, + {'position': 0, 'offset': 0, 'region': 'u'}, + ) invariant( - multi_locus.to_position, 71, multi_locus.to_coordinate, (0, 0, 0)) + multi_locus.to_position, + 71, + multi_locus.to_coordinate, + {'position': 0, 'offset': 0, 'region': ''}, + ) # Internal locus. invariant( - multi_locus.to_position, 35, multi_locus.to_coordinate, (8, -1, 0)) - invariant( - multi_locus.to_position, 34, multi_locus.to_coordinate, (8, 0, 0)) - invariant( - multi_locus.to_position, 33, multi_locus.to_coordinate, (9, 0, 0)) - invariant( - multi_locus.to_position, 31, multi_locus.to_coordinate, (11, 0, 0)) - invariant( - multi_locus.to_position, 30, multi_locus.to_coordinate, (12, 0, 0)) - invariant( - multi_locus.to_position, 29, multi_locus.to_coordinate, (12, 1, 0)) + multi_locus.to_position, + 35, + multi_locus.to_coordinate, + {'position': 8, 'offset': -1, 'region': ''}, + ) + invariant( + multi_locus.to_position, + 34, + multi_locus.to_coordinate, + {'position': 8, 'offset': 0, 'region': ''}, + ) + invariant( + multi_locus.to_position, + 33, + multi_locus.to_coordinate, + {'position': 9, 'offset': 0, 'region': ''}, + ) + invariant( + multi_locus.to_position, + 31, + multi_locus.to_coordinate, + {'position': 11, 'offset': 0, 'region': ''}, + ) + invariant( + multi_locus.to_position, + 30, + multi_locus.to_coordinate, + {'position': 
12, 'offset': 0, 'region': ''}, + ) + invariant( + multi_locus.to_position, + 29, + multi_locus.to_coordinate, + {'position': 12, 'offset': 1, 'region': ''}, + ) # Boundary between the last locus and downstream. invariant( - multi_locus.to_position, 5, multi_locus.to_coordinate, (21, 0, 0)) + multi_locus.to_position, + 5, + multi_locus.to_coordinate, + {'position': 21, 'offset': 0, 'region': ''}, + ) invariant( - multi_locus.to_position, 4, multi_locus.to_coordinate, (21, 1, 1)) + multi_locus.to_position, + 4, + multi_locus.to_coordinate, + {'position': 0, 'offset': 0, 'region': 'd'}, + ) def test_MultiLocus_adjacent_loci(): @@ -93,9 +174,17 @@ def test_MultiLocus_adjacent_loci(): multi_locus = MultiLocus([(1, 3), (3, 5)]) invariant( - multi_locus.to_position, 2, multi_locus.to_coordinate, (1, 0, 0)) + multi_locus.to_position, + 2, + multi_locus.to_coordinate, + {'position': 1, 'offset': 0, 'region': ''}, + ) invariant( - multi_locus.to_position, 3, multi_locus.to_coordinate, (2, 0, 0)) + multi_locus.to_position, + 3, + multi_locus.to_coordinate, + {'position': 2, 'offset': 0, 'region': ''}, + ) def test_MultiLocus_adjacent_loci_inverted(): @@ -103,9 +192,17 @@ def test_MultiLocus_adjacent_loci_inverted(): multi_locus = MultiLocus([(1, 3), (3, 5)], True) invariant( - multi_locus.to_position, 3, multi_locus.to_coordinate, (1, 0, 0)) + multi_locus.to_position, + 3, + multi_locus.to_coordinate, + {'position': 1, 'offset': 0, 'region': ''}, + ) invariant( - multi_locus.to_position, 2, multi_locus.to_coordinate, (2, 0, 0)) + multi_locus.to_position, + 2, + multi_locus.to_coordinate, + {'position': 2, 'offset': 0, 'region': ''}, + ) def test_MultiLocus_offsets_odd(): @@ -113,9 +210,17 @@ def test_MultiLocus_offsets_odd(): multi_locus = MultiLocus([(1, 3), (6, 8)]) invariant( - multi_locus.to_position, 4, multi_locus.to_coordinate, (1, 2, 0)) + multi_locus.to_position, + 4, + multi_locus.to_coordinate, + {'position': 1, 'offset': 2, 'region': ''}, + ) invariant( - 
multi_locus.to_position, 5, multi_locus.to_coordinate, (2, -1, 0)) + multi_locus.to_position, + 5, + multi_locus.to_coordinate, + {'position': 2, 'offset': -1, 'region': ''}, + ) def test_MultiLocus_offsets_odd_inverted(): @@ -123,9 +228,17 @@ def test_MultiLocus_offsets_odd_inverted(): multi_locus = MultiLocus([(1, 3), (6, 8)], True) invariant( - multi_locus.to_position, 4, multi_locus.to_coordinate, (1, 2, 0)) + multi_locus.to_position, + 4, + multi_locus.to_coordinate, + {'position': 1, 'offset': 2, 'region': ''}, + ) invariant( - multi_locus.to_position, 3, multi_locus.to_coordinate, (2, -1, 0)) + multi_locus.to_position, + 3, + multi_locus.to_coordinate, + {'position': 2, 'offset': -1, 'region': ''}, + ) def test_MultiLocus_offsets_even(): @@ -133,9 +246,17 @@ def test_MultiLocus_offsets_even(): multi_locus = MultiLocus([(1, 3), (7, 9)]) invariant( - multi_locus.to_position, 4, multi_locus.to_coordinate, (1, 2, 0)) + multi_locus.to_position, + 4, + multi_locus.to_coordinate, + {'position': 1, 'offset': 2, 'region': ''}, + ) invariant( - multi_locus.to_position, 5, multi_locus.to_coordinate, (2, -2, 0)) + multi_locus.to_position, + 5, + multi_locus.to_coordinate, + {'position': 2, 'offset': -2, 'region': ''}, + ) def test_MultiLocus_offsets_even_inverted(): @@ -143,9 +264,17 @@ def test_MultiLocus_offsets_even_inverted(): multi_locus = MultiLocus([(1, 3), (7, 9)], True) invariant( - multi_locus.to_position, 5, multi_locus.to_coordinate, (1, 2, 0)) + multi_locus.to_position, + 5, + multi_locus.to_coordinate, + {'position': 1, 'offset': 2, 'region': ''}, + ) invariant( - multi_locus.to_position, 4, multi_locus.to_coordinate, (2, -2, 0)) + multi_locus.to_position, + 4, + multi_locus.to_coordinate, + {'position': 2, 'offset': -2, 'region': ''}, + ) def test_MultiLocus_degenerate(): @@ -153,9 +282,24 @@ def test_MultiLocus_degenerate(): multi_locus = MultiLocus(_locations) degenerate_equal( - multi_locus.to_coordinate, 4, [(0, -1, -1), (-1, 0, -1)]) + 
multi_locus.to_coordinate, + 4, + [ + {'position': 0, 'offset': -1, 'region': ''}, + {'position': -1, 'offset': 0, 'region': ''}, + {'position': 0, 'offset': 0, 'region': 'u'}, + ], + ) + degenerate_equal( - multi_locus.to_coordinate, 72, [(21, 1, 1), (22, 0, 1)]) + multi_locus.to_coordinate, + 72, + [ + {'position': 21, 'offset': 1, 'region': ''}, + {'position': 22, 'offset': 0, 'region': ''}, + {'position': 0, 'offset': 0, 'region': 'd'}, + ], + ) def test_MultiLocus_inverted_degenerate(): @@ -163,6 +307,21 @@ def test_MultiLocus_inverted_degenerate(): multi_locus = MultiLocus(_locations, True) degenerate_equal( - multi_locus.to_coordinate, 72, [(0, -1, -1), (-1, 0, -1)]) + multi_locus.to_coordinate, + 72, + [ + {'position': -1, 'offset': 0, 'region': ''}, + {'position': 0, 'offset': -1, 'region': ''}, + {'position': 0, 'offset': 0, 'region': 'u'}, + ], + ) + degenerate_equal( - multi_locus.to_coordinate, 4, [(21, 1, 1), (22, 0, 1)]) + multi_locus.to_coordinate, + 4, + [ + {'position': 21, 'offset': 1, 'region': ''}, + {'position': 22, 'offset': 0, 'region': ''}, + {'position': 0, 'offset': 0, 'region': 'd'}, + ], + )